Coverage for src/dataknobs_fsm/io/adapters.py: 0%
483 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-20 16:46 -0600
1"""I/O adapters for specific data sources.
3This module provides adapters for different I/O sources like files, databases, and APIs.
4"""
6import json
7import csv
8from pathlib import Path
9from typing import Any, Dict, List, Union, AsyncIterator, Iterator
10import aiofiles
11from dataknobs_data import AsyncDatabase, Record, Query
13from .base import (
14 IOConfig, IOMode, IOFormat, IOProvider,
15 AsyncIOProvider, SyncIOProvider, IOAdapter
16)
class FileIOAdapter(IOAdapter):
    """Adapter for file-based I/O operations."""

    def adapt_config(self, config: IOConfig) -> Dict[str, Any]:
        """Map an IOConfig onto the keyword arguments used for file access."""
        return dict(
            path=config.source,
            mode=self._get_file_mode(config.mode),
            encoding=config.encoding,
            buffering=config.buffer_size,
        )

    def adapt_data(self, data: Any, direction: IOMode) -> Any:
        """Serialize (WRITE) or deserialize (other modes) per the adapter's format."""
        if self.format == IOFormat.JSON:
            if direction == IOMode.WRITE:
                return json.dumps(data)
            return json.loads(data) if isinstance(data, str) else data
        if self.format == IOFormat.CSV:
            if direction == IOMode.WRITE:
                return self._dict_to_csv_row(data)
            return self._csv_row_to_dict(data)
        return data

    def create_provider(self, config: IOConfig, is_async: bool = True) -> IOProvider:
        """Build an async or sync file provider for *config*."""
        provider_cls = AsyncFileProvider if is_async else SyncFileProvider
        return provider_cls(config)

    def _get_file_mode(self, mode: IOMode) -> str:
        """Translate an IOMode into the corresponding open() mode string ('r' default)."""
        translation = {
            IOMode.READ: 'r',
            IOMode.WRITE: 'w',
            IOMode.APPEND: 'a',
            IOMode.STREAM: 'r',
            IOMode.BATCH: 'r',
        }
        return translation.get(mode, 'r')

    def _dict_to_csv_row(self, data: Dict[str, Any]) -> List[Any]:
        """Flatten a mapping's values into a CSV row (insertion order preserved)."""
        return [value for value in data.values()]

    def _csv_row_to_dict(self, row: List[Any], headers: List[str] | None = None) -> Dict[str, Any]:
        """Pair CSV cells with *headers*, or synthesize col_N keys when headers are absent."""
        if not headers:
            return {f'col_{i}': cell for i, cell in enumerate(row)}
        return dict(zip(headers, row, strict=False))
class AsyncFileProvider(AsyncIOProvider):
    """Async file I/O provider built on aiofiles."""

    def __init__(self, config: IOConfig):
        super().__init__(config)
        self.file_handle = None  # aiofiles handle, opened lazily
        self.adapter = FileIOAdapter()

    async def open(self) -> None:
        """Open the configured file asynchronously."""
        file_mode = self.adapter._get_file_mode(self.config.mode)
        self.file_handle = await aiofiles.open(
            self.config.source,
            mode=file_mode,
            encoding=self.config.encoding
        )
        self._is_open = True

    async def close(self) -> None:
        """Release the underlying file handle, if any."""
        if self.file_handle:
            await self.file_handle.close()
        self._is_open = False

    async def validate(self) -> bool:
        """Check the path is a readable file (READ) or its parent directory exists."""
        target = Path(self.config.source)
        if self.config.mode == IOMode.READ:
            return target.exists() and target.is_file()
        return target.parent.exists()

    async def read(self, **kwargs) -> Any:
        """Read the whole file and parse it per the configured format."""
        if not self.file_handle:
            await self.open()
        raw = await self.file_handle.read()
        return self._parse_content(raw)

    async def write(self, data: Any, **kwargs) -> None:
        """Serialize *data* per the configured format and write it out."""
        if not self.file_handle:
            await self.open()
        await self.file_handle.write(self._format_content(data))

    async def stream_read(self, **kwargs) -> AsyncIterator[Any]:
        """Yield one parsed record per file line."""
        if not self.file_handle:
            await self.open()
        async for raw_line in self.file_handle:
            yield self._parse_line(raw_line)

    async def stream_write(self, data_stream: AsyncIterator[Any], **kwargs) -> None:
        """Write each streamed record on its own line."""
        if not self.file_handle:
            await self.open()
        async for record in data_stream:
            await self.file_handle.write(self._format_content(record) + '\n')

    async def batch_read(self, batch_size: int | None = None, **kwargs) -> AsyncIterator[List[Any]]:
        """Group streamed records into lists of at most *batch_size* items."""
        size = batch_size or self.config.batch_size
        pending: List[Any] = []
        async for record in self.stream_read(**kwargs):
            pending.append(record)
            if len(pending) >= size:
                yield pending
                pending = []
        if pending:
            yield pending  # trailing partial batch

    async def batch_write(self, batches: AsyncIterator[List[Any]], **kwargs) -> None:
        """Write every record of every incoming batch."""
        async for group in batches:
            for record in group:
                await self.write(record, **kwargs)

    def _parse_content(self, content: str) -> Any:
        """Decode full-file content per the configured format."""
        fmt = self.config.format
        if fmt == IOFormat.JSON:
            return json.loads(content)
        if fmt == IOFormat.CSV:
            return list(csv.DictReader(content.splitlines()))
        return content

    def _parse_line(self, line: str) -> Any:
        """Decode a single stripped line per the configured format."""
        stripped = line.strip()
        if self.config.format == IOFormat.JSON:
            return json.loads(stripped)
        return stripped

    def _format_content(self, data: Any) -> str:
        """Encode *data* for writing per the configured format."""
        if self.config.format == IOFormat.JSON:
            return json.dumps(data, indent=2)
        return str(data)
class SyncFileProvider(SyncIOProvider):
    """Synchronous file I/O provider.

    Mirrors AsyncFileProvider, including format-aware parsing and
    serialization. (Previously this class ignored ``config.format``:
    ``read`` returned raw text and ``write``/``stream_write`` emitted
    ``str(data)``, which produces invalid JSON for dict payloads and was
    inconsistent with the async provider.)
    """

    def __init__(self, config: IOConfig):
        super().__init__(config)
        self.file_handle = None  # built-in file object, opened lazily
        self.adapter = FileIOAdapter()

    def open(self) -> None:
        """Open the configured file for synchronous I/O."""
        mode = self.adapter._get_file_mode(self.config.mode)
        # Guard against a missing buffer_size: open() rejects buffering=None;
        # -1 selects the default buffering policy.
        buffering = self.config.buffer_size if self.config.buffer_size is not None else -1
        self.file_handle = open(
            self.config.source,
            mode=mode,
            encoding=self.config.encoding,
            buffering=buffering
        )
        self._is_open = True

    def close(self) -> None:
        """Close the file handle, if any."""
        if self.file_handle:
            self.file_handle.close()
        self._is_open = False

    def validate(self) -> bool:
        """Check the path is a readable file (READ) or its parent directory exists."""
        path = Path(self.config.source)
        if self.config.mode == IOMode.READ:
            return path.exists() and path.is_file()
        return path.parent.exists()

    def read(self, **kwargs) -> Any:
        """Read the whole file and parse it per the configured format."""
        if not self.file_handle:
            self.open()
        content = self.file_handle.read()
        return self._parse_content(content)

    def write(self, data: Any, **kwargs) -> None:
        """Serialize *data* per the configured format and write it out."""
        if not self.file_handle:
            self.open()
        self.file_handle.write(self._format_content(data))

    def stream_read(self, **kwargs) -> Iterator[Any]:
        """Yield one parsed record per file line."""
        if not self.file_handle:
            self.open()
        for line in self.file_handle:
            yield self._parse_line(line)

    def stream_write(self, data_stream: Iterator[Any], **kwargs) -> None:
        """Write each streamed record on its own line."""
        if not self.file_handle:
            self.open()
        for data in data_stream:
            self.file_handle.write(self._format_content(data) + '\n')

    def batch_read(self, batch_size: int | None = None, **kwargs) -> Iterator[List[Any]]:
        """Group streamed records into lists of at most *batch_size* items."""
        batch_size = batch_size or self.config.batch_size
        batch = []
        for item in self.stream_read(**kwargs):
            batch.append(item)
            if len(batch) >= batch_size:
                yield batch
                batch = []
        if batch:
            yield batch  # trailing partial batch

    def batch_write(self, batches: Iterator[List[Any]], **kwargs) -> None:
        """Write every record of every incoming batch."""
        for batch in batches:
            for item in batch:
                self.write(item, **kwargs)

    def _parse_content(self, content: str) -> Any:
        """Decode full-file content per the configured format (same as async provider)."""
        if self.config.format == IOFormat.JSON:
            return json.loads(content)
        elif self.config.format == IOFormat.CSV:
            return list(csv.DictReader(content.splitlines()))
        return content

    def _parse_line(self, line: str) -> Any:
        """Decode a single stripped line per the configured format."""
        line = line.strip()
        if self.config.format == IOFormat.JSON:
            return json.loads(line)
        return line

    def _format_content(self, data: Any) -> str:
        """Encode *data* for writing per the configured format."""
        if self.config.format == IOFormat.JSON:
            return json.dumps(data, indent=2)
        return str(data)
class DatabaseIOAdapter(IOAdapter):
    """Adapter for database I/O operations."""

    def adapt_config(self, config: IOConfig) -> Dict[str, Any]:
        """Pass a dict source through unchanged; wrap a string as a connection string."""
        source = config.source
        if isinstance(source, dict):
            return source
        # Parse connection string if needed
        return {'connection_string': source}

    def adapt_data(self, data: Any, direction: IOMode) -> Any:
        """Unwrap Record objects on WRITE; pass everything else through untouched."""
        if not isinstance(data, Record):
            return data
        return data.to_dict() if direction == IOMode.WRITE else data

    def create_provider(self, config: IOConfig, is_async: bool = True) -> IOProvider:
        """Build an async or sync database provider for *config*."""
        provider_cls = AsyncDatabaseProvider if is_async else SyncDatabaseProvider
        return provider_cls(config)
class AsyncDatabaseProvider(AsyncIOProvider):
    """Async database I/O provider."""

    def __init__(self, config: IOConfig):
        super().__init__(config)
        self.db = None  # AsyncDatabase handle, created lazily in open()
        self.adapter = DatabaseIOAdapter()

    async def open(self) -> None:
        """Establish the async database connection from the adapted config."""
        settings = self.adapter.adapt_config(self.config)
        self.db = await AsyncDatabase.create(
            settings.get('type', 'postgresql'),
            settings
        )
        self._is_open = True

    async def close(self) -> None:
        """Tear down the connection if one exists."""
        if self.db:
            await self.db.close()
        self._is_open = False

    async def validate(self) -> bool:
        """Probe the connection with a trivial query; False on failure or no connection."""
        try:
            if self.db:
                await self.db.execute("SELECT 1")
                return True
        except Exception:
            return False
        return False

    async def read(self, query: Union[str, Query] = None, **kwargs) -> List[Dict[str, Any]]:
        """Execute *query* (a string is wrapped in Query) and return records as dicts."""
        if not self.db:
            await self.open()
        if isinstance(query, str):
            query = Query(query)
        records = await self.db.read(query)
        return [record.to_dict() for record in records]

    async def write(self, data: Any, table: str = None, **kwargs) -> None:
        """Upsert a single dict, or each dict in a list, into *table*."""
        if not self.db:
            await self.open()
        payload = [data] if isinstance(data, dict) else data
        for entry in payload:
            await self.db.upsert(table, entry)

    async def stream_read(self, query: Union[str, Query] = None, **kwargs) -> AsyncIterator[Dict[str, Any]]:
        """Yield query results one dict at a time."""
        if not self.db:
            await self.open()
        if isinstance(query, str):
            query = Query(query)
        async for record in self.db.stream_read(query):
            yield record.to_dict()

    async def stream_write(self, data_stream: AsyncIterator[Any], table: str = None, **kwargs) -> None:
        """Upsert each streamed record into *table* as it arrives."""
        if not self.db:
            await self.open()
        async for record in data_stream:
            await self.db.upsert(table, record)

    async def batch_read(self, query: Union[str, Query] = None, batch_size: int | None = None, **kwargs) -> AsyncIterator[List[Dict[str, Any]]]:
        """Group streamed query results into lists of at most *batch_size* dicts."""
        size = batch_size or self.config.batch_size
        pending = []
        async for record in self.stream_read(query, **kwargs):
            pending.append(record)
            if len(pending) >= size:
                yield pending
                pending = []
        if pending:
            yield pending  # trailing partial batch

    async def batch_write(self, batches: AsyncIterator[List[Any]], table: str = None, **kwargs) -> None:
        """Bulk-upsert each incoming batch into *table*."""
        if not self.db:
            await self.open()
        async for group in batches:
            # Use bulk insert if available
            await self.db.bulk_upsert(table, group)
class SyncDatabaseProvider(SyncIOProvider):
    """Synchronous database I/O provider, backed by sqlite3 as a simple fallback."""

    def __init__(self, config: IOConfig):
        super().__init__(config)
        self.db = None  # sqlite3.Connection, created lazily in open()
        self.adapter = DatabaseIOAdapter()

    def open(self) -> None:
        """Open a sqlite3 connection (path from config, in-memory by default)."""
        import sqlite3
        # For sync operations, use sqlite3 as a simple fallback
        db_config = self.adapter.adapt_config(self.config)
        db_path = db_config.get('path', ':memory:')
        self.db = sqlite3.connect(db_path)
        self.db.row_factory = sqlite3.Row  # Enable dict-like access
        self._is_open = True

    def close(self) -> None:
        """Close the connection if one exists."""
        if self.db:
            self.db.close()
        self._is_open = False

    def validate(self) -> bool:
        """Return True when the connection answers a trivial query."""
        try:
            if self.db:
                self.db.execute("SELECT 1").fetchone()
                return True
        except Exception:
            return False
        return False

    def read(self, query: str = None, **kwargs) -> List[Dict[str, Any]]:
        """Run *query* (default: ``SELECT * FROM data``) and return rows as dicts."""
        if not self.db:
            self.open()
        if not query:
            query = "SELECT * FROM data"
        cursor = self.db.execute(query)
        return [dict(row) for row in cursor.fetchall()]

    def write(self, data: Any, table: str = "data", **kwargs) -> None:
        """Upsert one dict, or each dict in a list, into *table*.

        Fix: commit once per call instead of once per row — the original
        committed inside the loop, paying a transaction/fsync per record.
        """
        if not self.db:
            self.open()
        rows = [data] if isinstance(data, dict) else data
        for item in rows:
            # SECURITY NOTE: table and column names are interpolated directly
            # into the SQL string (sqlite3 cannot parameterize identifiers);
            # never pass untrusted identifiers to this method.
            columns = ', '.join(item.keys())
            placeholders = ', '.join('?' for _ in item)
            statement = f"INSERT OR REPLACE INTO {table} ({columns}) VALUES ({placeholders})"
            self.db.execute(statement, list(item.values()))
        self.db.commit()

    def stream_read(self, query: str = None, **kwargs) -> Iterator[Dict[str, Any]]:
        """Yield query results one dict at a time."""
        if not self.db:
            self.open()
        if not query:
            query = "SELECT * FROM data"
        cursor = self.db.execute(query)
        for row in cursor:
            yield dict(row)

    def stream_write(self, data_stream: Iterator[Any], table: str = "data", **kwargs) -> None:
        """Write each streamed record into *table* as it arrives."""
        if not self.db:
            self.open()
        for data in data_stream:
            self.write(data, table, **kwargs)

    def batch_read(self, query: str = None, batch_size: int | None = None, **kwargs) -> Iterator[List[Dict[str, Any]]]:
        """Group streamed query results into lists of at most *batch_size* dicts."""
        batch_size = batch_size or self.config.batch_size
        batch = []
        for item in self.stream_read(query, **kwargs):
            batch.append(item)
            if len(batch) >= batch_size:
                yield batch
                batch = []
        if batch:
            yield batch  # trailing partial batch

    def batch_write(self, batches: Iterator[List[Any]], table: str = "data", **kwargs) -> None:
        """Write each incoming batch into *table* (one commit per batch via write)."""
        if not self.db:
            self.open()
        for batch in batches:
            self.write(batch, table, **kwargs)
class HTTPIOAdapter(IOAdapter):
    """Adapter for HTTP/API I/O operations."""

    def adapt_config(self, config: IOConfig) -> Dict[str, Any]:
        """Collect HTTP request settings from the generic config."""
        return dict(
            url=config.source,
            headers=config.headers or {},
            timeout=config.timeout,
            retry_count=config.retry_count,
        )

    def adapt_data(self, data: Any, direction: IOMode) -> Any:
        """JSON-encode non-string outgoing payloads; JSON-decode incoming byte bodies."""
        if direction == IOMode.WRITE and not isinstance(data, (str, bytes)):
            return json.dumps(data)
        if direction == IOMode.READ and isinstance(data, bytes):
            return json.loads(data.decode('utf-8'))
        return data

    def create_provider(self, config: IOConfig, is_async: bool = True) -> IOProvider:
        """Build an async or sync HTTP provider for *config*."""
        return AsyncHTTPProvider(config) if is_async else SyncHTTPProvider(config)
class AsyncHTTPProvider(AsyncIOProvider):
    """Async HTTP/API I/O provider built on aiohttp."""

    def __init__(self, config: IOConfig):
        super().__init__(config)
        self.session = None  # aiohttp.ClientSession, created lazily in open()
        self.adapter = HTTPIOAdapter()

    async def open(self) -> None:
        """Open an aiohttp session with the configured headers and timeout."""
        import aiohttp
        self.session = aiohttp.ClientSession(
            headers=self.config.headers,
            timeout=aiohttp.ClientTimeout(total=self.config.timeout)
        )
        self._is_open = True

    async def close(self) -> None:
        """Close the HTTP session, if one was opened."""
        if self.session:
            await self.session.close()
        self._is_open = False

    async def validate(self) -> bool:
        """Probe the endpoint with a HEAD request; False on error or no session."""
        try:
            if self.session:
                async with self.session.head(self.config.source) as response:
                    return response.status < 400
        except Exception:
            return False
        return False

    async def read(self, **kwargs) -> Any:
        """GET the configured URL; return parsed JSON when the response is JSON, else text."""
        if not self.session:
            await self.open()
        async with self.session.get(self.config.source, **kwargs) as response:
            response.raise_for_status()
            if 'json' in response.content_type:
                return await response.json()
            return await response.text()

    async def write(self, data: Any, **kwargs) -> None:
        """POST *data* (JSON-encoded when not already str/bytes) to the configured URL."""
        if not self.session:
            await self.open()
        json_data = self.adapter.adapt_data(data, IOMode.WRITE)
        async with self.session.post(
            self.config.source,
            data=json_data,
            **kwargs
        ) as response:
            response.raise_for_status()

    async def stream_read(self, **kwargs) -> AsyncIterator[Any]:
        """Stream read from HTTP endpoint (e.g., SSE); each non-empty line is parsed as JSON."""
        if not self.session:
            await self.open()
        async with self.session.get(self.config.source, **kwargs) as response:
            response.raise_for_status()
            async for line in response.content:
                if line:
                    yield json.loads(line.decode('utf-8'))

    async def stream_write(self, data_stream: AsyncIterator[Any], **kwargs) -> Any:
        """Stream write to HTTP endpoint using chunked transfer encoding.

        This method supports both chunked upload for large files and
        streaming of multiple records to an API endpoint.

        Args:
            data_stream: Async iterator of data chunks or records
            **kwargs: Additional arguments including:
                - content_type: Content type for the upload (default: application/octet-stream)
                - chunk_size: Size of chunks for file uploads (default: 8192)
                - upload_mode: 'chunked' for file uploads, 'stream' for record streaming

        Returns:
            The server response for 'chunked' uploads, otherwise None.
        """
        upload_mode = kwargs.pop('upload_mode', 'stream')
        if upload_mode == 'chunked':
            # Bug fix: propagate the server's response so callers such as
            # chunked_upload_from_file() actually receive it (it was
            # previously discarded, making that method always return None).
            return await self._chunked_file_upload(data_stream, **kwargs)
        await self._stream_records(data_stream, **kwargs)
        return None

    async def _chunked_file_upload(self, data_stream: AsyncIterator[Any], **kwargs) -> Any:
        """Upload a file using chunked transfer encoding.

        Args:
            data_stream: Async iterator yielding file chunks (bytes)
            **kwargs: Additional arguments

        Returns:
            Parsed JSON body when the server replies with JSON, else response text.
        """
        import aiohttp

        url = kwargs.get('url', self.config.source)
        headers = kwargs.get('headers', self.config.headers or {})
        headers['Transfer-Encoding'] = 'chunked'
        content_type = kwargs.get('content_type', 'application/octet-stream')
        headers['Content-Type'] = content_type

        async def chunk_generator():
            """Generate chunks for upload, encoding str chunks as UTF-8 bytes."""
            async for chunk in data_stream:
                if isinstance(chunk, str):
                    yield chunk.encode('utf-8')
                else:
                    yield chunk

        # A dedicated session is used here because the upload carries its own
        # headers (Transfer-Encoding/Content-Type) distinct from the shared session.
        async with aiohttp.ClientSession() as session:
            async with session.post(
                url,
                data=chunk_generator(),
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=self.config.timeout)
            ) as response:
                response.raise_for_status()
                return await response.json() if response.content_type == 'application/json' else await response.text()

    async def _stream_records(self, data_stream: AsyncIterator[Any], **kwargs) -> None:
        """Stream individual records to an API endpoint, one POST per record.

        Args:
            data_stream: Async iterator yielding records
            **kwargs: Additional arguments
        """
        async for data in data_stream:
            await self.write(data, **kwargs)

    async def chunked_upload_from_file(self, file_path: str, chunk_size: int = 8192, **kwargs) -> Any:
        """Upload a file in chunks.

        Args:
            file_path: Path to the file to upload
            chunk_size: Size of each chunk in bytes
            **kwargs: Additional arguments for the upload

        Returns:
            Response from the server
        """
        # aiofiles is already imported at module level; the previous
        # function-local re-import was redundant and has been removed.
        async def read_chunks():
            """Read the file in fixed-size binary chunks."""
            async with aiofiles.open(file_path, 'rb') as f:
                while True:
                    chunk = await f.read(chunk_size)
                    if not chunk:
                        break
                    yield chunk

        kwargs['upload_mode'] = 'chunked'
        kwargs['chunk_size'] = chunk_size
        return await self.stream_write(read_chunks(), **kwargs)

    async def batch_read(self, batch_size: int | None = None, **kwargs) -> AsyncIterator[List[Any]]:
        """Read from HTTP endpoint in batches via page/limit query parameters."""
        batch_size = batch_size or self.config.batch_size
        page = 0
        while True:
            params = kwargs.get('params', {})
            params.update({'page': page, 'limit': batch_size})
            kwargs['params'] = params

            data = await self.read(**kwargs)
            if not data:
                break  # empty page marks the end of pagination

            yield data if isinstance(data, list) else [data]
            page += 1

    async def batch_write(self, batches: AsyncIterator[List[Any]], **kwargs) -> None:
        """Write to HTTP endpoint in batches, one POST per batch."""
        async for batch in batches:
            # Send batch as single request
            await self.write(batch, **kwargs)
class SyncHTTPProvider(SyncIOProvider):
    """Synchronous HTTP/API I/O provider built on requests."""

    def __init__(self, config: IOConfig):
        super().__init__(config)
        self.session = None  # requests.Session, created lazily in open()
        self.adapter = HTTPIOAdapter()

    def open(self) -> None:
        """Create a requests session and apply any configured headers."""
        import requests
        self.session = requests.Session()
        if self.config.headers:
            self.session.headers.update(self.config.headers)
        self._is_open = True

    def close(self) -> None:
        """Dispose of the session, if one was opened."""
        if self.session:
            self.session.close()
        self._is_open = False

    def validate(self) -> bool:
        """Probe the endpoint with a HEAD request; False on error or no session."""
        try:
            if self.session:
                probe = self.session.head(
                    self.config.source,
                    timeout=self.config.timeout or 30
                )
                return probe.status_code < 400
        except Exception:
            return False
        return False

    def read(self, **kwargs) -> Any:
        """GET the configured URL; return parsed JSON when the response is JSON, else text."""
        if not self.session:
            self.open()
        response = self.session.get(
            self.config.source,
            timeout=self.config.timeout or 30,
            **kwargs
        )
        response.raise_for_status()
        content_type = response.headers.get('content-type', '').lower()
        return response.json() if 'json' in content_type else response.text

    def write(self, data: Any, **kwargs) -> None:
        """POST *data* (JSON-encoded when not already str/bytes) to the configured URL."""
        if not self.session:
            self.open()
        payload = self.adapter.adapt_data(data, IOMode.WRITE)
        response = self.session.post(
            self.config.source,
            data=payload,
            timeout=self.config.timeout or 30,
            **kwargs
        )
        response.raise_for_status()

    def stream_read(self, **kwargs) -> Iterator[Any]:
        """Stream the response line by line, parsing each as JSON when possible."""
        if not self.session:
            self.open()
        response = self.session.get(
            self.config.source,
            stream=True,
            timeout=self.config.timeout or 30,
            **kwargs
        )
        response.raise_for_status()

        for raw in response.iter_lines():
            if not raw:
                continue
            decoded = raw.decode('utf-8')
            try:
                yield json.loads(decoded)
            except json.JSONDecodeError:
                yield decoded  # fall back to the raw line when not JSON

    def stream_write(self, data_stream: Iterator[Any], **kwargs) -> None:
        """Write each streamed record with an individual POST request."""
        # For sync HTTP, write each item individually
        for record in data_stream:
            self.write(record, **kwargs)

    def batch_read(self, batch_size: int | None = None, **kwargs) -> Iterator[List[Any]]:
        """Read from the endpoint in batches via page/limit query parameters."""
        size = batch_size or self.config.batch_size
        page = 0
        while True:
            params = kwargs.get('params', {})
            params.update({'page': page, 'limit': size})
            kwargs['params'] = params

            payload = self.read(**kwargs)
            if not payload:
                break  # empty page marks the end of pagination

            yield payload if isinstance(payload, list) else [payload]
            page += 1

    def batch_write(self, batches: Iterator[List[Any]], **kwargs) -> None:
        """Write to the endpoint in batches, one POST per batch."""
        for group in batches:
            # Send batch as single request
            self.write(group, **kwargs)
class StreamIOAdapter(IOAdapter):
    """Adapter for stream-based I/O operations."""

    def adapt_config(self, config: IOConfig) -> Dict[str, Any]:
        """Expose buffering parameters relevant to streaming."""
        return {
            'buffer_size': config.buffer_size,
            'chunk_size': config.batch_size,
        }

    def adapt_data(self, data: Any, direction: IOMode) -> Any:
        """Streams carry data through untouched."""
        return data

    def create_provider(self, config: IOConfig, is_async: bool = True) -> IOProvider:
        """Delegate to the adapter matching the underlying source type."""
        source = config.source
        if isinstance(source, dict):
            # dict sources describe database connections
            return DatabaseIOAdapter().create_provider(config, is_async)
        if isinstance(source, str):
            # URL strings go to HTTP; anything else is treated as a file path
            delegate = (
                HTTPIOAdapter()
                if source.startswith(('http://', 'https://'))
                else FileIOAdapter()
            )
            return delegate.create_provider(config, is_async)
        raise ValueError(f"Unsupported source type: {type(config.source)}")