Coverage for src/dataknobs_fsm/io/adapters.py: 0%

483 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-20 16:46 -0600

1"""I/O adapters for specific data sources. 

2 

3This module provides adapters for different I/O sources like files, databases, and APIs. 

4""" 

5 

6import json 

7import csv 

8from pathlib import Path 

9from typing import Any, Dict, List, Union, AsyncIterator, Iterator 

10import aiofiles 

11from dataknobs_data import AsyncDatabase, Record, Query 

12 

13from .base import ( 

14 IOConfig, IOMode, IOFormat, IOProvider, 

15 AsyncIOProvider, SyncIOProvider, IOAdapter 

16) 

17 

18 

class FileIOAdapter(IOAdapter):
    """Adapter for file-based I/O operations."""

    def adapt_config(self, config: IOConfig) -> Dict[str, Any]:
        """Translate an IOConfig into keyword arguments suitable for open()."""
        return {
            'path': config.source,
            'mode': self._get_file_mode(config.mode),
            'encoding': config.encoding,
            'buffering': config.buffer_size,
        }

    def adapt_data(self, data: Any, direction: IOMode) -> Any:
        """Serialize or deserialize *data* according to the adapter's format."""
        if self.format == IOFormat.JSON:
            if direction == IOMode.WRITE:
                return json.dumps(data)
            # On read, only decode when handed raw JSON text.
            return json.loads(data) if isinstance(data, str) else data
        if self.format == IOFormat.CSV:
            return (
                self._dict_to_csv_row(data)
                if direction == IOMode.WRITE
                else self._csv_row_to_dict(data)
            )
        return data

    def create_provider(self, config: IOConfig, is_async: bool = True) -> IOProvider:
        """Build the async or sync file provider for *config*."""
        provider_cls = AsyncFileProvider if is_async else SyncFileProvider
        return provider_cls(config)

    def _get_file_mode(self, mode: IOMode) -> str:
        """Map an IOMode onto the corresponding open() mode string ('r' default)."""
        return {
            IOMode.READ: 'r',
            IOMode.WRITE: 'w',
            IOMode.APPEND: 'a',
            IOMode.STREAM: 'r',
            IOMode.BATCH: 'r',
        }.get(mode, 'r')

    def _dict_to_csv_row(self, data: Dict[str, Any]) -> List[Any]:
        """Flatten a mapping into a list of its values (CSV row order)."""
        return list(data.values())

    def _csv_row_to_dict(self, row: List[Any], headers: List[str] | None = None) -> Dict[str, Any]:
        """Pair a CSV row with *headers*, or synthesize 'col_N' keys when absent."""
        if headers:
            return dict(zip(headers, row, strict=False))
        return {f'col_{i}': val for i, val in enumerate(row)}

71 

72 

class AsyncFileProvider(AsyncIOProvider):
    """Async file I/O provider backed by aiofiles.

    The file is opened lazily on first read/write and content is
    parsed/serialized according to ``config.format`` (JSON, CSV, raw text).
    """

    def __init__(self, config: IOConfig):
        super().__init__(config)
        self.file_handle = None  # lazily opened aiofiles handle
        self.adapter = FileIOAdapter()

    async def open(self) -> None:
        """Open the configured file for async I/O."""
        mode = self.adapter._get_file_mode(self.config.mode)
        self.file_handle = await aiofiles.open(
            self.config.source,
            mode=mode,
            encoding=self.config.encoding
        )
        self._is_open = True

    async def close(self) -> None:
        """Close the file handle and reset state so the provider can reopen."""
        if self.file_handle:
            await self.file_handle.close()
            # Bug fix: clear the stale handle; otherwise a later read()/write()
            # would skip open() and operate on a closed file.
            self.file_handle = None
        self._is_open = False

    async def validate(self) -> bool:
        """Check the path is a readable file (READ) or its parent exists (write)."""
        path = Path(self.config.source)  # type: ignore
        if self.config.mode == IOMode.READ:
            return path.exists() and path.is_file()
        return path.parent.exists()

    async def read(self, **kwargs) -> Any:
        """Read and parse the entire file."""
        if not self.file_handle:
            await self.open()
        content = await self.file_handle.read()
        return self._parse_content(content)

    async def write(self, data: Any, **kwargs) -> None:
        """Serialize *data* and write it to the file."""
        if not self.file_handle:
            await self.open()
        content = self._format_content(data)
        await self.file_handle.write(content)

    async def stream_read(self, **kwargs) -> AsyncIterator[Any]:
        """Yield the file one parsed line at a time."""
        if not self.file_handle:
            await self.open()
        async for line in self.file_handle:
            yield self._parse_line(line)

    async def stream_write(self, data_stream: AsyncIterator[Any], **kwargs) -> None:
        """Write each item from *data_stream* as its own line."""
        if not self.file_handle:
            await self.open()
        async for data in data_stream:
            content = self._format_content(data)
            await self.file_handle.write(content + '\n')

    async def batch_read(self, batch_size: int | None = None, **kwargs) -> AsyncIterator[List[Any]]:
        """Yield parsed lines grouped into lists of at most *batch_size* items."""
        batch_size = batch_size or self.config.batch_size
        batch = []
        async for item in self.stream_read(**kwargs):
            batch.append(item)
            if len(batch) >= batch_size:
                yield batch
                batch = []
        if batch:  # flush the final partial batch
            yield batch

    async def batch_write(self, batches: AsyncIterator[List[Any]], **kwargs) -> None:
        """Write every item of every batch sequentially."""
        async for batch in batches:
            for item in batch:
                await self.write(item, **kwargs)

    def _parse_content(self, content: str) -> Any:
        """Parse whole-file content per ``config.format``."""
        if self.config.format == IOFormat.JSON:
            return json.loads(content)
        elif self.config.format == IOFormat.CSV:
            return list(csv.DictReader(content.splitlines()))
        return content

    def _parse_line(self, line: str) -> Any:
        """Parse one stripped line; JSON lines are decoded, others returned raw."""
        line = line.strip()
        if self.config.format == IOFormat.JSON:
            return json.loads(line)
        return line

    def _format_content(self, data: Any) -> str:
        """Render *data* as text for writing (pretty-printed JSON or str())."""
        if self.config.format == IOFormat.JSON:
            return json.dumps(data, indent=2)
        return str(data)

171 

172 

class SyncFileProvider(SyncIOProvider):
    """Synchronous file I/O provider using the builtin open()."""

    def __init__(self, config: IOConfig):
        super().__init__(config)
        self.file_handle = None  # lazily opened file object
        self.adapter = FileIOAdapter()

    def open(self) -> None:
        """Open the configured file for sync I/O."""
        mode = self.adapter._get_file_mode(self.config.mode)
        self.file_handle = open(
            self.config.source,  # type: ignore
            mode=mode,
            encoding=self.config.encoding,
            buffering=self.config.buffer_size
        )
        self._is_open = True

    def close(self) -> None:
        """Close the file handle and reset state so the provider can reopen."""
        if self.file_handle:
            self.file_handle.close()
            # Bug fix: clear the stale handle; otherwise a later read()/write()
            # would skip open() and operate on a closed file.
            self.file_handle = None
        self._is_open = False

    def validate(self) -> bool:
        """Check the path is a readable file (READ) or its parent exists (write)."""
        path = Path(self.config.source)  # type: ignore
        if self.config.mode == IOMode.READ:
            return path.exists() and path.is_file()
        return path.parent.exists()

    def read(self, **kwargs) -> Any:
        """Read and return the entire file contents."""
        if not self.file_handle:
            self.open()
        return self.file_handle.read()

    def write(self, data: Any, **kwargs) -> None:
        """Write str(data) to the file.

        NOTE(review): unlike AsyncFileProvider this does not JSON-encode per
        ``config.format``; existing callers may rely on the raw str() form.
        """
        if not self.file_handle:
            self.open()
        self.file_handle.write(str(data))

    def stream_read(self, **kwargs) -> Iterator[Any]:
        """Yield the file one stripped line at a time."""
        if not self.file_handle:
            self.open()
        for line in self.file_handle:
            yield line.strip()

    def stream_write(self, data_stream: Iterator[Any], **kwargs) -> None:
        """Write each item from *data_stream* as its own line."""
        if not self.file_handle:
            self.open()
        for data in data_stream:
            self.file_handle.write(str(data) + '\n')

    def batch_read(self, batch_size: int | None = None, **kwargs) -> Iterator[List[Any]]:
        """Yield lines grouped into lists of at most *batch_size* items."""
        batch_size = batch_size or self.config.batch_size
        batch = []
        for item in self.stream_read(**kwargs):
            batch.append(item)
            if len(batch) >= batch_size:
                yield batch
                batch = []
        if batch:  # flush the final partial batch
            yield batch

    def batch_write(self, batches: Iterator[List[Any]], **kwargs) -> None:
        """Write every item of every batch sequentially."""
        for batch in batches:
            for item in batch:
                self.write(item, **kwargs)

248 

249 

class DatabaseIOAdapter(IOAdapter):
    """Adapter for database I/O operations."""

    def adapt_config(self, config: IOConfig) -> Dict[str, Any]:
        """Normalize *config.source* into a database configuration mapping."""
        source = config.source
        if isinstance(source, dict):
            return source
        # A non-dict source is treated as a connection string.
        return {'connection_string': source}

    def adapt_data(self, data: Any, direction: IOMode) -> Any:
        """Convert Record instances into plain dicts when writing."""
        if isinstance(data, Record) and direction == IOMode.WRITE:
            return data.to_dict()
        return data

    def create_provider(self, config: IOConfig, is_async: bool = True) -> IOProvider:
        """Build the async or sync database provider for *config*."""
        return AsyncDatabaseProvider(config) if is_async else SyncDatabaseProvider(config)

271 

272 

class AsyncDatabaseProvider(AsyncIOProvider):
    """Async database I/O provider built on dataknobs AsyncDatabase."""

    def __init__(self, config: IOConfig):
        super().__init__(config)
        self.db = None  # lazily created AsyncDatabase connection
        self.adapter = DatabaseIOAdapter()

    async def open(self) -> None:
        """Open the database connection described by the config."""
        db_config = self.adapter.adapt_config(self.config)
        self.db = await AsyncDatabase.create(
            db_config.get('type', 'postgresql'),
            db_config  # type: ignore
        )
        self._is_open = True

    async def close(self) -> None:
        """Close the connection and reset state so the provider can reopen."""
        if self.db:
            await self.db.close()
            # Bug fix: drop the stale connection; otherwise later calls would
            # skip open() and operate on a closed database handle.
            self.db = None
        self._is_open = False

    async def validate(self) -> bool:
        """Return True if a trivial query succeeds on the open connection."""
        try:
            if self.db:
                # Cheap connectivity probe.
                await self.db.execute("SELECT 1")
                return True
        except Exception:
            return False
        return False

    async def read(self, query: Union[str, Query] = None, **kwargs) -> List[Dict[str, Any]]:
        """Run *query* (str or Query) and return all rows as plain dicts."""
        if not self.db:
            await self.open()
        if isinstance(query, str):
            query = Query(query)  # type: ignore
        results = await self.db.read(query)
        return [r.to_dict() for r in results]

    async def write(self, data: Any, table: str = None, **kwargs) -> None:
        """Upsert one dict, or a list of dicts, into *table*."""
        if not self.db:
            await self.open()
        if isinstance(data, dict):
            data = [data]  # normalize a single record to a list
        for item in data:
            await self.db.upsert(table, item)

    async def stream_read(self, query: Union[str, Query] = None, **kwargs) -> AsyncIterator[Dict[str, Any]]:
        """Yield query results one dict at a time."""
        if not self.db:
            await self.open()
        if isinstance(query, str):
            query = Query(query)  # type: ignore
        async for record in self.db.stream_read(query):
            yield record.to_dict()

    async def stream_write(self, data_stream: AsyncIterator[Any], table: str = None, **kwargs) -> None:
        """Upsert each record from *data_stream* into *table*."""
        if not self.db:
            await self.open()
        async for data in data_stream:
            await self.db.upsert(table, data)

    async def batch_read(self, query: Union[str, Query] = None, batch_size: int | None = None, **kwargs) -> AsyncIterator[List[Dict[str, Any]]]:
        """Yield query results grouped into lists of at most *batch_size* rows."""
        batch_size = batch_size or self.config.batch_size
        batch = []
        async for item in self.stream_read(query, **kwargs):
            batch.append(item)
            if len(batch) >= batch_size:
                yield batch
                batch = []
        if batch:  # flush the final partial batch
            yield batch

    async def batch_write(self, batches: AsyncIterator[List[Any]], table: str = None, **kwargs) -> None:
        """Bulk-upsert each batch into *table*."""
        if not self.db:
            await self.open()
        async for batch in batches:
            await self.db.bulk_upsert(table, batch)

360 

361 

class SyncDatabaseProvider(SyncIOProvider):
    """Synchronous database I/O provider using sqlite3 as a simple backend."""

    def __init__(self, config: IOConfig):
        super().__init__(config)
        self.db = None  # lazily created sqlite3 connection
        self.adapter = DatabaseIOAdapter()

    def open(self) -> None:
        """Open a sqlite3 connection (in-memory unless a 'path' is configured)."""
        import sqlite3
        db_config = self.adapter.adapt_config(self.config)
        db_path = db_config.get('path', ':memory:')
        self.db = sqlite3.connect(db_path)
        self.db.row_factory = sqlite3.Row  # enable dict-like row access
        self._is_open = True

    def close(self) -> None:
        """Close the connection and reset state so the provider can reopen."""
        if self.db:
            self.db.close()
            # Bug fix: drop the stale connection; otherwise later calls would
            # skip open() and raise on the closed sqlite3 handle.
            self.db = None
        self._is_open = False

    def validate(self) -> bool:
        """Return True if a trivial query succeeds on the open connection."""
        try:
            if self.db:
                # Cheap connectivity probe.
                self.db.execute("SELECT 1").fetchone()
                return True
        except Exception:
            return False
        return False

    def read(self, query: str = None, **kwargs) -> List[Dict[str, Any]]:
        """Run *query* (default: all rows of 'data') and return dicts."""
        if not self.db:
            self.open()
        if not query:
            query = "SELECT * FROM data"
        cursor = self.db.execute(query)
        return [dict(row) for row in cursor.fetchall()]

    def write(self, data: Any, table: str = "data", **kwargs) -> None:
        """Upsert one dict, or a list of dicts, into *table*.

        NOTE(security): table and column names are interpolated into the SQL
        string; pass only trusted identifiers. Values are bound parameters.
        """
        if not self.db:
            self.open()
        if isinstance(data, dict):
            data = [data]  # normalize a single record to a list
        for item in data:
            columns = ', '.join(item.keys())
            placeholders = ', '.join(['?' for _ in item.keys()])
            query = f"INSERT OR REPLACE INTO {table} ({columns}) VALUES ({placeholders})"
            self.db.execute(query, list(item.values()))
        self.db.commit()

    def stream_read(self, query: str = None, **kwargs) -> Iterator[Dict[str, Any]]:
        """Yield query results one dict at a time."""
        if not self.db:
            self.open()
        if not query:
            query = "SELECT * FROM data"
        cursor = self.db.execute(query)
        for row in cursor:
            yield dict(row)

    def stream_write(self, data_stream: Iterator[Any], table: str = "data", **kwargs) -> None:
        """Upsert each record from *data_stream* into *table*."""
        if not self.db:
            self.open()
        for data in data_stream:
            self.write(data, table, **kwargs)

    def batch_read(self, query: str = None, batch_size: int | None = None, **kwargs) -> Iterator[List[Dict[str, Any]]]:
        """Yield query results grouped into lists of at most *batch_size* rows."""
        batch_size = batch_size or self.config.batch_size
        batch = []
        for item in self.stream_read(query, **kwargs):
            batch.append(item)
            if len(batch) >= batch_size:
                yield batch
                batch = []
        if batch:  # flush the final partial batch
            yield batch

    def batch_write(self, batches: Iterator[List[Any]], table: str = "data", **kwargs) -> None:
        """Write each batch (a list of dicts) via write(), one commit per batch."""
        if not self.db:
            self.open()
        for batch in batches:
            self.write(batch, table, **kwargs)

455 

456 

class HTTPIOAdapter(IOAdapter):
    """Adapter for HTTP/API I/O operations."""

    def adapt_config(self, config: IOConfig) -> Dict[str, Any]:
        """Collect the request-level settings from *config*."""
        return {
            'url': config.source,
            'headers': config.headers or {},
            'timeout': config.timeout,
            'retry_count': config.retry_count,
        }

    def adapt_data(self, data: Any, direction: IOMode) -> Any:
        """JSON-encode outgoing payloads and decode incoming byte bodies."""
        outgoing = direction == IOMode.WRITE
        if outgoing and not isinstance(data, (str, bytes)):
            return json.dumps(data)
        if direction == IOMode.READ and isinstance(data, bytes):
            return json.loads(data.decode('utf-8'))
        return data

    def create_provider(self, config: IOConfig, is_async: bool = True) -> IOProvider:
        """Build the async or sync HTTP provider for *config*."""
        return AsyncHTTPProvider(config) if is_async else SyncHTTPProvider(config)

482 

483 

class AsyncHTTPProvider(AsyncIOProvider):
    """Async HTTP/API I/O provider backed by aiohttp."""

    def __init__(self, config: IOConfig):
        super().__init__(config)
        self.session = None  # lazily created aiohttp.ClientSession
        self.adapter = HTTPIOAdapter()

    async def open(self) -> None:
        """Create the aiohttp session with configured headers and timeout."""
        import aiohttp
        self.session = aiohttp.ClientSession(
            headers=self.config.headers,
            timeout=aiohttp.ClientTimeout(total=self.config.timeout)
        )
        self._is_open = True

    async def close(self) -> None:
        """Close the session and reset state so the provider can reopen."""
        if self.session:
            await self.session.close()
            # Bug fix: drop the stale session; otherwise later calls would
            # skip open() and use a closed ClientSession.
            self.session = None
        self._is_open = False

    async def validate(self) -> bool:
        """HEAD the endpoint and report whether it answers below status 400."""
        try:
            if self.session:
                async with self.session.head(self.config.source) as response:
                    return response.status < 400
        except Exception:
            return False
        return False

    async def read(self, **kwargs) -> Any:
        """GET the endpoint; return parsed JSON when advertised, else text."""
        if not self.session:
            await self.open()
        async with self.session.get(self.config.source, **kwargs) as response:
            response.raise_for_status()
            if 'json' in response.content_type:
                return await response.json()
            return await response.text()

    async def write(self, data: Any, **kwargs) -> None:
        """POST *data* (JSON-encoded when not already str/bytes) to the endpoint."""
        if not self.session:
            await self.open()
        json_data = self.adapter.adapt_data(data, IOMode.WRITE)
        async with self.session.post(
            self.config.source,
            data=json_data,
            **kwargs
        ) as response:
            response.raise_for_status()

    async def stream_read(self, **kwargs) -> AsyncIterator[Any]:
        """Stream the response body, yielding each non-empty line decoded as JSON."""
        if not self.session:
            await self.open()
        async with self.session.get(self.config.source, **kwargs) as response:
            response.raise_for_status()
            async for line in response.content:
                if line:
                    yield json.loads(line.decode('utf-8'))

    async def stream_write(self, data_stream: AsyncIterator[Any], **kwargs) -> None:
        """Stream write to HTTP endpoint using chunked transfer encoding.

        Supports both chunked upload for large files and streaming of
        multiple records to an API endpoint.

        Args:
            data_stream: Async iterator of data chunks or records
            **kwargs: Additional arguments including:
                - content_type: Content type for the upload (default: application/octet-stream)
                - chunk_size: Size of chunks for file uploads (default: 8192)
                - upload_mode: 'chunked' for file uploads, 'stream' for record streaming
        """
        upload_mode = kwargs.pop('upload_mode', 'stream')

        if upload_mode == 'chunked':
            await self._chunked_file_upload(data_stream, **kwargs)
        else:
            await self._stream_records(data_stream, **kwargs)

    async def _chunked_file_upload(self, data_stream: AsyncIterator[Any], **kwargs) -> Any:
        """Upload a file using chunked transfer encoding.

        Args:
            data_stream: Async iterator yielding file chunks (bytes or str)
            **kwargs: Additional arguments ('url', 'headers', 'content_type')

        Returns:
            The server's JSON body when it answers application/json, else text.
        """
        import aiohttp

        url = kwargs.get('url', self.config.source)
        headers = kwargs.get('headers', self.config.headers or {})
        headers['Transfer-Encoding'] = 'chunked'
        content_type = kwargs.get('content_type', 'application/octet-stream')
        headers['Content-Type'] = content_type

        async def chunk_generator():
            """Yield upload chunks, encoding str chunks as UTF-8."""
            async for chunk in data_stream:
                if isinstance(chunk, str):
                    yield chunk.encode('utf-8')
                else:
                    yield chunk

        # A dedicated session is used so custom headers don't leak into
        # self.session's defaults.
        async with aiohttp.ClientSession() as session:
            async with session.post(
                url,
                data=chunk_generator(),
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=self.config.timeout)
            ) as response:
                response.raise_for_status()
                return await response.json() if response.content_type == 'application/json' else await response.text()

    async def _stream_records(self, data_stream: AsyncIterator[Any], **kwargs) -> None:
        """Stream individual records to the endpoint, one POST per record.

        Args:
            data_stream: Async iterator yielding records
            **kwargs: Additional arguments forwarded to write()
        """
        async for data in data_stream:
            await self.write(data, **kwargs)

    async def chunked_upload_from_file(self, file_path: str, chunk_size: int = 8192, **kwargs) -> Any:
        """Upload a file in chunks.

        Args:
            file_path: Path to the file to upload
            chunk_size: Size of each chunk in bytes
            **kwargs: Additional arguments for the upload

        Returns:
            Response from the server
        """
        import aiofiles

        async def read_chunks():
            """Yield the file's bytes in chunk_size pieces."""
            async with aiofiles.open(file_path, 'rb') as f:
                while True:
                    chunk = await f.read(chunk_size)
                    if not chunk:
                        break
                    yield chunk

        # Bug fix: call the chunked uploader directly so the server response
        # is actually returned; routing through stream_write() (-> None)
        # always discarded it, contradicting this docstring.
        return await self._chunked_file_upload(read_chunks(), **kwargs)

    async def batch_read(self, batch_size: int | None = None, **kwargs) -> AsyncIterator[List[Any]]:
        """Page through the endpoint using 'page'/'limit' query parameters."""
        batch_size = batch_size or self.config.batch_size
        page = 0
        while True:
            # Copy so we never mutate a params dict owned by the caller.
            params = dict(kwargs.get('params', {}))
            params.update({'page': page, 'limit': batch_size})
            kwargs['params'] = params

            data = await self.read(**kwargs)
            if not data:
                break

            yield data if isinstance(data, list) else [data]
            page += 1

    async def batch_write(self, batches: AsyncIterator[List[Any]], **kwargs) -> None:
        """POST each batch as a single request."""
        async for batch in batches:
            await self.write(batch, **kwargs)

661 

662 

class SyncHTTPProvider(SyncIOProvider):
    """Synchronous HTTP/API I/O provider backed by requests."""

    def __init__(self, config: IOConfig):
        super().__init__(config)
        self.session = None  # lazily created requests.Session
        self.adapter = HTTPIOAdapter()

    def open(self) -> None:
        """Create the requests session and apply configured headers."""
        import requests
        self.session = requests.Session()
        if self.config.headers:
            self.session.headers.update(self.config.headers)
        self._is_open = True

    def close(self) -> None:
        """Close the session and reset state so the provider can reopen."""
        if self.session:
            self.session.close()
            # Bug fix: drop the stale session; otherwise later calls would
            # skip open() and use a closed Session.
            self.session = None
        self._is_open = False

    def validate(self) -> bool:
        """HEAD the endpoint and report whether it answers below status 400."""
        try:
            if self.session:
                response = self.session.head(
                    self.config.source,
                    timeout=self.config.timeout or 30
                )
                return response.status_code < 400
        except Exception:
            return False
        return False

    def read(self, **kwargs) -> Any:
        """GET the endpoint; return parsed JSON when advertised, else text."""
        if not self.session:
            self.open()
        response = self.session.get(
            self.config.source,
            timeout=self.config.timeout or 30,
            **kwargs
        )
        response.raise_for_status()

        if 'json' in response.headers.get('content-type', '').lower():
            return response.json()
        return response.text

    def write(self, data: Any, **kwargs) -> None:
        """POST *data* (JSON-encoded when not already str/bytes) to the endpoint."""
        if not self.session:
            self.open()
        json_data = self.adapter.adapt_data(data, IOMode.WRITE)
        response = self.session.post(
            self.config.source,
            data=json_data,
            timeout=self.config.timeout or 30,
            **kwargs
        )
        response.raise_for_status()

    def stream_read(self, **kwargs) -> Iterator[Any]:
        """Stream the response, yielding each non-empty line (JSON when possible)."""
        if not self.session:
            self.open()
        response = self.session.get(
            self.config.source,
            stream=True,
            timeout=self.config.timeout or 30,
            **kwargs
        )
        response.raise_for_status()

        for line in response.iter_lines():
            if line:
                try:
                    yield json.loads(line.decode('utf-8'))
                except json.JSONDecodeError:
                    # Fall back to raw text for non-JSON lines.
                    yield line.decode('utf-8')

    def stream_write(self, data_stream: Iterator[Any], **kwargs) -> None:
        """POST each item from *data_stream* individually."""
        for data in data_stream:
            self.write(data, **kwargs)

    def batch_read(self, batch_size: int | None = None, **kwargs) -> Iterator[List[Any]]:
        """Page through the endpoint using 'page'/'limit' query parameters."""
        batch_size = batch_size or self.config.batch_size
        page = 0
        while True:
            # Copy so we never mutate a params dict owned by the caller.
            params = dict(kwargs.get('params', {}))
            params.update({'page': page, 'limit': batch_size})
            kwargs['params'] = params

            data = self.read(**kwargs)
            if not data:
                break

            yield data if isinstance(data, list) else [data]
            page += 1

    def batch_write(self, batches: Iterator[List[Any]], **kwargs) -> None:
        """POST each batch as a single request."""
        for batch in batches:
            self.write(batch, **kwargs)

772 

773 

class StreamIOAdapter(IOAdapter):
    """Adapter for stream-based I/O operations."""

    def adapt_config(self, config: IOConfig) -> Dict[str, Any]:
        """Expose the buffering knobs relevant to streaming."""
        return {
            'buffer_size': config.buffer_size,
            'chunk_size': config.batch_size,
        }

    def adapt_data(self, data: Any, direction: IOMode) -> Any:
        """Streams pass data through untouched."""
        return data

    def create_provider(self, config: IOConfig, is_async: bool = True) -> IOProvider:
        """Delegate provider creation based on the underlying source type."""
        source = config.source
        if isinstance(source, str):
            # URL-looking strings go to HTTP; everything else is a file path.
            is_http = source.startswith(('http://', 'https://'))
            delegate = HTTPIOAdapter() if is_http else FileIOAdapter()
            return delegate.create_provider(config, is_async)
        if isinstance(source, dict):
            return DatabaseIOAdapter().create_provider(config, is_async)
        raise ValueError(f"Unsupported source type: {type(config.source)}")