Coverage for src/dataknobs_fsm/storage/database.py: 14%
177 statements
« prev ^ index » next — coverage.py v7.10.6, created at 2025-09-20 16:46 -0600
"""Database storage backend for execution history using dataknobs_data.

This module provides a unified storage backend that works with ANY dataknobs_data
database backend (SQLite, PostgreSQL, MongoDB, Elasticsearch, S3, etc.) through
the common AsyncDatabase interface.
"""

import time
import uuid
from typing import Any, Dict, List, TYPE_CHECKING

from dataknobs_data.records import Record
from dataknobs_data.query import Query
from dataknobs_data.schema import DatabaseSchema, FieldSchema

if TYPE_CHECKING:
    # Imported only for type annotations to avoid a hard import-time dependency.
    from dataknobs_data.database import AsyncDatabase

from dataknobs_fsm.core.data_modes import DataHandlingMode
from dataknobs_fsm.execution.history import ExecutionHistory, ExecutionStep, ExecutionStatus
from dataknobs_fsm.storage.base import BaseHistoryStorage, StorageBackend, StorageConfig, StorageFactory
class UnifiedDatabaseStorage(BaseHistoryStorage):
    """Unified database storage that works with any dataknobs_data backend.

    This single implementation works with:
    - Memory (AsyncMemoryDatabase)
    - SQLite (AsyncSQLiteDatabase)
    - PostgreSQL (AsyncPostgresDatabase)
    - MongoDB (AsyncMongoDatabase)
    - Elasticsearch (AsyncElasticsearchDatabase)
    - S3 (AsyncS3Database)
    - File (AsyncFileDatabase)

    All through the same AsyncDatabase interface from dataknobs_data.
    """

    def __init__(self, config: StorageConfig):
        """Initialize database storage.

        Args:
            config: Storage configuration with backend type in connection_params.
        """
        super().__init__(config)
        self._db: AsyncDatabase | None = None
        # Separate DB handle reserved for steps; currently aliased to self._db
        # in _setup_backend.
        self._steps_db: AsyncDatabase | None = None

    async def _setup_backend(self) -> None:
        """Set up the database backend using dataknobs_data factory."""
        # Extract backend type from config; default to in-memory storage.
        backend_type = self.config.connection_params.get('type', 'memory')

        # Prepare dataknobs_data configuration.
        # NOTE(review): only the history schema is installed here;
        # _create_steps_schema exists but is never wired in — confirm whether
        # step records should use their own schema/collection.
        db_config = {
            **self.config.connection_params,
            'schema': self._create_history_schema()
        }

        # Remove 'type' as it's not needed by dataknobs_data.
        db_config.pop('type', None)

        # Use AsyncDatabaseFactory to create database instance.
        from dataknobs_data.factory import AsyncDatabaseFactory
        factory = AsyncDatabaseFactory()

        # The factory expects 'backend' not 'type'.
        db_config['backend'] = backend_type

        self._db = factory.create(**db_config)

        # Connect to the database if it has a connect method.
        if hasattr(self._db, 'connect'):
            await self._db.connect()

        # For steps, use the same database instance.
        # Different backends handle collections/tables differently.
        self._steps_db = self._db

    def _create_history_schema(self) -> DatabaseSchema:
        """Create the schema describing one execution-history record.

        Returns:
            A DatabaseSchema with identity, timing, metric, JSON-payload and
            timestamp fields for history records.
        """
        from dataknobs_data.fields import FieldType

        schema = DatabaseSchema()

        # Core identity fields.
        schema.add_field(FieldSchema(
            name='id',
            type=FieldType.TEXT,
            metadata={'primary_key': True}
        ))
        schema.add_field(FieldSchema(
            name='execution_id',
            type=FieldType.TEXT,
            metadata={'indexed': True, 'unique': True}
        ))
        schema.add_field(FieldSchema(
            name='fsm_name',
            type=FieldType.TEXT,
            metadata={'indexed': True}
        ))
        schema.add_field(FieldSchema(
            name='data_mode',
            type=FieldType.TEXT,
            metadata={'indexed': True}
        ))
        schema.add_field(FieldSchema(
            name='status',
            type=FieldType.TEXT,
            metadata={'indexed': True}
        ))

        # Timing fields.
        schema.add_field(FieldSchema(
            name='start_time',
            type=FieldType.FLOAT,
            metadata={'indexed': True}
        ))
        schema.add_field(FieldSchema(
            name='end_time',
            type=FieldType.FLOAT,
            required=False
        ))

        # Step-count metrics.
        schema.add_field(FieldSchema(
            name='total_steps',
            type=FieldType.INTEGER,
            default=0
        ))
        schema.add_field(FieldSchema(
            name='failed_steps',
            type=FieldType.INTEGER,
            default=0
        ))
        schema.add_field(FieldSchema(
            name='skipped_steps',
            type=FieldType.INTEGER,
            default=0
        ))

        # JSON data fields (serialized history payload and caller metadata).
        schema.add_field(FieldSchema(
            name='history_data',
            type=FieldType.JSON
        ))
        schema.add_field(FieldSchema(
            name='metadata',
            type=FieldType.JSON
        ))

        # Record bookkeeping timestamps (epoch seconds).
        schema.add_field(FieldSchema(
            name='created_at',
            type=FieldType.FLOAT
        ))
        schema.add_field(FieldSchema(
            name='updated_at',
            type=FieldType.FLOAT
        ))

        return schema

    def _create_steps_schema(self) -> DatabaseSchema:
        """Create the schema describing one execution-step record.

        NOTE(review): this schema is not currently installed by
        _setup_backend (steps share the history database) — confirm
        whether it should be.

        Returns:
            A DatabaseSchema with identity, linkage, status, timing and
            JSON-payload fields for step records.
        """
        from dataknobs_data.fields import FieldType

        schema = DatabaseSchema()

        schema.add_field(FieldSchema(
            name='id',
            type=FieldType.TEXT,
            metadata={'primary_key': True}
        ))
        schema.add_field(FieldSchema(
            name='execution_id',
            type=FieldType.TEXT,
            metadata={'indexed': True}
        ))
        schema.add_field(FieldSchema(
            name='step_id',
            type=FieldType.TEXT,
            metadata={'indexed': True}
        ))
        schema.add_field(FieldSchema(
            name='parent_id',
            type=FieldType.TEXT,
            required=False
        ))
        schema.add_field(FieldSchema(
            name='state_name',
            type=FieldType.TEXT,
            metadata={'indexed': True}
        ))
        schema.add_field(FieldSchema(
            name='network_name',
            type=FieldType.TEXT
        ))
        schema.add_field(FieldSchema(
            name='status',
            type=FieldType.TEXT,
            metadata={'indexed': True}
        ))
        schema.add_field(FieldSchema(
            name='timestamp',
            type=FieldType.FLOAT,
            metadata={'indexed': True}
        ))
        schema.add_field(FieldSchema(
            name='step_data',
            type=FieldType.JSON
        ))

        return schema

    async def save_history(
        self,
        history: ExecutionHistory,
        metadata: Dict[str, Any] | None = None
    ) -> str:
        """Save execution history to database.

        Args:
            history: The execution history to persist.
            metadata: Optional caller-supplied metadata stored alongside it.

        Returns:
            The execution id of the saved history.
        """
        if not self._db:
            await self.initialize()

        history_id = history.execution_id

        # Serialize history based on data mode.
        history_data = self._serialize_history(history)

        # Use a single timestamp so created_at and updated_at agree on a
        # fresh record (previously two separate time.time() calls).
        now = time.time()

        # Create record using dataknobs_data Record.
        # NOTE(review): 'id' is a fresh uuid4 on every call, so deduplication
        # of repeated saves relies on the backend's upsert keying off the
        # unique 'execution_id' field — confirm against the backend.
        record = Record({
            'id': str(uuid.uuid4()),
            'execution_id': history_id,
            'fsm_name': history.fsm_name,
            'data_mode': history.data_mode.value,
            'status': 'completed' if history.end_time else 'in_progress',
            'start_time': history.start_time,
            'end_time': history.end_time,
            'total_steps': history.total_steps,
            'failed_steps': history.failed_steps,
            'skipped_steps': history.skipped_steps,
            'history_data': history_data,
            'metadata': metadata or {},
            'created_at': now,
            'updated_at': now
        })

        # Save using dataknobs_data interface - just pass the record.
        await self._db.upsert(record)

        return history_id

    async def load_history(self, history_id: str) -> ExecutionHistory | None:
        """Load execution history from database.

        Args:
            history_id: The execution id to look up.

        Returns:
            The deserialized history, or None if not found.
        """
        if not self._db:
            await self.initialize()

        # Query using dataknobs_data Query builder.
        query = Query().filter('execution_id', '=', history_id)

        # Find record (execution_id is unique, so the first match suffices).
        results = await self._db.search(query)
        record = results[0] if results else None

        if not record:
            return None

        # Deserialize history.
        history = self._deserialize_history(
            record['history_data'],
            record['fsm_name'],
            history_id
        )

        return history

    async def save_step(
        self,
        execution_id: str,
        step: ExecutionStep,
        parent_id: str | None = None
    ) -> str:
        """Save a single execution step.

        Args:
            execution_id: The execution the step belongs to.
            step: The step to persist.
            parent_id: Optional id of the parent step, if any.

        Returns:
            The step's step_id.
        """
        if not self._steps_db:
            await self.initialize()

        # Create step record; the full step payload goes into 'step_data'.
        record = Record({
            'id': str(uuid.uuid4()),
            'execution_id': execution_id,
            'step_id': step.step_id,
            'parent_id': parent_id,
            'state_name': step.state_name,
            'network_name': step.network_name,
            'status': step.status.value,
            'timestamp': step.timestamp,
            'step_data': step.to_dict()
        })

        await self._steps_db.upsert(record)
        return step.step_id

    async def load_steps(
        self,
        execution_id: str,
        filters: Dict[str, Any] | None = None
    ) -> List[ExecutionStep]:
        """Load execution steps from database.

        Args:
            execution_id: The execution whose steps are loaded.
            filters: Optional extra equality filters on step record fields.

        Returns:
            The reconstructed ExecutionStep objects.
        """
        if not self._steps_db:
            await self.initialize()

        # Build query: always scoped to the execution, plus equality filters.
        query = Query().filter('execution_id', '=', execution_id)

        if filters:
            for key, value in filters.items():
                query = query.filter(key, '=', value)

        # Load and reconstruct steps from the serialized 'step_data' payload.
        steps = []
        results = await self._steps_db.search(query)
        for record in results:
            step_data = record['step_data']

            step = ExecutionStep(
                step_id=step_data['step_id'],
                state_name=step_data['state_name'],
                network_name=step_data['network_name'],
                timestamp=step_data['timestamp'],
                data_mode=DataHandlingMode(step_data['data_mode']),
                status=ExecutionStatus(step_data['status'])
            )

            # Restore optional properties only when present in the payload.
            for attr in ['start_time', 'end_time', 'arc_taken', 'metrics',
                         'resource_usage', 'stream_progress', 'chunks_processed',
                         'records_processed']:
                if attr in step_data:
                    setattr(step, attr, step_data[attr])

            # Errors are stored as strings; rehydrate as a generic Exception.
            if step_data.get('error'):
                step.error = Exception(step_data['error'])

            steps.append(step)

        return steps

    async def query_histories(
        self,
        filters: Dict[str, Any],
        limit: int = 100,
        offset: int = 0
    ) -> List[Dict[str, Any]]:
        """Query execution histories.

        Supported filter keys: 'fsm_name', 'data_mode', 'status' (equality),
        'start_time_after' / 'start_time_before' (range), and 'failed'
        (truthy: at least one failed step; falsy: none). Unknown keys are
        ignored.

        Args:
            filters: Filter mapping as described above.
            limit: Maximum number of results.
            offset: Number of results to skip.

        Returns:
            Summary dicts sorted by start_time descending.
        """
        if not self._db:
            await self.initialize()

        # Build query using dataknobs_data Query.
        query = Query()

        # Map filter keys to database fields.
        for key, value in filters.items():
            if key in ['fsm_name', 'data_mode', 'status']:
                query = query.filter(key, '=', value)
            elif key == 'start_time_after':
                query = query.filter('start_time', '>=', value)
            elif key == 'start_time_before':
                query = query.filter('start_time', '<=', value)
            elif key == 'failed':
                if value:
                    query = query.filter('failed_steps', '>', 0)
                else:
                    query = query.filter('failed_steps', '=', 0)

        # Apply sort order and pagination.
        query = query.sort_by('start_time', 'desc').limit(limit).offset(offset)

        # Execute and project each record into a summary dict.
        results = []
        search_results = await self._db.search(query)
        for record in search_results:
            results.append({
                'id': record['execution_id'],
                'fsm_name': record['fsm_name'],
                'data_mode': record['data_mode'],
                'status': record['status'],
                'start_time': record['start_time'],
                'end_time': record.get_value('end_time'),
                'total_steps': record['total_steps'],
                'failed_steps': record['failed_steps'],
                'metadata': record.get_value('metadata', {})
            })

        return results

    async def delete_history(self, history_id: str) -> bool:
        """Delete execution history and its associated steps.

        Args:
            history_id: The execution id to delete.

        Returns:
            True if at least one history record was deleted.
        """
        if not self._db:
            await self.initialize()

        # Find and delete history records.
        query = Query().filter('execution_id', '=', history_id)
        records = await self._db.search(query)

        deleted_count = 0
        for record in records:
            # Prefer the backend's storage id; fall back to the 'id' field.
            record_id = record.storage_id or record.get_value('id')
            if record_id and await self._db.delete(record_id):
                deleted_count += 1

        # Delete associated steps (best-effort; result not reflected in
        # the return value, which reports history deletion only).
        if self._steps_db:
            step_query = Query().filter('execution_id', '=', history_id)
            step_records = await self._steps_db.search(step_query)
            for step_record in step_records:
                step_id = step_record.storage_id or step_record.get_value('id')
                if step_id:
                    await self._steps_db.delete(step_id)

        return deleted_count > 0

    async def get_statistics(
        self,
        execution_id: str | None = None
    ) -> Dict[str, Any]:
        """Get storage statistics.

        Args:
            execution_id: If given, return stats for that execution only
                (empty dict when not found); otherwise aggregate over all
                stored histories.

        Returns:
            A statistics dict as described above.
        """
        if not self._db:
            await self.initialize()

        if execution_id:
            # Specific execution stats; execution_id is unique, so take the
            # first match explicitly (clearer than a loop that returns on
            # its first iteration).
            query = Query().filter('execution_id', '=', execution_id)
            search_results = await self._db.search(query)
            if not search_results:
                return {}
            record = search_results[0]
            return {
                'execution_id': execution_id,
                'fsm_name': record['fsm_name'],
                'data_mode': record['data_mode'],
                'status': record['status'],
                'total_steps': record['total_steps'],
                'failed_steps': record['failed_steps'],
                'start_time': record['start_time'],
                'end_time': record.get_value('end_time')
            }
        else:
            # Overall stats: counts plus per-mode and per-status histograms.
            stats = {
                'total_histories': 0,
                'mode_distribution': {},
                'status_distribution': {},
                'backend_type': self.config.connection_params.get('type', 'unknown')
            }

            all_records = await self._db.search(Query())
            for record in all_records:
                stats['total_histories'] += 1

                mode = record['data_mode']
                stats['mode_distribution'][mode] = stats['mode_distribution'].get(mode, 0) + 1

                status = record['status']
                stats['status_distribution'][status] = stats['status_distribution'].get(status, 0) + 1

            return stats

    async def cleanup(
        self,
        before_timestamp: float | None = None,
        keep_failed: bool = True
    ) -> int:
        """Clean up old histories.

        Args:
            before_timestamp: Delete histories started before this epoch
                time. Defaults to 7 days ago.
            keep_failed: When True, histories with failed steps are kept.

        Returns:
            The number of histories deleted.
        """
        if not self._db:
            await self.initialize()

        if before_timestamp is None:
            before_timestamp = time.time() - (7 * 86400)  # 7 days

        # Build query for histories older than the cutoff.
        query = Query().filter('start_time', '<', before_timestamp)

        if keep_failed:
            # Only fully-successful histories are eligible for deletion.
            query = query.filter('failed_steps', '=', 0)

        # Collect execution ids to delete.
        to_delete = []
        search_results = await self._db.search(query)
        for record in search_results:
            to_delete.append(record['execution_id'])

        # Delete each history (and its steps) via delete_history.
        deleted = 0
        for history_id in to_delete:
            if await self.delete_history(history_id):
                deleted += 1

        # BUGFIX: previously the database connection was closed here, which
        # made the storage unusable after a single cleanup pass. Connection
        # lifecycle is left to the owner of this storage instance.
        return deleted
# Every supported backend is served by the same unified implementation;
# register them all against UnifiedDatabaseStorage.
_UNIFIED_BACKENDS = (
    StorageBackend.MEMORY,
    StorageBackend.SQLITE,
    StorageBackend.POSTGRES,
    StorageBackend.MONGODB,
    StorageBackend.ELASTICSEARCH,
    StorageBackend.S3,
)
for _backend in _UNIFIED_BACKENDS:
    StorageFactory.register(_backend, UnifiedDatabaseStorage)