# Coverage report: src/dataknobs_fsm/storage/base.py — 41% of 78 statements covered
# (generated by coverage.py v7.10.6 at 2025-09-20 16:46 -0600)

"""Base interfaces and classes for history storage."""

from abc import ABC, abstractmethod
from enum import Enum
from typing import Any, Dict, List, Type

from dataknobs_fsm.core.data_modes import DataHandlingMode
from dataknobs_fsm.execution.history import ExecutionHistory, ExecutionStep


11class StorageBackend(Enum): 

12 """Available storage backends.""" 

13 MEMORY = "memory" 

14 FILE = "file" 

15 SQLITE = "sqlite" 

16 POSTGRES = "postgres" 

17 MONGODB = "mongodb" 

18 S3 = "s3" 

19 ELASTICSEARCH = "elasticsearch" 

20 

21 

22class StorageConfig: 

23 """Configuration for history storage.""" 

24 

25 def __init__( 

26 self, 

27 backend: StorageBackend = StorageBackend.MEMORY, 

28 connection_params: Dict[str, Any] | None = None, 

29 retention_policy: Dict[str, Any] | None = None, 

30 compression: bool = False, 

31 batch_size: int = 100, 

32 mode_specific_config: Dict[DataHandlingMode, Dict[str, Any]] | None = None 

33 ): 

34 """Initialize storage configuration. 

35  

36 Args: 

37 backend: Storage backend to use. 

38 connection_params: Backend-specific connection parameters. 

39 retention_policy: Policy for data retention. 

40 compression: Whether to compress stored data. 

41 batch_size: Batch size for bulk operations. 

42 mode_specific_config: Configuration per data mode. 

43 """ 

44 self.backend = backend 

45 self.connection_params = connection_params or {} 

46 self.retention_policy = retention_policy or {} 

47 self.compression = compression 

48 self.batch_size = batch_size 

49 self.mode_specific_config = mode_specific_config or {} 

50 

51 def get_mode_config(self, mode: DataHandlingMode) -> Dict[str, Any]: 

52 """Get configuration for a specific data mode. 

53  

54 Args: 

55 mode: Data mode. 

56  

57 Returns: 

58 Configuration for that mode. 

59 """ 

60 return self.mode_specific_config.get(mode, {}) 

61 

62 

63class IHistoryStorage(ABC): 

64 """Interface for history storage backends.""" 

65 

66 @abstractmethod 

67 async def save_history( 

68 self, 

69 history: ExecutionHistory, 

70 metadata: Dict[str, Any] | None = None 

71 ) -> str: 

72 """Save execution history. 

73  

74 Args: 

75 history: Execution history to save. 

76 metadata: Optional metadata. 

77  

78 Returns: 

79 Storage ID for the saved history. 

80 """ 

81 pass 

82 

83 @abstractmethod 

84 async def load_history( 

85 self, 

86 history_id: str 

87 ) -> ExecutionHistory | None: 

88 """Load execution history by ID. 

89  

90 Args: 

91 history_id: ID of the history to load. 

92  

93 Returns: 

94 ExecutionHistory if found, None otherwise. 

95 """ 

96 pass 

97 

98 @abstractmethod 

99 async def save_step( 

100 self, 

101 execution_id: str, 

102 step: ExecutionStep, 

103 parent_id: str | None = None 

104 ) -> str: 

105 """Save a single execution step. 

106  

107 Args: 

108 execution_id: Execution ID this step belongs to. 

109 step: Execution step to save. 

110 parent_id: Parent step ID if branching. 

111  

112 Returns: 

113 Storage ID for the saved step. 

114 """ 

115 pass 

116 

117 @abstractmethod 

118 async def load_steps( 

119 self, 

120 execution_id: str, 

121 filters: Dict[str, Any] | None = None 

122 ) -> List[ExecutionStep]: 

123 """Load execution steps. 

124  

125 Args: 

126 execution_id: Execution ID to load steps for. 

127 filters: Optional filters (e.g., state_name, status). 

128  

129 Returns: 

130 List of execution steps. 

131 """ 

132 pass 

133 

134 @abstractmethod 

135 async def query_histories( 

136 self, 

137 filters: Dict[str, Any], 

138 limit: int = 100, 

139 offset: int = 0 

140 ) -> List[Dict[str, Any]]: 

141 """Query execution histories. 

142  

143 Args: 

144 filters: Query filters. 

145 limit: Maximum results to return. 

146 offset: Result offset for pagination. 

147  

148 Returns: 

149 List of history summaries. 

150 """ 

151 pass 

152 

153 @abstractmethod 

154 async def delete_history( 

155 self, 

156 history_id: str 

157 ) -> bool: 

158 """Delete execution history. 

159  

160 Args: 

161 history_id: ID of history to delete. 

162  

163 Returns: 

164 True if deleted successfully. 

165 """ 

166 pass 

167 

168 @abstractmethod 

169 async def get_statistics( 

170 self, 

171 execution_id: str | None = None 

172 ) -> Dict[str, Any]: 

173 """Get storage statistics. 

174  

175 Args: 

176 execution_id: Optional execution ID for specific stats. 

177  

178 Returns: 

179 Storage statistics. 

180 """ 

181 pass 

182 

183 @abstractmethod 

184 async def cleanup( 

185 self, 

186 before_timestamp: float | None = None, 

187 keep_failed: bool = True 

188 ) -> int: 

189 """Clean up old histories. 

190  

191 Args: 

192 before_timestamp: Delete histories before this timestamp. 

193 keep_failed: Whether to keep failed executions. 

194  

195 Returns: 

196 Number of histories deleted. 

197 """ 

198 pass 

199 

200 

201class BaseHistoryStorage(IHistoryStorage): 

202 """Base class for history storage implementations.""" 

203 

204 def __init__(self, config: StorageConfig): 

205 """Initialize storage with configuration. 

206  

207 Args: 

208 config: Storage configuration. 

209 """ 

210 self.config = config 

211 self._initialized = False 

212 

213 async def initialize(self) -> None: 

214 """Initialize the storage backend.""" 

215 if not self._initialized: 

216 await self._setup_backend() 

217 self._initialized = True 

218 

219 @abstractmethod 

220 async def _setup_backend(self) -> None: 

221 """Set up the backend storage.""" 

222 pass 

223 

224 def _serialize_history(self, history: ExecutionHistory) -> Dict[str, Any]: 

225 """Serialize execution history for storage. 

226  

227 Args: 

228 history: Execution history to serialize. 

229  

230 Returns: 

231 Serialized history. 

232 """ 

233 data = history.to_dict() 

234 

235 # Apply compression if configured 

236 if self.config.compression: 

237 import zlib 

238 import json 

239 import base64 

240 

241 json_str = json.dumps(data) 

242 compressed = zlib.compress(json_str.encode('utf-8')) 

243 data = { 

244 'compressed': True, 

245 'data': base64.b64encode(compressed).decode('utf-8') 

246 } 

247 

248 return data 

249 

250 def _deserialize_history( 

251 self, 

252 data: Dict[str, Any], 

253 fsm_name: str, 

254 execution_id: str 

255 ) -> ExecutionHistory: 

256 """Deserialize execution history from storage. 

257  

258 Args: 

259 data: Serialized history data. 

260 fsm_name: FSM name. 

261 execution_id: Execution ID. 

262  

263 Returns: 

264 ExecutionHistory instance. 

265 """ 

266 # Decompress if needed 

267 if data.get('compressed'): 

268 import zlib 

269 import json 

270 import base64 

271 

272 compressed = base64.b64decode(data['data']) 

273 json_str = zlib.decompress(compressed).decode('utf-8') 

274 data = json.loads(json_str) 

275 

276 # Use ExecutionHistory.from_dict which properly reconstructs the tree 

277 return ExecutionHistory.from_dict(data) 

278 

279 def _apply_retention_policy(self, histories: List[Dict[str, Any]]) -> List[Dict[str, Any]]: 

280 """Apply retention policy to histories. 

281  

282 Args: 

283 histories: List of history records. 

284  

285 Returns: 

286 Filtered list based on retention policy. 

287 """ 

288 if not self.config.retention_policy: 

289 return histories 

290 

291 import time 

292 

293 max_age = self.config.retention_policy.get('max_age_days') 

294 max_count = self.config.retention_policy.get('max_count') 

295 

296 if max_age: 

297 cutoff = time.time() - (max_age * 86400) 

298 histories = [h for h in histories if h.get('timestamp', 0) > cutoff] 

299 

300 if max_count and len(histories) > max_count: 

301 # Keep most recent 

302 histories = sorted(histories, key=lambda x: x.get('timestamp', 0), reverse=True) 

303 histories = histories[:max_count] 

304 

305 return histories 

306 

307 

308class StorageFactory: 

309 """Factory for creating history storage instances.""" 

310 

311 _registry: Dict[StorageBackend, Type[IHistoryStorage]] = {} 

312 

313 @classmethod 

314 def register( 

315 cls, 

316 backend: StorageBackend, 

317 storage_class: Type[IHistoryStorage] 

318 ) -> None: 

319 """Register a storage backend. 

320  

321 Args: 

322 backend: Backend type. 

323 storage_class: Storage class to register. 

324 """ 

325 cls._registry[backend] = storage_class 

326 

327 @classmethod 

328 def create( 

329 cls, 

330 config: StorageConfig 

331 ) -> IHistoryStorage: 

332 """Create a storage instance. 

333  

334 Args: 

335 config: Storage configuration. 

336  

337 Returns: 

338 Storage instance. 

339  

340 Raises: 

341 ValueError: If backend not registered. 

342 """ 

343 storage_class = cls._registry.get(config.backend) 

344 if not storage_class: 

345 raise ValueError(f"Unknown storage backend: {config.backend}") 

346 

347 return storage_class(config) # type: ignore 

348 

349 @classmethod 

350 def get_available_backends(cls) -> List[StorageBackend]: 

351 """Get list of available backends. 

352  

353 Returns: 

354 List of registered backends. 

355 """ 

356 return list(cls._registry.keys())