Coverage for src/dataknobs_fsm/patterns/error_recovery.py: 0%

395 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-20 16:46 -0600

1"""Error recovery pattern implementation. 

2 

3This module provides pre-configured FSM patterns for error recovery and resilience, 

4including retry strategies, circuit breakers, fallback mechanisms, and compensation. 

5""" 

6 

7from typing import Any, Dict, List, Callable 

8from dataclasses import dataclass 

9from enum import Enum 

10import asyncio 

11import time 

12from datetime import datetime 

13import random 

14import logging 

15 

16from ..api.simple import SimpleFSM 

17from ..core.data_modes import DataHandlingMode 

18 

19logger = logging.getLogger(__name__) 

20 

21 

class RecoveryStrategy(Enum):
    """Error recovery strategies selectable as primary or secondary strategy."""
    RETRY = "retry"  # Simple retry with backoff
    CIRCUIT_BREAKER = "circuit_breaker"  # Circuit breaker pattern
    FALLBACK = "fallback"  # Use fallback value/service
    COMPENSATE = "compensate"  # Compensation/rollback
    DEADLINE = "deadline"  # Deadline-based timeout
    BULKHEAD = "bulkhead"  # Isolate failures
    CACHE = "cache"  # Use cached results

31 

32 

class BackoffStrategy(Enum):
    """Backoff strategies for retries (how the delay grows between attempts)."""
    FIXED = "fixed"  # Fixed delay
    LINEAR = "linear"  # Linear increase
    EXPONENTIAL = "exponential"  # Exponential increase
    JITTER = "jitter"  # Random jitter added
    DECORRELATED = "decorrelated"  # Decorrelated jitter

40 

41 

@dataclass
class RetryConfig:
    """Configuration for retry strategy (consumed by ``RetryExecutor``)."""
    max_attempts: int = 3
    initial_delay: float = 1.0  # seconds
    max_delay: float = 60.0  # seconds; cap applied after backoff calculation
    backoff_strategy: BackoffStrategy = BackoffStrategy.EXPONENTIAL
    backoff_multiplier: float = 2.0
    jitter_range: float = 0.1  # 10% jitter (JITTER strategy only)

    # Retry conditions
    retry_on_exceptions: List[type] | None = None  # None = retry on any exception
    retry_on_result: Callable[[Any], bool] | None = None  # True = result warrants retry

    # Hooks
    on_retry: Callable[[int, Exception], None] | None = None  # called as (attempt, exc)
    on_failure: Callable[[Exception], None] | None = None  # called when attempts exhausted

59 

60 

@dataclass
class CircuitBreakerConfig:
    """Configuration for circuit breaker (consumed by ``CircuitBreaker``)."""
    failure_threshold: int = 5  # Failures (within window) to open circuit
    success_threshold: int = 2  # Successes in HALF_OPEN to close circuit
    timeout: float = 60.0  # Time before half-open state (seconds)

    # Monitoring window
    window_size: int = 10  # Rolling window size
    window_duration: float = 60.0  # Window duration in seconds

    # Callbacks fired on state transitions
    on_open: Callable[[], None] | None = None
    on_close: Callable[[], None] | None = None
    on_half_open: Callable[[], None] | None = None

76 

77 

@dataclass
class FallbackConfig:
    """Configuration for fallback strategy.

    Fallback sources are tried in order: value, function, cached result.
    """
    fallback_value: Any | None = None  # static value returned on failure
    fallback_function: Callable[[Exception], Any] | None = None  # computed from the exception
    fallback_service: str | None = None  # Alternative service URL

    # Cache fallback
    use_cache: bool = False  # also enables caching of successful results
    cache_ttl: float = 300.0  # 5 minutes
    # NOTE(review): cache_ttl is not currently enforced by the workflow — confirm intent.

    # Conditions
    fallback_on_exceptions: List[type] | None = None  # None = fall back on any exception
    fallback_on_timeout: bool = True

92 

93 

@dataclass
class CompensationConfig:
    """Configuration for compensation strategy."""
    # Actions run (in declaration order) to undo side effects after a failure;
    # each receives the saved state dict (or None if save_state is False).
    compensation_actions: List[Callable[[Any], None]]
    save_state: bool = True  # Save state before operation

    # Sagas pattern
    use_sagas: bool = False
    saga_timeout: float = 300.0

    # Callbacks around the compensation run
    on_compensation_start: Callable[[], None] | None = None
    on_compensation_complete: Callable[[], None] | None = None

107 

108 

@dataclass
class BulkheadConfig:
    """Configuration for bulkhead isolation (consumed by ``Bulkhead``)."""
    max_concurrent: int = 10  # semaphore size: concurrent executions allowed
    max_queue_size: int = 100
    queue_timeout: float = 30.0  # max seconds to wait for a slot

    # Thread pool isolation
    use_thread_pool: bool = False
    thread_pool_size: int = 5

    # Metrics
    track_metrics: bool = True  # when False, Bulkhead.metrics is None

122 

123 

@dataclass
class ErrorRecoveryConfig:
    """Configuration for error recovery workflow.

    ``primary_strategy`` drives the main execution path; on failure the
    workflow may fall through to ``secondary_strategies`` in order.
    """
    primary_strategy: RecoveryStrategy
    secondary_strategies: List[RecoveryStrategy] | None = None

    # Strategy configurations (only the ones matching enabled strategies are used)
    retry_config: RetryConfig | None = None
    circuit_breaker_config: CircuitBreakerConfig | None = None
    fallback_config: FallbackConfig | None = None
    compensation_config: CompensationConfig | None = None
    bulkhead_config: BulkheadConfig | None = None

    # Global settings
    max_total_attempts: int = 10
    global_timeout: float = 300.0  # seconds; used as the DEADLINE strategy timeout

    # Error classification
    transient_errors: List[type] | None = None
    permanent_errors: List[type] | None = None

    # Monitoring
    log_errors: bool = True
    metrics_enabled: bool = True
    alert_threshold: int = 10  # Errors before alerting (warning log)

149 

150 

class RetryExecutor:
    """Executor for retry logic.

    Re-invokes a callable according to the configured backoff strategy
    until it succeeds, the retry-on-result condition clears, or
    ``config.max_attempts`` is exhausted.
    """

    def __init__(self, config: RetryConfig):
        self.config = config

    def _calculate_delay(self, attempt: int, previous_delay: float | None = None) -> float:
        """Calculate delay for the next retry.

        Args:
            attempt: 1-based number of the attempt that just failed.
            previous_delay: Delay used before the previous retry; only
                consulted by the DECORRELATED strategy.

        Returns:
            Delay in seconds, capped at ``config.max_delay``.
        """
        strategy = self.config.backoff_strategy

        if strategy == BackoffStrategy.FIXED:
            delay = self.config.initial_delay

        elif strategy == BackoffStrategy.LINEAR:
            delay = self.config.initial_delay * attempt

        elif strategy == BackoffStrategy.EXPONENTIAL:
            delay = self.config.initial_delay * (self.config.backoff_multiplier ** (attempt - 1))

        elif strategy == BackoffStrategy.JITTER:
            # Exponential base with proportional +/- jitter_range noise.
            base_delay = self.config.initial_delay * (self.config.backoff_multiplier ** (attempt - 1))
            jitter = random.uniform(-self.config.jitter_range, self.config.jitter_range)
            delay = base_delay * (1 + jitter)

        elif strategy == BackoffStrategy.DECORRELATED:
            # Decorrelated jitter: uniform between base delay and 3x the previous delay.
            if previous_delay is None:
                delay = self.config.initial_delay
            else:
                delay = random.uniform(self.config.initial_delay, previous_delay * 3)

        else:
            delay = self.config.initial_delay

        return min(delay, self.config.max_delay)

    async def execute(self, func: Callable, *args: Any, **kwargs: Any) -> Any:
        """Execute ``func`` with retry logic.

        Args:
            func: Sync or async callable to execute.
            *args: Positional arguments passed through to ``func``.
            **kwargs: Keyword arguments passed through to ``func``.

        Returns:
            The result of ``func``.  NOTE: if ``retry_on_result`` still
            rejects the result on the final attempt, that last result is
            returned anyway rather than raising.

        Raises:
            Exception: Re-raises the last exception from ``func`` once
                attempts are exhausted, or immediately when the exception
                type is not listed in ``retry_on_exceptions``.
        """
        last_exception: Exception | None = None
        previous_delay: float | None = None

        for attempt in range(1, self.config.max_attempts + 1):
            try:
                if asyncio.iscoroutinefunction(func):
                    result = await func(*args, **kwargs)
                else:
                    result = func(*args, **kwargs)

                # Check if should retry based on the result (e.g. soft errors).
                if self.config.retry_on_result and self.config.retry_on_result(result):
                    if attempt < self.config.max_attempts:
                        delay = self._calculate_delay(attempt, previous_delay)
                        previous_delay = delay
                        await asyncio.sleep(delay)
                        continue

                return result

            except Exception as e:
                last_exception = e

                # Non-retryable exception types propagate immediately.
                if self.config.retry_on_exceptions:
                    if not any(isinstance(e, exc_type) for exc_type in self.config.retry_on_exceptions):
                        raise

                if attempt < self.config.max_attempts:
                    # Calculate delay before the next attempt.
                    delay = self._calculate_delay(attempt, previous_delay)
                    previous_delay = delay

                    # Call retry hook
                    if self.config.on_retry:
                        self.config.on_retry(attempt, e)

                    await asyncio.sleep(delay)
                else:
                    # Final failure
                    if self.config.on_failure:
                        self.config.on_failure(e)
                    raise

        # Defensive: the loop always returns or raises; kept for safety.
        raise last_exception  # type: ignore

228 

229 

class CircuitBreakerState(Enum):
    """Circuit breaker states."""
    CLOSED = "closed"  # Normal operation
    OPEN = "open"  # Failing fast
    HALF_OPEN = "half_open"  # Testing recovery

235 

236 

class CircuitBreaker:
    """Circuit breaker implementation.

    Tracks failures in a rolling time window.  While OPEN, calls fail
    fast with ``CircuitBreakerError``; after ``config.timeout`` seconds
    the breaker moves to HALF_OPEN and lets trial calls through.
    ``config.success_threshold`` consecutive successes close it again;
    any failure while HALF_OPEN re-opens it.
    """

    def __init__(self, config: CircuitBreakerConfig):
        self.config = config
        self.state = CircuitBreakerState.CLOSED
        self.failure_count = 0
        self.success_count = 0  # successes observed while HALF_OPEN
        self.last_failure_time = None  # wall-clock time of most recent failure
        self.window_start = time.time()
        self.window_failures = []  # failure timestamps within the rolling window
        self._lock = asyncio.Lock()  # guards all state mutations below

    async def call(self, func: Callable, *args: Any, **kwargs: Any) -> Any:
        """Execute function with circuit breaker protection.

        Args:
            func: Sync or async callable to protect.
            *args: Positional arguments for ``func``.
            **kwargs: Keyword arguments for ``func``.

        Returns:
            The result of ``func``.

        Raises:
            CircuitBreakerError: When the circuit is OPEN and the timeout
                has not yet elapsed.
            Exception: Whatever ``func`` raises (recorded as a failure).
        """
        async with self._lock:
            # Check state
            if self.state == CircuitBreakerState.OPEN:
                # Check if should transition to half-open
                if self.last_failure_time:
                    elapsed = time.time() - self.last_failure_time  # type: ignore[unreachable]
                    if elapsed >= self.config.timeout:
                        self.state = CircuitBreakerState.HALF_OPEN
                        if self.config.on_half_open:
                            self.config.on_half_open()
                    else:
                        from ..core.exceptions import CircuitBreakerError
                        raise CircuitBreakerError(wait_time=self.config.timeout - elapsed)
                else:
                    from ..core.exceptions import CircuitBreakerError
                    raise CircuitBreakerError()

        # NOTE: func runs OUTSIDE the lock — asyncio.Lock is not reentrant and
        # the success/failure bookkeeping below re-acquires it; this also keeps
        # concurrent protected calls from serializing on each other.
        try:
            # Execute function
            if asyncio.iscoroutinefunction(func):
                result = await func(*args, **kwargs)
            else:
                result = func(*args, **kwargs)

            # Success
            async with self._lock:
                if self.state == CircuitBreakerState.HALF_OPEN:
                    self.success_count += 1
                    if self.success_count >= self.config.success_threshold:
                        self.state = CircuitBreakerState.CLOSED
                        self.failure_count = 0
                        self.success_count = 0
                        if self.config.on_close:
                            self.config.on_close()

                elif self.state == CircuitBreakerState.CLOSED:
                    # Reset failure count on success
                    self.failure_count = 0

            return result

        except Exception:
            # Failure
            async with self._lock:
                self.failure_count += 1
                self.last_failure_time = time.time()

                # Add to window
                self.window_failures.append(time.time())

                # Clean old failures from window
                cutoff = time.time() - self.config.window_duration
                self.window_failures = [t for t in self.window_failures if t > cutoff]

                # Check if should open circuit
                if self.state == CircuitBreakerState.HALF_OPEN:
                    # Any failure during trial re-opens the circuit.
                    self.state = CircuitBreakerState.OPEN
                    self.success_count = 0
                    if self.config.on_open:
                        self.config.on_open()

                elif self.state == CircuitBreakerState.CLOSED:
                    if len(self.window_failures) >= self.config.failure_threshold:
                        self.state = CircuitBreakerState.OPEN
                        if self.config.on_open:
                            self.config.on_open()

            raise

320 

321 

class Bulkhead:
    """Bulkhead isolation pattern.

    Caps the number of concurrent executions via a semaphore; callers
    that cannot obtain a slot within ``config.queue_timeout`` seconds
    fail with ``BulkheadTimeoutError``.
    """

    def __init__(self, config: BulkheadConfig):
        self.config = config
        self.semaphore = asyncio.Semaphore(config.max_concurrent)
        self.queue = asyncio.Queue(maxsize=config.max_queue_size)
        self.active_count = 0
        self.queued_count = 0
        if config.track_metrics:
            self.metrics = {
                'executed': 0,
                'rejected': 0,
                'timeout': 0
            }
        else:
            self.metrics = None

    async def execute(self, func: Callable, *args: Any, **kwargs: Any) -> Any:
        """Run *func* under the concurrency limit.

        Args:
            func: Sync or async callable to execute.
            *args: Positional arguments for ``func``.
            **kwargs: Keyword arguments for ``func``.

        Returns:
            The result of ``func``.

        Raises:
            BulkheadTimeoutError: No execution slot became free in time.
        """
        # Wait (bounded) for a free slot.
        try:
            await asyncio.wait_for(
                self.semaphore.acquire(),
                timeout=self.config.queue_timeout
            )
        except asyncio.TimeoutError:
            if self.metrics is not None:
                self.metrics['timeout'] += 1
            from ..core.exceptions import BulkheadTimeoutError
            raise BulkheadTimeoutError("Bulkhead queue timeout") from None

        self.active_count += 1
        try:
            if asyncio.iscoroutinefunction(func):
                outcome = await func(*args, **kwargs)
            else:
                outcome = func(*args, **kwargs)

            if self.metrics is not None:
                self.metrics['executed'] += 1
            return outcome
        finally:
            # Always free the slot, even when func raises.
            self.active_count -= 1
            self.semaphore.release()

368 

369 

class ErrorRecoveryWorkflow:
    """Error recovery workflow orchestrator.

    Executes callables under a primary recovery strategy (retry, circuit
    breaker, fallback, compensation, bulkhead, or deadline) and, when the
    primary path fails, falls through to any configured secondary
    strategies before re-raising.
    """

    def __init__(self, config: ErrorRecoveryConfig):
        """Initialize error recovery workflow.

        Args:
            config: Error recovery configuration
        """
        self.config = config
        self._fsm = self._build_fsm()
        self._retry_executor = None
        self._circuit_breaker = None
        self._bulkhead = None
        self._cache = {}  # fallback cache: 'last_result' / 'last_success_time'
        self._state_history = []  # states saved before compensated operations
        self._error_count = 0
        self._metrics = {
            'attempts': 0,
            'successes': 0,
            'failures': 0,
            'fallbacks': 0,
            'compensations': 0
        }

        # Initialize components whose configurations were supplied.
        if config.retry_config:
            self._retry_executor = RetryExecutor(config.retry_config)
        if config.circuit_breaker_config:
            self._circuit_breaker = CircuitBreaker(config.circuit_breaker_config)
        if config.bulkhead_config:
            self._bulkhead = Bulkhead(config.bulkhead_config)

    def _build_fsm(self) -> SimpleFSM:
        """Build FSM for error recovery workflow.

        The FSM mirrors the configured primary strategy: start -> execute,
        plus strategy-specific recovery states, ending at 'end'.
        """
        # Add start state
        states = [{'name': 'start', 'type': 'initial', 'is_start': True}]
        arcs = []

        # Main execution state
        states.append({'name': 'execute', 'type': 'task'})
        arcs.append({'from': 'start', 'to': 'execute', 'name': 'init'})

        # Recovery states based on strategy
        if self.config.primary_strategy == RecoveryStrategy.RETRY:
            states.append({'name': 'retry', 'type': 'task'})
            arcs.append({
                'from': 'execute',
                'to': 'retry',
                'name': 'on_error',
                'condition': {'type': 'inline', 'code': 'data.get("error") is not None'}  # type: ignore
            })
            arcs.append({'from': 'retry', 'to': 'execute', 'name': 'retry_attempt'})
            arcs.append({'from': 'retry', 'to': 'end', 'name': 'max_retries_reached'})

        elif self.config.primary_strategy == RecoveryStrategy.CIRCUIT_BREAKER:
            states.append({'name': 'circuit_check', 'type': 'decision'})
            arcs.append({'from': 'execute', 'to': 'circuit_check', 'name': 'check_circuit'})
            arcs.append({'from': 'circuit_check', 'to': 'end', 'name': 'circuit_open'})
            arcs.append({'from': 'circuit_check', 'to': 'execute', 'name': 'circuit_closed'})

        elif self.config.primary_strategy == RecoveryStrategy.FALLBACK:
            states.append({'name': 'fallback', 'type': 'task'})
            arcs.append({
                'from': 'execute',
                'to': 'fallback',
                'name': 'on_error',
                'condition': {'type': 'inline', 'code': 'data.get("error") is not None'}  # type: ignore
            })
            arcs.append({'from': 'fallback', 'to': 'end', 'name': 'fallback_complete'})

        elif self.config.primary_strategy == RecoveryStrategy.COMPENSATE:
            states.extend([
                {'name': 'save_state', 'type': 'task'},
                {'name': 'compensate', 'type': 'task'}
            ])
            arcs.append({'from': 'start', 'to': 'save_state', 'name': 'init'})
            arcs.append({'from': 'save_state', 'to': 'execute', 'name': 'state_saved'})
            arcs.append({
                'from': 'execute',
                'to': 'compensate',
                'name': 'on_error',
                'condition': {'type': 'inline', 'code': 'data.get("error") is not None'}  # type: ignore
            })
            arcs.append({'from': 'compensate', 'to': 'end', 'name': 'compensation_complete'})

        # Success path
        arcs.append({
            'from': 'execute',
            'to': 'end',
            'name': 'success',
            'condition': {'type': 'inline', 'code': 'data.get("error") is None'}  # type: ignore
        })

        # Add end state
        states.append({
            'name': 'end',
            'type': 'terminal'
        })

        # Build FSM configuration
        fsm_config = {
            'name': 'Error_Recovery',
            'data_mode': DataHandlingMode.COPY.value,
            'states': states,
            'arcs': arcs,
            'resources': []
        }

        return SimpleFSM(fsm_config)

    async def _execute_with_retry(self, func: Callable, *args: Any, **kwargs: Any) -> Any:
        """Execute with retry strategy (lazily builds a default executor)."""
        if not self._retry_executor:
            self._retry_executor = RetryExecutor(self.config.retry_config or RetryConfig())

        return await self._retry_executor.execute(func, *args, **kwargs)

    async def _execute_with_circuit_breaker(self, func: Callable, *args: Any, **kwargs: Any) -> Any:
        """Execute with circuit breaker (lazily builds a default breaker)."""
        if not self._circuit_breaker:
            self._circuit_breaker = CircuitBreaker(
                self.config.circuit_breaker_config or CircuitBreakerConfig()
            )

        return await self._circuit_breaker.call(func, *args, **kwargs)

    async def _execute_with_fallback(self, func: Callable, *args: Any, **kwargs: Any) -> Any:
        """Execute with fallback: value, then function, then cached result."""
        try:
            # Try primary function
            result = await func(*args, **kwargs) if asyncio.iscoroutinefunction(func) else func(*args, **kwargs)
            return result

        except Exception as e:
            # Check if should use fallback
            if self.config.fallback_config:
                config = self.config.fallback_config

                # Check exception type
                if config.fallback_on_exceptions:
                    if not any(isinstance(e, exc_type) for exc_type in config.fallback_on_exceptions):
                        raise

                # Use fallback
                self._metrics['fallbacks'] += 1

                if config.fallback_value is not None:
                    return config.fallback_value
                elif config.fallback_function:
                    return config.fallback_function(e)
                elif config.use_cache and self._cache:
                    # Return last cached result
                    return self._cache.get('last_result')

            raise

    async def _execute_with_compensation(self, func: Callable, *args: Any, **kwargs: Any) -> Any:
        """Execute with compensation: on failure, run compensation actions then re-raise."""
        # Save state if configured
        saved_state = None
        if self.config.compensation_config and self.config.compensation_config.save_state:
            saved_state = {'args': args, 'kwargs': kwargs, 'timestamp': datetime.now()}
            self._state_history.append(saved_state)

        try:
            # Execute function
            result = await func(*args, **kwargs) if asyncio.iscoroutinefunction(func) else func(*args, **kwargs)
            return result

        except Exception:
            # Execute compensation
            if self.config.compensation_config:
                self._metrics['compensations'] += 1

                if self.config.compensation_config.on_compensation_start:
                    self.config.compensation_config.on_compensation_start()

                # Run compensation actions; a failing action is logged, not fatal,
                # so the remaining actions still get a chance to run.
                for action in self.config.compensation_config.compensation_actions:
                    try:
                        if asyncio.iscoroutinefunction(action):
                            await action(saved_state)
                        else:
                            action(saved_state)
                    except Exception as comp_error:
                        # Log compensation error
                        if self.config.log_errors:
                            logger.error(f"Compensation error: {comp_error}")

                if self.config.compensation_config.on_compensation_complete:
                    self.config.compensation_config.on_compensation_complete()

            raise

    async def _execute_with_bulkhead(self, func: Callable, *args: Any, **kwargs: Any) -> Any:
        """Execute with bulkhead isolation (lazily builds a default bulkhead)."""
        if not self._bulkhead:
            self._bulkhead = Bulkhead(self.config.bulkhead_config or BulkheadConfig())

        return await self._bulkhead.execute(func, *args, **kwargs)

    def _record_execution_time(self, start_time: float) -> None:
        """Record per-call and cumulative execution time in the metrics."""
        execution_time = time.time() - start_time
        self._metrics['last_execution_time'] = execution_time
        self._metrics['total_execution_time'] = (
            self._metrics.get('total_execution_time', 0) + execution_time
        )

    async def execute(
        self,
        func: Callable,
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """Execute function with error recovery.

        Args:
            func: Function to execute
            *args: Positional arguments for ``func``
            **kwargs: Keyword arguments for ``func``

        Returns:
            Function result or fallback value

        Raises:
            Exception: The original failure, when no secondary strategy
                produces a result.
        """
        self._metrics['attempts'] += 1
        start_time = time.time()

        try:
            # Apply primary strategy
            if self.config.primary_strategy == RecoveryStrategy.RETRY:
                result = await self._execute_with_retry(func, *args, **kwargs)

            elif self.config.primary_strategy == RecoveryStrategy.CIRCUIT_BREAKER:
                result = await self._execute_with_circuit_breaker(func, *args, **kwargs)

            elif self.config.primary_strategy == RecoveryStrategy.FALLBACK:
                result = await self._execute_with_fallback(func, *args, **kwargs)

            elif self.config.primary_strategy == RecoveryStrategy.COMPENSATE:
                result = await self._execute_with_compensation(func, *args, **kwargs)

            elif self.config.primary_strategy == RecoveryStrategy.BULKHEAD:
                result = await self._execute_with_bulkhead(func, *args, **kwargs)

            elif self.config.primary_strategy == RecoveryStrategy.DEADLINE:
                # Execute with timeout.  Synchronous callables are run in a
                # worker thread so wait_for has an awaitable to cancel
                # (the previous asyncio.create_task(func(...)) call raised
                # TypeError for non-coroutine results).
                timeout = self.config.global_timeout
                if asyncio.iscoroutinefunction(func):
                    result = await asyncio.wait_for(func(*args, **kwargs), timeout=timeout)
                else:
                    result = await asyncio.wait_for(
                        asyncio.to_thread(func, *args, **kwargs),
                        timeout=timeout
                    )

            else:
                # Direct execution
                result = await func(*args, **kwargs) if asyncio.iscoroutinefunction(func) else func(*args, **kwargs)

            # Cache successful result
            if self.config.fallback_config and self.config.fallback_config.use_cache:
                self._cache['last_result'] = result
                self._cache['last_success_time'] = time.time()

            # Track execution time
            self._metrics['successes'] += 1
            self._record_execution_time(start_time)

            return result

        except Exception as e:
            self._error_count += 1
            self._metrics['failures'] += 1

            # Track execution time even on failure
            self._record_execution_time(start_time)

            # Log error
            if self.config.log_errors:
                logger.error(f"Error in recovery workflow: {e}")

            # Check if should alert
            if self._error_count >= self.config.alert_threshold:
                logger.warning(f"Alert: Error threshold reached ({self._error_count} errors)")

            # Apply secondary strategies
            if self.config.secondary_strategies:
                for strategy in self.config.secondary_strategies:
                    try:
                        if strategy == RecoveryStrategy.FALLBACK:
                            return await self._execute_with_fallback(func, *args, **kwargs)
                        elif strategy == RecoveryStrategy.CACHE:
                            if 'last_result' in self._cache:
                                return self._cache['last_result']
                    except Exception:
                        continue

            raise

    def get_metrics(self) -> Dict[str, Any]:
        """Get execution metrics.

        Returns:
            Metrics dictionary (copy), augmented with bulkhead and
            circuit-breaker sub-metrics when those components exist.
        """
        metrics = self._metrics.copy()

        if self._bulkhead:
            metrics['bulkhead'] = self._bulkhead.metrics

        if self._circuit_breaker:
            metrics['circuit_breaker'] = {
                'state': self._circuit_breaker.state.value,
                'failure_count': self._circuit_breaker.failure_count
            }

        return metrics

685 

686 

def create_retry_workflow(
    max_attempts: int = 3,
    backoff_strategy: BackoffStrategy = BackoffStrategy.EXPONENTIAL,
    **kwargs
) -> ErrorRecoveryWorkflow:
    """Build a workflow whose primary strategy is RETRY.

    Args:
        max_attempts: Maximum retry attempts
        backoff_strategy: Backoff strategy
        **kwargs: Additional ``RetryConfig`` fields (e.g. initial_delay)

    Returns:
        Configured error recovery workflow
    """
    retry_cfg = RetryConfig(
        max_attempts=max_attempts,
        backoff_strategy=backoff_strategy,
        **kwargs,
    )
    return ErrorRecoveryWorkflow(
        ErrorRecoveryConfig(
            primary_strategy=RecoveryStrategy.RETRY,
            retry_config=retry_cfg,
        )
    )

712 

713 

def create_circuit_breaker_workflow(
    failure_threshold: int = 5,
    timeout: float = 60.0,
    **kwargs
) -> ErrorRecoveryWorkflow:
    """Build a workflow whose primary strategy is CIRCUIT_BREAKER.

    Args:
        failure_threshold: Failures before opening circuit
        timeout: Time before attempting recovery
        **kwargs: Additional ``CircuitBreakerConfig`` fields

    Returns:
        Configured error recovery workflow
    """
    breaker_cfg = CircuitBreakerConfig(
        failure_threshold=failure_threshold,
        timeout=timeout,
        **kwargs,
    )
    return ErrorRecoveryWorkflow(
        ErrorRecoveryConfig(
            primary_strategy=RecoveryStrategy.CIRCUIT_BREAKER,
            circuit_breaker_config=breaker_cfg,
        )
    )

739 

740 

def create_resilient_workflow(
    primary_strategy: RecoveryStrategy = RecoveryStrategy.RETRY,
    enable_circuit_breaker: bool = True,
    enable_fallback: bool = True,
    enable_bulkhead: bool = False
) -> ErrorRecoveryWorkflow:
    """Create fully resilient workflow with multiple strategies.

    Args:
        primary_strategy: Primary recovery strategy
        enable_circuit_breaker: Enable circuit breaker
        enable_fallback: Enable fallback (with result caching)
        enable_bulkhead: Enable bulkhead isolation

    Returns:
        Configured error recovery workflow
    """
    backups: List[RecoveryStrategy] = []
    if enable_fallback:
        backups.append(RecoveryStrategy.FALLBACK)
    # Avoid listing the circuit breaker twice when it is already primary.
    if enable_circuit_breaker and primary_strategy != RecoveryStrategy.CIRCUIT_BREAKER:
        backups.append(RecoveryStrategy.CIRCUIT_BREAKER)

    cfg = ErrorRecoveryConfig(
        primary_strategy=primary_strategy,
        secondary_strategies=backups,
        retry_config=RetryConfig() if primary_strategy == RecoveryStrategy.RETRY else None,
        circuit_breaker_config=CircuitBreakerConfig() if enable_circuit_breaker else None,
        fallback_config=FallbackConfig(use_cache=True) if enable_fallback else None,
        bulkhead_config=BulkheadConfig() if enable_bulkhead else None,
    )
    return ErrorRecoveryWorkflow(cfg)