Coverage for excalidraw_mcp/http_client.py: 95%

206 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-16 08:08 -0700

1"""HTTP client management with connection pooling, health caching, and request tracing.""" 

2 

3import asyncio 

4import logging 

5import time 

6import uuid 

7from dataclasses import dataclass 

8from typing import Any 

9 

10import httpx 

11 

12from .config import config 

13from .retry_utils import RetryConfig, retry_async 

14 

# Module-level logger, named after this module so log records can be filtered per module.
logger = logging.getLogger(__name__)

16 

17 

@dataclass
class HealthCacheEntry:
    """Snapshot of the most recent canvas-server health probe."""

    # True when the last probe found the server healthy.
    status: bool

    # time.time() value associated with the probe that produced this entry.
    timestamp: float

    # Number of consecutive unhealthy probes; stored as 0 after a healthy one.
    failure_count: int = 0

25 

26 

27class CanvasHTTPClient: 

28 """HTTP client for canvas server communication with connection pooling, caching, and tracing.""" 

29 

30 def __init__(self) -> None: 

31 self._client: httpx.AsyncClient | None = None 

32 self._health_cache = HealthCacheEntry(status=False, timestamp=0) 

33 self._lock = asyncio.Lock() 

34 

35 # Request tracing 

36 self._request_metrics: dict[str, Any] = { 

37 "total_requests": 0, 

38 "successful_requests": 0, 

39 "failed_requests": 0, 

40 "total_response_time": 0.0, 

41 } 

42 

43 async def __aenter__(self) -> "CanvasHTTPClient": 

44 await self._ensure_client() 

45 return self 

46 

47 async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: 

48 await self.close() 

49 

50 async def _ensure_client(self) -> None: 

51 """Ensure HTTP client is initialized.""" 

52 if self._client is None: 

53 limits = httpx.Limits( 

54 max_keepalive_connections=config.performance.http_pool_connections, 

55 max_connections=config.performance.http_pool_maxsize, 

56 keepalive_expiry=300 if config.performance.http_keep_alive else 0, 

57 ) 

58 

59 self._client = httpx.AsyncClient( 

60 timeout=httpx.Timeout(config.server.sync_operation_timeout_seconds), 

61 limits=limits, 

62 http2=True, 

63 follow_redirects=True, 

64 ) 

65 

66 async def close(self) -> None: 

67 """Close the HTTP client.""" 

68 if self._client: 

69 await self._client.aclose() 

70 self._client = None 

71 

72 async def _perform_health_check_request(self, trace_id: str, config: Any) -> bool: 

73 """Perform a single health check request.""" 

74 try: 

75 await self._ensure_client() 

76 

77 # Add tracing headers if enabled 

78 headers = ( 

79 self._get_tracing_headers(trace_id) 

80 if config.monitoring.request_tracing_enabled 

81 else {} 

82 ) 

83 

84 if self._client is not None: 

85 response = await self._client.get( 

86 f"{config.server.express_url}/health", 

87 timeout=config.server.health_check_timeout_seconds, 

88 headers=headers, 

89 ) 

90 else: 

91 raise RuntimeError("HTTP client not initialized") 

92 

93 is_healthy: bool = response.status_code == 200 

94 return is_healthy 

95 

96 except Exception as e: 

97 logger.warning( 

98 f"Canvas server health check failed: {e} (trace: {trace_id})" 

99 ) 

100 raise # Re-raise to trigger retry 

101 

102 async def check_health( 

103 self, force: bool = False, correlation_id: str | None = None 

104 ) -> bool: 

105 """Check canvas server health with caching and tracing.""" 

106 current_time = time.time() 

107 trace_id = correlation_id or self._generate_correlation_id() 

108 

109 # Use cached result if recent and not forced 

110 if ( 

111 not force 

112 and current_time - self._health_cache.timestamp 

113 < config.server.health_check_interval_seconds 

114 ): 

115 return self._health_cache.status 

116 

117 async with self._lock: 

118 # Double-check after acquiring lock 

119 if ( 

120 not force 

121 and current_time - self._health_cache.timestamp 

122 < config.server.health_check_interval_seconds 

123 ): 

124 return self._health_cache.status 

125 

126 start_time = time.time() 

127 

128 # Configure retry for health checks 

129 retry_config = RetryConfig( 

130 base_delay=0.5, # Quick retries for health checks 

131 max_delay=5.0, 

132 exponential_base=config.server.sync_retry_exponential_base, 

133 jitter=config.server.sync_retry_jitter, 

134 ) 

135 

136 async def _health_check_request() -> bool: 

137 return await self._perform_health_check_request(trace_id, config) 

138 

139 try: 

140 is_healthy = await retry_async( 

141 _health_check_request, 

142 retry_config=retry_config, 

143 retry_on_exceptions=(Exception,), 

144 ) 

145 except Exception: 

146 # On failure, consider server unhealthy 

147 is_healthy = False 

148 

149 # Update cache 

150 self._health_cache = HealthCacheEntry( 

151 status=is_healthy, 

152 timestamp=current_time, 

153 failure_count=0 if is_healthy else self._health_cache.failure_count + 1, 

154 ) 

155 

156 # Log the result 

157 response_time = time.time() - start_time 

158 if is_healthy: 

159 logger.debug( 

160 f"Canvas server health check passed (trace: {trace_id}, time: {response_time:.3f}s)" 

161 ) 

162 self._update_request_metrics(True, response_time, "GET", "/health") 

163 else: 

164 logger.warning( 

165 f"Canvas server health check failed: (trace: {trace_id})" 

166 ) 

167 self._update_request_metrics(False, response_time, "GET", "/health") 

168 

169 return is_healthy 

170 

171 async def post_json( 

172 self, 

173 endpoint: str, 

174 data: dict[str, Any], 

175 retries: int | None = None, 

176 correlation_id: str | None = None, 

177 ) -> dict[str, Any] | None: 

178 """POST JSON data to canvas server with retries and tracing.""" 

179 retry_count = ( 

180 retries if retries is not None else config.server.sync_retry_attempts 

181 ) 

182 

183 trace_id = correlation_id or self._generate_correlation_id() 

184 await self._ensure_client() 

185 url = f"{config.server.express_url}{endpoint}" 

186 

187 # Configure retry behavior 

188 retry_config = RetryConfig( 

189 max_attempts=retry_count + 1, 

190 base_delay=config.server.sync_retry_delay_seconds, 

191 max_delay=config.server.sync_retry_max_delay_seconds, 

192 exponential_base=config.server.sync_retry_exponential_base, 

193 jitter=config.server.sync_retry_jitter, 

194 ) 

195 

196 async def _post_request() -> dict[str, Any] | None: 

197 start_time = time.time() 

198 try: 

199 # Prepare headers with tracing 

200 headers = {"Content-Type": "application/json"} 

201 if config.monitoring.request_tracing_enabled: 

202 headers.update(self._get_tracing_headers(trace_id)) 

203 

204 if self._client is not None: 

205 response = await self._client.post(url, json=data, headers=headers) 

206 else: 

207 raise RuntimeError("HTTP client not initialized") 

208 response_time = time.time() - start_time 

209 

210 if response.status_code in (200, 201): 

211 self._update_request_metrics(True, response_time, "POST", endpoint) 

212 logger.debug( 

213 f"POST {endpoint} successful (trace: {trace_id}, time: {response_time:.3f}s)" 

214 ) 

215 result: dict[str, Any] = response.json() 

216 return result 

217 else: 

218 self._update_request_metrics(False, response_time, "POST", endpoint) 

219 logger.warning( 

220 f"Canvas server returned HTTP {response.status_code}: {response.text} (trace: {trace_id})" 

221 ) 

222 # Raise exception to trigger retry 

223 raise httpx.HTTPStatusError( 

224 f"HTTP {response.status_code}: {response.text}", 

225 request=response.request, 

226 response=response, 

227 ) 

228 

229 except httpx.TimeoutException: 

230 response_time = time.time() - start_time 

231 self._update_request_metrics(False, response_time, "POST", endpoint) 

232 logger.warning(f"Canvas server request timeout (trace: {trace_id})") 

233 raise 

234 

235 except Exception as e: 

236 response_time = time.time() - start_time 

237 self._update_request_metrics(False, response_time, "POST", endpoint) 

238 logger.error(f"Canvas server request failed: {e} (trace: {trace_id})") 

239 raise 

240 

241 # Use enhanced retry with exponential backoff and jitter 

242 try: 

243 return await retry_async( 

244 _post_request, 

245 retry_config=retry_config, 

246 retry_on_exceptions=( 

247 httpx.TimeoutException, 

248 httpx.HTTPStatusError, 

249 Exception, 

250 ), 

251 ) 

252 except Exception: 

253 # Return None on complete failure as per original behavior 

254 return None 

255 

256 async def put_json( 

257 self, endpoint: str, data: dict[str, Any], correlation_id: str | None = None 

258 ) -> dict[str, Any] | None: 

259 """PUT JSON data to canvas server with tracing.""" 

260 trace_id = correlation_id or self._generate_correlation_id() 

261 await self._ensure_client() 

262 url = f"{config.server.express_url}{endpoint}" 

263 

264 start_time = time.time() 

265 try: 

266 # Prepare headers with tracing 

267 headers = {"Content-Type": "application/json"} 

268 if config.monitoring.request_tracing_enabled: 

269 headers.update(self._get_tracing_headers(trace_id)) 

270 

271 if self._client is not None: 

272 response = await self._client.put(url, json=data, headers=headers) 

273 else: 

274 raise RuntimeError("HTTP client not initialized") 

275 response_time = time.time() - start_time 

276 

277 if response.status_code == 200: 

278 self._update_request_metrics(True, response_time, "PUT", endpoint) 

279 logger.debug( 

280 f"PUT {endpoint} successful (trace: {trace_id}, time: {response_time:.3f}s)" 

281 ) 

282 result: dict[str, Any] = response.json() 

283 return result 

284 else: 

285 self._update_request_metrics(False, response_time, "PUT", endpoint) 

286 logger.warning( 

287 f"Canvas server PUT returned HTTP {response.status_code}: {response.text} (trace: {trace_id})" 

288 ) 

289 return None 

290 

291 except Exception as e: 

292 response_time = time.time() - start_time 

293 self._update_request_metrics(False, response_time, "PUT", endpoint) 

294 logger.error(f"Canvas server PUT request failed: {e} (trace: {trace_id})") 

295 return None 

296 

297 async def delete(self, endpoint: str, correlation_id: str | None = None) -> bool: 

298 """DELETE request to canvas server with tracing.""" 

299 trace_id = correlation_id or self._generate_correlation_id() 

300 await self._ensure_client() 

301 url = f"{config.server.express_url}{endpoint}" 

302 

303 start_time = time.time() 

304 try: 

305 # Prepare headers with tracing 

306 headers = ( 

307 self._get_tracing_headers(trace_id) 

308 if config.monitoring.request_tracing_enabled 

309 else {} 

310 ) 

311 

312 if self._client is not None: 

313 response = await self._client.delete(url, headers=headers) 

314 else: 

315 raise RuntimeError("HTTP client not initialized") 

316 response_time = time.time() - start_time 

317 

318 success = response.status_code in (200, 204) 

319 self._update_request_metrics(success, response_time, "DELETE", endpoint) 

320 

321 if success: 

322 logger.debug( 

323 f"DELETE {endpoint} successful (trace: {trace_id}, time: {response_time:.3f}s)" 

324 ) 

325 else: 

326 logger.warning( 

327 f"DELETE {endpoint} failed with HTTP {response.status_code} (trace: {trace_id})" 

328 ) 

329 

330 return success 

331 

332 except Exception as e: 

333 response_time = time.time() - start_time 

334 self._update_request_metrics(False, response_time, "DELETE", endpoint) 

335 logger.error( 

336 f"Canvas server DELETE request failed: {e} (trace: {trace_id})" 

337 ) 

338 return False 

339 

340 async def get_json( 

341 self, endpoint: str, correlation_id: str | None = None 

342 ) -> dict[str, Any] | None: 

343 """GET JSON data from canvas server with tracing.""" 

344 trace_id = correlation_id or self._generate_correlation_id() 

345 await self._ensure_client() 

346 url = f"{config.server.express_url}{endpoint}" 

347 

348 start_time = time.time() 

349 try: 

350 # Prepare headers with tracing 

351 headers = ( 

352 self._get_tracing_headers(trace_id) 

353 if config.monitoring.request_tracing_enabled 

354 else {} 

355 ) 

356 

357 if self._client is not None: 

358 response = await self._client.get(url, headers=headers) 

359 else: 

360 raise RuntimeError("HTTP client not initialized") 

361 response_time = time.time() - start_time 

362 

363 if response.status_code == 200: 

364 self._update_request_metrics(True, response_time, "GET", endpoint) 

365 logger.debug( 

366 f"GET {endpoint} successful (trace: {trace_id}, time: {response_time:.3f}s)" 

367 ) 

368 result: dict[str, Any] = response.json() 

369 return result 

370 else: 

371 self._update_request_metrics(False, response_time, "GET", endpoint) 

372 logger.warning( 

373 f"Canvas server GET returned HTTP {response.status_code}: {response.text} (trace: {trace_id})" 

374 ) 

375 return None 

376 

377 except Exception as e: 

378 response_time = time.time() - start_time 

379 self._update_request_metrics(False, response_time, "GET", endpoint) 

380 logger.error(f"Canvas server GET request failed: {e} (trace: {trace_id})") 

381 return None 

382 

383 @property 

384 def health_failure_count(self) -> int: 

385 """Get the current health check failure count.""" 

386 return self._health_cache.failure_count 

387 

388 def _generate_correlation_id(self) -> str: 

389 """Generate a unique correlation ID for request tracing.""" 

390 return str(uuid.uuid4())[:8] 

391 

392 def _get_tracing_headers(self, correlation_id: str) -> dict[str, str]: 

393 """Get headers for request tracing.""" 

394 if not config.monitoring.trace_headers_enabled: 

395 return {} 

396 

397 return { 

398 config.logging.correlation_header: correlation_id, 

399 "X-Request-ID": correlation_id, 

400 "X-Trace-ID": correlation_id, 

401 } 

402 

403 def _update_request_metrics( 

404 self, success: bool, response_time: float, method: str, endpoint: str 

405 ) -> None: 

406 """Update request metrics for monitoring.""" 

407 self._request_metrics["total_requests"] += 1 

408 self._request_metrics["total_response_time"] += response_time 

409 

410 if success: 

411 self._request_metrics["successful_requests"] += 1 

412 else: 

413 self._request_metrics["failed_requests"] += 1 

414 

415 # Log slow requests 

416 if response_time > 1.0: # Requests over 1 second 

417 logger.warning( 

418 f"Slow request: {method} {endpoint} took {response_time:.3f}s" 

419 ) 

420 

421 def get_request_metrics(self) -> dict[str, Any]: 

422 """Get request metrics for monitoring.""" 

423 total_requests = max(self._request_metrics["total_requests"], 1) 

424 

425 return self._request_metrics | { 

426 "success_rate": ( 

427 self._request_metrics["successful_requests"] / total_requests 

428 ) 

429 * 100, 

430 "average_response_time": self._request_metrics["total_response_time"] 

431 / total_requests, 

432 "error_rate": (self._request_metrics["failed_requests"] / total_requests) 

433 * 100, 

434 } 

435 

436 def reset_request_metrics(self) -> None: 

437 """Reset request metrics.""" 

438 self._request_metrics = { 

439 "total_requests": 0, 

440 "successful_requests": 0, 

441 "failed_requests": 0, 

442 "total_response_time": 0.0, 

443 } 

444 

445 @property 

446 def is_healthy(self) -> bool: 

447 """Get the last known health status.""" 

448 return self._health_cache.status 

449 

450 

# Global HTTP client instance shared by importers of this module.
# Constructing it creates no httpx client; that happens lazily on first use.
http_client = CanvasHTTPClient()