Coverage for excalidraw_mcp/process_manager.py: 90%

184 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-16 08:08 -0700

1"""Process management for canvas server lifecycle.""" 

2 

3import asyncio 

4import atexit 

5import logging 

6import os 

7import signal 

8import subprocess 

9import time 

10from collections.abc import Awaitable, Callable 

11from pathlib import Path 

12from typing import Any 

13 

14import psutil 

15 

16from .config import config 

17from .http_client import http_client 

18from .retry_utils import RetryConfig, retry_async 

19 

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

21 

22 

class CanvasProcessManager:
    """Manage the canvas server process lifecycle with monitoring hooks.

    The manager spawns the canvas server via ``npm run canvas``, tracks the
    child's PID, polls its health through ``http_client`` and tears it down
    on request, at interpreter exit, or on SIGINT/SIGTERM.  Callbacks can be
    registered for start, stop, restart and health-change events.
    """

    # Seconds to wait after SIGTERM before escalating to a forced kill.
    _TERMINATION_GRACE_SECONDS: float = 2

    def __init__(self) -> None:
        """Initialise empty process state and register cleanup handlers."""
        self.process: subprocess.Popen[Any] | None = None
        self.process_pid: int | None = None
        self._startup_lock = asyncio.Lock()
        self._start_time: float | None = None
        self._restart_count = 0

        # Event hooks for monitoring integration
        self._on_start_callbacks: list[Callable[..., Awaitable[None]]] = []
        self._on_stop_callbacks: list[Callable[..., Awaitable[None]]] = []
        self._on_restart_callbacks: list[Callable[..., Awaitable[None]]] = []
        self._on_health_change_callbacks: list[Callable[..., Awaitable[None]]] = []

        # Strong references to fire-and-forget callback tasks; the event loop
        # itself only keeps weak references to tasks.
        self._background_tasks: set[asyncio.Task[None]] = set()

        # Register cleanup handlers
        atexit.register(self.cleanup)
        for sig in (signal.SIGINT, signal.SIGTERM):
            try:
                signal.signal(sig, self._signal_handler)
            except ValueError as e:
                # signal.signal() is only allowed from the main thread; being
                # constructed elsewhere must not make construction fail.
                logger.debug(f"Could not install handler for signal {sig}: {e}")

    async def _check_process_health(self) -> bool:
        """Check if process is already running and healthy."""
        return await self._is_process_healthy()

    async def _handle_disabled_auto_start(self) -> bool:
        """Handle case when auto-start is disabled.

        Returns:
            Always ``False``; a warning is logged.
        """
        logger.warning("Canvas server not running and auto-start is disabled")
        return False

    async def _attempt_process_start(self) -> bool:
        """Attempt to start the canvas server process.

        Returns:
            ``True`` if the process was spawned, ``False`` otherwise (an
            error is logged in that case).
        """
        success = await self._start_process()
        if not success:
            logger.error("Failed to start canvas server")
        return success

    async def _ensure_process_healthy(self) -> bool:
        """Ensure process is healthy or start it if needed.

        Returns:
            ``True`` if the server is (or became) healthy, ``False`` if
            auto-start is disabled, the spawn failed, or the server never
            became healthy.
        """
        # Check if process is already running and healthy
        if await self._check_process_health():
            return True

        # If auto-start is disabled, just check health
        if not config.server.canvas_auto_start:
            return await self._handle_disabled_auto_start()

        # Try to start the process
        if not await self._attempt_process_start():
            return False

        # Wait for process to become healthy
        return await self._wait_for_health()

    async def ensure_running(self) -> bool:
        """Ensure canvas server is running and healthy.

        The startup lock serialises concurrent callers so that only one of
        them runs the check-and-start sequence at a time.
        """
        async with self._startup_lock:
            return await self._ensure_process_healthy()

    async def _is_process_healthy(self) -> bool:
        """Check if the current process is running and healthy."""
        if not self._is_process_running():
            return False

        return await http_client.check_health()

    def _is_process_running(self) -> bool:
        """Check if the canvas server process is running.

        Side effect: when the tracked process has exited, its PID no longer
        exists, or the check itself fails, the stored process information is
        reset.
        """
        if not self.process or not self.process_pid:
            return False

        try:
            # Check if process is still running
            if self.process.poll() is not None:
                logger.debug("Canvas server process has exited")
                self._reset_process_info()
                return False

            # Verify PID is valid
            if not psutil.pid_exists(self.process_pid):
                logger.debug("Canvas server PID no longer exists")
                self._reset_process_info()
                return False

            return True

        except Exception as e:
            logger.debug(f"Error checking process status: {e}")
            self._reset_process_info()
            return False

    async def _start_process(self) -> bool:
        """Start the canvas server process.

        Any previously tracked process is terminated first.  After spawning,
        start callbacks are awaited and the coroutine sleeps for the
        configured startup delay.

        Returns:
            ``True`` if the process was spawned, ``False`` if anything raised.
        """
        try:
            project_root = self._get_project_root()
            logger.info(f"Starting canvas server from {project_root}")

            # Kill any existing process
            self._terminate_existing_process()

            # Start new process.  start_new_session makes the child a session
            # (and process-group) leader on POSIX so the whole group can be
            # signalled later; it replaces preexec_fn=os.setsid, which the
            # subprocess docs flag as unsafe in the presence of threads.
            # NOTE(review): stdout/stderr are piped but never read in this
            # class; a chatty child can fill the pipe buffer and block.
            self.process = subprocess.Popen(
                ["npm", "run", "canvas"],
                cwd=project_root,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                start_new_session=os.name != "nt",
            )

            self.process_pid = self.process.pid
            self._start_time = time.time()
            logger.info(f"Canvas server started with PID: {self.process_pid}")

            # Trigger start callbacks
            await self._trigger_callbacks(self._on_start_callbacks, self.process_pid)

            # Give the server a moment to start
            await asyncio.sleep(config.server.startup_retry_delay_seconds)

            return True

        except Exception as e:
            logger.error(f"Failed to start canvas server: {e}")
            self._reset_process_info()
            return False

    async def _check_health_with_process_check(self) -> bool:
        """Check health with process validation.

        Returns:
            ``True`` when the forced health check succeeds.

        Raises:
            RuntimeError: If the process is no longer running, or the health
                check reports the server as not healthy.
        """
        if not self._is_process_running():
            raise RuntimeError("Canvas server process died during startup")

        if await http_client.check_health(force=True):
            return True
        else:
            raise RuntimeError("Canvas server not yet healthy")

    async def _wait_for_health(self) -> bool:
        """Wait for canvas server to become healthy.

        Returns:
            ``True`` once the health check passes; ``False`` if the retries
            are exhausted, in which case the process is terminated.
        """
        logger.info("Waiting for canvas server to become healthy...")

        # Configure retry for health checks.
        # NOTE(review): the startup *timeout* setting is used here as an
        # attempt count - confirm that this is intended.
        retry_config = RetryConfig(
            max_attempts=config.server.startup_timeout_seconds,
            max_delay=5.0,
            exponential_base=config.server.sync_retry_exponential_base,
            jitter=config.server.sync_retry_jitter,
        )

        try:
            await retry_async(
                self._check_health_with_process_check,
                retry_config=retry_config,
                # Exception already covers RuntimeError; tuple kept as-is.
                retry_on_exceptions=(RuntimeError, Exception),
            )
            logger.info("Canvas server is healthy and ready")
            return True
        except Exception as e:
            logger.error(f"Canvas server failed to become healthy: {e}")
            self._terminate_current_process()
            return False

    def _send_termination_signal(self, sig: int) -> None:
        """Send termination signal to the process group.

        On POSIX the signal goes to the child's whole process group.  On
        Windows ``SIGTERM`` maps to ``Popen.terminate()`` and anything else
        to ``Popen.kill()``.  Does nothing when no process is tracked.

        Args:
            sig: Signal number to deliver.
        """
        if self.process is not None and self.process_pid is not None:
            if os.name != "nt":
                os.killpg(os.getpgid(self.process_pid), sig)
            else:
                if sig == signal.SIGTERM:
                    self.process.terminate()
                else:
                    self.process.kill()

    def _terminate_existing_process(self) -> None:
        """Terminate any existing canvas server process.

        Sends SIGTERM, waits for the grace period, force-kills if the PID
        still exists, then resets the stored process information.  Safe to
        call without a running event loop (atexit, signal handlers): stop
        callbacks are then skipped, but the process is still terminated.
        """
        if self.process_pid:
            # Notify listeners before termination.  This is deliberately
            # outside the try block and never raises, so a missing event loop
            # cannot prevent the signals below from being sent.
            if not self._schedule_callbacks(
                self._on_stop_callbacks, self.process_pid, "terminating"
            ):
                logger.debug("No event loop running, skipping stop callbacks")

            try:
                # Try to find and kill the process group
                self._send_termination_signal(signal.SIGTERM)

                # Wait a moment for graceful shutdown.
                # NOTE(review): this is a blocking sleep and is reached from
                # async code paths (restart/stop/_start_process).
                time.sleep(self._TERMINATION_GRACE_SECONDS)

                # Force kill if still running.  SIGKILL does not exist on
                # Windows; falling back to SIGTERM selects Popen.terminate(),
                # which is the same operation as Popen.kill() there.
                if self.process is not None and psutil.pid_exists(self.process_pid):
                    self._send_termination_signal(
                        getattr(signal, "SIGKILL", signal.SIGTERM)
                    )

            except OSError as e:
                # Includes ProcessLookupError (a subclass of OSError).
                logger.debug(f"Process already terminated: {e}")
            except Exception as e:
                logger.warning(f"Error terminating existing process: {e}")

        self._reset_process_info()

    def _terminate_current_process(self) -> None:
        """Terminate the current canvas server process."""
        self._terminate_existing_process()

    def _reset_process_info(self) -> None:
        """Reset process information.

        If a PID was being tracked, stop callbacks are scheduled with
        ``(None, "stopped")`` when an event loop is running.
        """
        was_running = self.process_pid is not None
        self.process = None
        self.process_pid = None
        self._start_time = None

        if was_running:
            # Trigger stop callbacks when process info is reset
            if not self._schedule_callbacks(self._on_stop_callbacks, None, "stopped"):
                # No running event loop, skip callback triggering
                logger.debug("No event loop running, skipping stop callbacks")

    def _get_project_root(self) -> Path:
        """Get the project root directory (the parent of this file's directory)."""
        current_file = Path(__file__).resolve()
        return current_file.parent.parent

    def _signal_handler(self, signum: int, frame: Any) -> None:
        """Handle system signals for graceful shutdown.

        NOTE(review): the handler returns after cleanup, so the signal's
        previous/default action (e.g. KeyboardInterrupt for SIGINT) is not
        performed - confirm that the host application exits by other means.

        Args:
            signum: Number of the received signal.
            frame: Current stack frame (unused).
        """
        logger.info(f"Received signal {signum}, cleaning up...")
        self.cleanup()

    def cleanup(self) -> None:
        """Clean up resources and terminate processes."""
        logger.info("Cleaning up canvas process manager...")
        self._terminate_current_process()

    async def restart(self) -> bool:
        """Restart the canvas server.

        Restart callbacks are awaited twice: with ``"starting"`` before the
        old process is terminated and with ``"success"``/``"failed"`` after.

        Returns:
            ``True`` if the server is running and healthy afterwards.
        """
        logger.info("Restarting canvas server...")
        self._restart_count += 1

        # Trigger restart callbacks
        await self._trigger_callbacks(
            self._on_restart_callbacks, self._restart_count, "starting"
        )

        self._terminate_current_process()
        success = await self.ensure_running()

        # Trigger restart completion callbacks
        status = "success" if success else "failed"
        await self._trigger_callbacks(
            self._on_restart_callbacks, self._restart_count, status
        )

        return success

    async def stop(self) -> None:
        """Stop the canvas server."""
        logger.info("Stopping canvas server...")
        self._terminate_current_process()

    def get_status(self) -> dict[str, Any]:
        """Get comprehensive process status information.

        Returns:
            A dict with the keys ``running``, ``pid``, ``healthy`` (always
            ``False`` here), ``auto_start_enabled``, ``start_time``,
            ``uptime_seconds`` and ``restart_count``.
        """
        is_running = self._is_process_running()
        uptime = (
            time.time() - self._start_time if self._start_time and is_running else 0
        )

        return {
            "running": is_running,
            "pid": self.process_pid,
            "healthy": False,  # Will be updated by health check
            "auto_start_enabled": config.server.canvas_auto_start,
            "start_time": self._start_time,
            "uptime_seconds": uptime,
            "restart_count": self._restart_count,
        }

    # Event hook management methods
    def add_start_callback(self, callback: Callable[..., Awaitable[None]]) -> None:
        """Add callback for process start events."""
        self._on_start_callbacks.append(callback)

    def add_stop_callback(self, callback: Callable[..., Awaitable[None]]) -> None:
        """Add callback for process stop events."""
        self._on_stop_callbacks.append(callback)

    def add_restart_callback(self, callback: Callable[..., Awaitable[None]]) -> None:
        """Add callback for process restart events."""
        self._on_restart_callbacks.append(callback)

    def add_health_change_callback(
        self, callback: Callable[..., Awaitable[None]]
    ) -> None:
        """Add callback for health status changes."""
        self._on_health_change_callbacks.append(callback)

    def _schedule_callbacks(
        self, callbacks: list[Callable[..., Awaitable[None]]], *args: Any
    ) -> bool:
        """Schedule callbacks on the running event loop without awaiting them.

        The coroutine is only created once a running loop has been found, so
        calling this from synchronous contexts (atexit, signal handlers)
        neither raises nor leaves an un-awaited coroutine behind.

        Args:
            callbacks: Callbacks to invoke.
            *args: Positional arguments passed to every callback.

        Returns:
            ``True`` if a task was scheduled, ``False`` if no event loop is
            running in the current thread.
        """
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            return False

        task = loop.create_task(self._trigger_callbacks(callbacks, *args))
        # Hold a strong reference until the task finishes so it cannot be
        # garbage-collected mid-execution.
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)
        return True

    async def _trigger_callbacks(
        self, callbacks: list[Callable[..., Awaitable[None]]], *args: Any
    ) -> None:
        """Trigger a list of callbacks with error handling.

        Each callback is called with ``*args``; if the call returns an
        awaitable it is awaited.  An exception from one callback is logged
        and does not stop the remaining callbacks.

        Args:
            callbacks: Callbacks to invoke, in order.
            *args: Positional arguments passed to every callback.
        """
        for callback in callbacks:
            try:
                outcome = callback(*args)
                # Covers coroutine functions as well as plain callables that
                # hand back an awaitable (e.g. a lambda wrapping an async def).
                if isinstance(outcome, Awaitable):
                    await outcome
            except Exception as e:
                logger.error(f"Error in process manager callback: {e}")

    def get_restart_count(self) -> int:
        """Get the number of times the process has been restarted."""
        return self._restart_count

    def get_uptime(self) -> float:
        """Get process uptime in seconds (``0.0`` when not running)."""
        if not self._start_time or not self._is_process_running():
            return 0.0
        return time.time() - self._start_time

343 

344 

# Global process manager instance, created at import time. Constructing it
# registers the manager's atexit cleanup hook and its SIGINT/SIGTERM handlers
# (see CanvasProcessManager.__init__).
process_manager = CanvasProcessManager()