Coverage for excalidraw_mcp/process_manager.py: 90%
184 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-16 08:08 -0700
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-16 08:08 -0700
1"""Process management for canvas server lifecycle."""
3import asyncio
4import atexit
5import logging
6import os
7import signal
8import subprocess
9import time
10from collections.abc import Awaitable, Callable
11from pathlib import Path
12from typing import Any
14import psutil
16from .config import config
17from .http_client import http_client
18from .retry_utils import RetryConfig, retry_async
# Module-level logger named after this module; used by the process manager below.
logger = logging.getLogger(__name__)
class CanvasProcessManager:
    """Manage the canvas server process lifecycle with monitoring hooks.

    The manager starts the canvas server as a child process
    (``npm run canvas``), checks its health through ``http_client``,
    terminates it on demand or at interpreter exit, and notifies registered
    callbacks about start, stop and restart events.
    """

    def __init__(self) -> None:
        """Initialise empty process state and register cleanup handlers."""
        self.process: subprocess.Popen[Any] | None = None
        self.process_pid: int | None = None
        self._startup_lock = asyncio.Lock()
        self._start_time: float | None = None
        self._restart_count = 0

        # Event hooks for monitoring integration
        self._on_start_callbacks: list[Callable[..., Awaitable[None]]] = []
        self._on_stop_callbacks: list[Callable[..., Awaitable[None]]] = []
        self._on_restart_callbacks: list[Callable[..., Awaitable[None]]] = []
        self._on_health_change_callbacks: list[Callable[..., Awaitable[None]]] = []

        # Strong references to fire-and-forget callback tasks. The event loop
        # only keeps weak references, so an unreferenced task may be
        # garbage-collected before it finishes.
        self._background_tasks: set[asyncio.Task[None]] = set()

        # Register cleanup handlers
        atexit.register(self.cleanup)
        try:
            signal.signal(signal.SIGINT, self._signal_handler)
            signal.signal(signal.SIGTERM, self._signal_handler)
        except ValueError as e:
            # signal.signal() may only be called from the main thread; the
            # atexit hook above still provides cleanup in that case.
            logger.warning(f"Could not register signal handlers: {e}")

    async def _check_process_health(self) -> bool:
        """Return True if the process is already running and healthy."""
        return await self._is_process_healthy()

    async def _handle_disabled_auto_start(self) -> bool:
        """Log that auto-start is disabled and report failure (always False)."""
        logger.warning("Canvas server not running and auto-start is disabled")
        return False

    async def _attempt_process_start(self) -> bool:
        """Try to start the canvas server; log an error and return False on failure."""
        success = await self._start_process()
        if not success:
            logger.error("Failed to start canvas server")
        return success

    async def _ensure_process_healthy(self) -> bool:
        """Ensure the process is healthy, starting it if needed.

        Returns:
            True when the server is (or becomes) healthy; False when
            auto-start is disabled, the start fails, or the server never
            becomes healthy.
        """
        # Check if process is already running and healthy
        if await self._check_process_health():
            return True

        # If auto-start is disabled, just report the unhealthy state
        if not config.server.canvas_auto_start:
            return await self._handle_disabled_auto_start()

        # Try to start the process
        if not await self._attempt_process_start():
            return False

        # Wait for process to become healthy
        return await self._wait_for_health()

    async def ensure_running(self) -> bool:
        """Ensure the canvas server is running and healthy.

        The startup lock is held for the whole check so that concurrent
        callers do not start the server at the same time.
        """
        async with self._startup_lock:
            return await self._ensure_process_healthy()

    async def _is_process_healthy(self) -> bool:
        """Return True if the tracked process is running and passes the health check."""
        if not self._is_process_running():
            return False

        return await http_client.check_health()

    def _is_process_running(self) -> bool:
        """Return True if the tracked canvas server process is still alive.

        Side effect: when the process has exited, its PID is gone, or the
        check itself raises, the stored process information is reset.
        """
        if not self.process or not self.process_pid:
            return False

        try:
            # Check if process is still running
            if self.process.poll() is not None:
                logger.debug("Canvas server process has exited")
                self._reset_process_info()
                return False

            # Verify PID is valid
            if not psutil.pid_exists(self.process_pid):
                logger.debug("Canvas server PID no longer exists")
                self._reset_process_info()
                return False

            return True

        except Exception as e:
            logger.debug(f"Error checking process status: {e}")
            self._reset_process_info()
            return False

    async def _start_process(self) -> bool:
        """Start the canvas server process.

        Any previously tracked process is terminated first. After spawning,
        start callbacks are awaited and the coroutine sleeps for the
        configured startup delay.

        Returns:
            True if the process was spawned, False if anything raised.
        """
        try:
            project_root = self._get_project_root()
            logger.info(f"Starting canvas server from {project_root}")

            # Kill any existing process
            self._terminate_existing_process()

            # Start new process in its own session (and therefore its own
            # process group) on POSIX so the whole group can be signalled
            # later. start_new_session is the documented replacement for
            # preexec_fn=os.setsid and is ignored on Windows.
            # NOTE(review): stdout/stderr are piped but never read in this
            # class; a chatty child can block once the pipe buffer fills.
            self.process = subprocess.Popen(
                ["npm", "run", "canvas"],
                cwd=project_root,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                start_new_session=os.name != "nt",
            )

            self.process_pid = self.process.pid
            self._start_time = time.time()
            logger.info(f"Canvas server started with PID: {self.process_pid}")

            # Trigger start callbacks
            await self._trigger_callbacks(self._on_start_callbacks, self.process_pid)

            # Give the server a moment to start
            await asyncio.sleep(config.server.startup_retry_delay_seconds)

            return True

        except Exception as e:
            logger.error(f"Failed to start canvas server: {e}")
            self._reset_process_info()
            return False

    async def _check_health_with_process_check(self) -> bool:
        """Check health once, raising so the retry helper tries again.

        Raises:
            RuntimeError: if the process is no longer running or the health
                endpoint does not yet report healthy.
        """
        if not self._is_process_running():
            raise RuntimeError("Canvas server process died during startup")

        if not await http_client.check_health(force=True):
            raise RuntimeError("Canvas server not yet healthy")
        return True

    async def _wait_for_health(self) -> bool:
        """Wait for the canvas server to become healthy.

        Retries the health check via ``retry_async``; on final failure the
        current process is terminated.

        Returns:
            True once healthy, False if the retries were exhausted.
        """
        logger.info("Waiting for canvas server to become healthy...")

        # Configure retry for health checks
        retry_config = RetryConfig(
            max_attempts=config.server.startup_timeout_seconds,
            max_delay=5.0,
            exponential_base=config.server.sync_retry_exponential_base,
            jitter=config.server.sync_retry_jitter,
        )

        try:
            await retry_async(
                self._check_health_with_process_check,
                retry_config=retry_config,
                retry_on_exceptions=(RuntimeError, Exception),
            )
            logger.info("Canvas server is healthy and ready")
            return True
        except Exception as e:
            logger.error(f"Canvas server failed to become healthy: {e}")
            self._terminate_current_process()
            return False

    def _send_termination_signal(self, sig: int) -> None:
        """Send ``sig`` to the tracked process.

        On POSIX the signal goes to the process group of the tracked PID.
        On Windows ``SIGTERM`` maps to ``terminate()`` and anything else to
        ``kill()``. Does nothing when no process is tracked.
        """
        if self.process is None or self.process_pid is None:
            return

        if os.name != "nt":
            os.killpg(os.getpgid(self.process_pid), sig)
        elif sig == signal.SIGTERM:
            self.process.terminate()
        else:
            self.process.kill()

    def _schedule_callbacks(
        self, callbacks: list[Callable[..., Awaitable[None]]], *args: Any
    ) -> bool:
        """Schedule ``callbacks`` on the running event loop without awaiting.

        Returns:
            True if a task was scheduled, False when no event loop is running
            in the current thread (e.g. during atexit or in a signal handler).
        """
        try:
            # Probe for a loop *before* creating the coroutine so that no
            # never-awaited coroutine object is left behind on failure.
            asyncio.get_running_loop()
        except RuntimeError:
            return False

        task = asyncio.create_task(self._trigger_callbacks(callbacks, *args))
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)
        return True

    def _terminate_existing_process(self) -> None:
        """Terminate any tracked canvas server process and reset state.

        Sends SIGTERM, waits two seconds, then force-kills if the PID still
        exists. Stop callbacks are scheduled first when an event loop is
        running; a missing loop never prevents the termination itself.
        """
        if self.process_pid:
            # Trigger stop callbacks before termination. This is kept outside
            # the try block below so that running without an event loop
            # cannot abort the actual kill.
            if not self._schedule_callbacks(
                self._on_stop_callbacks, self.process_pid, "terminating"
            ):
                logger.debug("No event loop running, skipping stop callbacks")

            try:
                # Try to find and kill the process group
                self._send_termination_signal(signal.SIGTERM)

                # Wait a moment for graceful shutdown.
                # NOTE(review): this is a blocking sleep and stalls the event
                # loop when reached from the async methods.
                time.sleep(2)

                # Force kill if still running. SIGKILL does not exist on
                # Windows, where terminate() is already a hard kill.
                if self.process is not None and psutil.pid_exists(self.process_pid):
                    self._send_termination_signal(
                        getattr(signal, "SIGKILL", signal.SIGTERM)
                    )

            except (ProcessLookupError, OSError) as e:
                logger.debug(f"Process already terminated: {e}")
            except Exception as e:
                logger.warning(f"Error terminating existing process: {e}")

        self._reset_process_info()

    def _terminate_current_process(self) -> None:
        """Terminate the current canvas server process."""
        self._terminate_existing_process()

    def _reset_process_info(self) -> None:
        """Clear the stored process, PID and start time.

        If a PID was stored, "stopped" callbacks are scheduled on the running
        event loop; they are skipped when no loop is running.
        """
        was_running = self.process_pid is not None
        self.process = None
        self.process_pid = None
        self._start_time = None

        if was_running and not self._schedule_callbacks(
            self._on_stop_callbacks, None, "stopped"
        ):
            logger.debug("No event loop running, skipping stop callbacks")

    def _get_project_root(self) -> Path:
        """Return the directory two levels above this source file."""
        return Path(__file__).resolve().parent.parent

    def _signal_handler(self, signum: int, frame: Any) -> None:
        """Handle SIGINT/SIGTERM by cleaning up the canvas server.

        NOTE(review): the handler only runs cleanup; it neither exits nor
        re-raises, so it replaces the default termination behaviour of the
        signals it is installed for.
        """
        logger.info(f"Received signal {signum}, cleaning up...")
        self.cleanup()

    def cleanup(self) -> None:
        """Clean up resources and terminate the canvas server process."""
        logger.info("Cleaning up canvas process manager...")
        self._terminate_current_process()

    async def restart(self) -> bool:
        """Restart the canvas server.

        Restart callbacks are awaited twice: once with status ``"starting"``
        and once with ``"success"`` or ``"failed"``.

        Returns:
            True if the server is running and healthy after the restart.
        """
        logger.info("Restarting canvas server...")
        self._restart_count += 1

        # Trigger restart callbacks
        await self._trigger_callbacks(
            self._on_restart_callbacks, self._restart_count, "starting"
        )

        self._terminate_current_process()
        success = await self.ensure_running()

        # Trigger restart completion callbacks
        status = "success" if success else "failed"
        await self._trigger_callbacks(
            self._on_restart_callbacks, self._restart_count, status
        )

        return success

    async def stop(self) -> None:
        """Stop the canvas server."""
        logger.info("Stopping canvas server...")
        self._terminate_current_process()

    def get_status(self) -> dict[str, Any]:
        """Return a snapshot of process status information.

        The ``"healthy"`` entry is always False here; callers are expected to
        fill it in from a health check.
        """
        is_running = self._is_process_running()
        uptime = (
            time.time() - self._start_time if self._start_time and is_running else 0
        )

        return {
            "running": is_running,
            "pid": self.process_pid,
            "healthy": False,  # Will be updated by health check
            "auto_start_enabled": config.server.canvas_auto_start,
            "start_time": self._start_time,
            "uptime_seconds": uptime,
            "restart_count": self._restart_count,
        }

    # Event hook management methods
    def add_start_callback(self, callback: Callable[..., Awaitable[None]]) -> None:
        """Add callback for process start events."""
        self._on_start_callbacks.append(callback)

    def add_stop_callback(self, callback: Callable[..., Awaitable[None]]) -> None:
        """Add callback for process stop events."""
        self._on_stop_callbacks.append(callback)

    def add_restart_callback(self, callback: Callable[..., Awaitable[None]]) -> None:
        """Add callback for process restart events."""
        self._on_restart_callbacks.append(callback)

    def add_health_change_callback(
        self, callback: Callable[..., Awaitable[None]]
    ) -> None:
        """Add callback for health status changes."""
        self._on_health_change_callbacks.append(callback)

    async def _trigger_callbacks(
        self, callbacks: list[Callable[..., Awaitable[None]]], *args: Any
    ) -> None:
        """Invoke each callback with ``args``, isolating failures.

        Every callback is called; if the call returns an awaitable it is
        awaited. This covers ``async def`` functions as well as callable
        objects and wrappers that return awaitables. An exception from one
        callback is logged and does not stop the remaining callbacks.
        """
        for callback in callbacks:
            try:
                outcome = callback(*args)
                if isinstance(outcome, Awaitable):
                    await outcome
            except Exception as e:
                logger.error(f"Error in process manager callback: {e}")

    def get_restart_count(self) -> int:
        """Get the number of times the process has been restarted."""
        return self._restart_count

    def get_uptime(self) -> float:
        """Get process uptime in seconds (0.0 when not running)."""
        if not self._start_time or not self._is_process_running():
            return 0.0
        return time.time() - self._start_time
# Global process manager instance, created when this module is imported.
# Constructing it registers the atexit and SIGINT/SIGTERM cleanup handlers.
process_manager = CanvasProcessManager()