Coverage for src/otg_mcp/schema_registry.py: 99%

220 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-05-19 00:42 -0700

1""" 

2Schema registry for the Open Traffic Generator API. 

3Loads and provides access to OpenAPI schemas based on version. 

4""" 

5 

6import logging 

7import os 

8from typing import Any, Dict, List, Optional 

9 

10import yaml 

11 

12logger = logging.getLogger(__name__) 

13 

14 

class SchemaRegistry:
    """
    Registry for Open Traffic Generator API schemas.

    This class loads and provides access to OpenAPI schemas
    for the various versions of the OTG API.

    Each schema version lives in its own directory (named like "1_30_0")
    containing an "openapi.yaml" file. Versions are looked up in an optional
    custom directory first and then in the built-in "schemas" directory next
    to this module; a custom version shadows a built-in one with the same
    name. Parsed documents are cached per version in ``self.schemas``.
    """

    def __init__(self, custom_schemas_dir: Optional[str] = None):
        """
        Initialize the schema registry.

        No directory is scanned and no schema is loaded here; both happen
        lazily on first use.

        Args:
            custom_schemas_dir: Optional path to custom schemas directory
        """
        logger.info("Initializing schema registry")
        # Cache of parsed schema documents, keyed by normalized version.
        self.schemas: Dict[str, Dict[str, Any]] = {}
        # Cache of discovered version names; None means "not scanned yet".
        self._available_schemas: Optional[List[str]] = None
        self._builtin_schemas_dir = os.path.join(os.path.dirname(__file__), "schemas")
        self._custom_schemas_dir = custom_schemas_dir

        logger.info(
            f"Schema registry initialized with built-in schemas directory: {self._builtin_schemas_dir}"
        )
        if self._custom_schemas_dir:
            logger.info(f"Custom schemas directory: {self._custom_schemas_dir}")

    def _normalize_version(self, version: str) -> str:
        """
        Normalize version string to directory format.

        Args:
            version: Version string (e.g. "1.30.0" or "1_30_0")

        Returns:
            Normalized version string using underscores (e.g. "1_30_0")
        """
        logger.debug(f"Normalizing version string: {version}")
        return version.replace(".", "_")

    def _scan_schema_dir(self, directory: str, source_type: str) -> List[str]:
        """
        List the schema versions found directly under a directory.

        A sub-directory counts as a schema version when it contains an
        "openapi.yaml" file.

        Args:
            directory: Directory to scan
            source_type: Source type for logging ('custom' or 'built-in')

        Returns:
            Names of the version directories found, in ``os.listdir`` order;
            an empty list if the directory could not be scanned
        """
        try:
            return [
                entry
                for entry in os.listdir(directory)
                if os.path.isdir(os.path.join(directory, entry))
                and os.path.exists(os.path.join(directory, entry, "openapi.yaml"))
            ]
        except Exception as e:
            # Discovery is best-effort: an unreadable directory is reported
            # and treated as empty instead of aborting the whole lookup.
            logger.warning(
                f"Error scanning {source_type} schemas directory: {str(e)}"
            )
            return []

    def get_available_schemas(self) -> List[str]:
        """
        Get a list of available schema versions.

        The directories are scanned on the first call only; later calls return
        the cached list. Custom schemas are listed first, followed by built-in
        schemas whose version name is not already provided by the custom
        directory.

        Returns:
            List of available schema versions
        """
        logger.info("Getting available schemas")
        if self._available_schemas is None:
            # Collect into a local list and publish it only when scanning is
            # complete, so a failure part-way through can never leave a
            # half-filled list behind as the cached result.
            available: List[str] = []

            logger.debug("Checking custom schemas directory if specified")
            if self._custom_schemas_dir and os.path.exists(self._custom_schemas_dir):
                logger.info(
                    f"Scanning custom schemas directory: {self._custom_schemas_dir}"
                )
                custom_schemas = self._scan_schema_dir(
                    self._custom_schemas_dir, "custom"
                )
                available.extend(custom_schemas)
                logger.info(
                    f"Found {len(custom_schemas)} schemas in custom directory"
                )

            logger.debug("Checking built-in schemas directory")
            if os.path.exists(self._builtin_schemas_dir):
                logger.info(
                    f"Scanning built-in schemas directory: {self._builtin_schemas_dir}"
                )
                built_in_schemas = self._scan_schema_dir(
                    self._builtin_schemas_dir, "built-in"
                )

                logger.debug(
                    "Adding built-in schemas that don't conflict with custom schemas"
                )
                for schema in built_in_schemas:
                    if schema not in available:
                        available.append(schema)

                logger.info(
                    f"Found {len(built_in_schemas)} schemas in built-in directory"
                )

            self._available_schemas = available
            logger.info(f"Total available schemas: {len(self._available_schemas)}")

        return self._available_schemas

    def schema_exists(self, version: str) -> bool:
        """
        Check if a schema version exists.

        Args:
            version: Schema version to check (e.g. "1.30.0" or "1_30_0")

        Returns:
            True if the schema exists, False otherwise
        """
        normalized = self._normalize_version(version)
        logger.debug(f"Checking if schema exists: {version} (normalized: {normalized})")
        return normalized in self.get_available_schemas()

    def list_schemas(self, version: str) -> List[str]:
        """
        List all schema keys for a specific version.

        Args:
            version: Schema version (e.g. "1.30.0" or "1_30_0")

        Returns:
            List of top-level schema keys

        Raises:
            ValueError: If the schema version does not exist
        """
        logger.info(f"Listing schemas for version: {version}")
        normalized = self._normalize_version(version)

        logger.info(f"Getting schema for version {normalized}")
        schema = self.get_schema(normalized)

        logger.debug("Returning top-level schema keys")
        keys = list(schema.keys())
        logger.info(f"Found {len(keys)} top-level keys in schema {version}")
        return keys

    def get_schema_components(
        self, version: str, path_prefix: str = "components.schemas"
    ) -> List[str]:
        """
        Get a list of component names in a schema.

        Args:
            version: Schema version (e.g. "1.30.0" or "1_30_0")
            path_prefix: The path prefix to look in (default: "components.schemas")

        Returns:
            List of component names; an empty list if the value found at
            ``path_prefix`` is not a dictionary

        Raises:
            ValueError: If the schema version or path does not exist
        """
        logger.info(
            f"Getting schema components for {version} with prefix {path_prefix}"
        )
        normalized = self._normalize_version(version)

        logger.info(f"Getting component at path {path_prefix}")
        component = self.get_schema(normalized, path_prefix)

        logger.debug("Returning component keys")
        if isinstance(component, dict):
            keys = list(component.keys())
            logger.info(f"Found {len(keys)} components at path {path_prefix}")
            return keys

        logger.warning(f"Component at {path_prefix} is not a dictionary")
        return []

    def _load_schema_from_path(self, path: str, version: str, source_type: str) -> bool:
        """
        Load schema from a specified path into the cache.

        Nothing is cached unless the file parses to a mapping, so a broken or
        empty file cannot poison ``self.schemas``.

        Args:
            path: Path to the schema file
            version: Version identifier to use in cache
            source_type: Source type for logging ('custom' or 'built-in')

        Returns:
            True if schema was loaded successfully, False otherwise
        """
        logger.info(f"Loading schema from {source_type} path: {path}")
        try:
            # Read as UTF-8 explicitly rather than with the platform default
            # encoding, which is not UTF-8 everywhere.
            with open(path, "r", encoding="utf-8") as f:
                document = yaml.safe_load(f)
        except Exception as e:
            # Best-effort by design: the caller decides what a failed load means.
            logger.error(f"Error loading schema from {source_type} path: {str(e)}")
            return False

        # safe_load returns None for an empty file and may return a list or a
        # scalar; none of those is a usable schema document.
        if not isinstance(document, dict):
            logger.error(
                f"Error loading schema from {source_type} path: "
                f"expected a mapping at the top level, got {type(document).__name__}"
            )
            return False

        self.schemas[version] = document
        logger.info(f"Successfully loaded schema from {source_type} path")
        return True

    def _parse_version(self, version: str) -> tuple:
        """
        Parse a version string into a comparable tuple.

        Parts that are not purely numeric are skipped rather than rejected, so
        "1_30_0_beta" parses to (1, 30, 0).

        Args:
            version: Version string (e.g. "1_30_0", "1.30.0")

        Returns:
            Tuple of integers representing the version; an empty tuple if the
            string has no numeric parts or a part could not be converted
        """
        parts = version.replace(".", "_").split("_")
        try:
            return tuple(int(part) for part in parts if part.isdigit())
        except ValueError:
            # str.isdigit() accepts some characters int() rejects
            # (e.g. superscript digits).
            logger.warning(f"Could not parse all parts of version: {version}")
            return tuple()

    def get_schema(
        self, version: str, component: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Get schema for the specified version and optional component.

        The schema document is loaded on first access (custom directory first,
        then built-in) and served from the cache afterwards.

        Args:
            version: Schema version (e.g., "1.30.0" or "1_30_0")
            component: Optional component path (e.g., "components.schemas.Flow.Router")
                using dot notation to navigate the schema

        Returns:
            Dict containing the schema or component

        Raises:
            ValueError: If the schema version or component does not exist
        """
        logger.info(
            f"Getting schema for version: {version}, component: {component or 'all'}"
        )
        normalized = self._normalize_version(version)

        logger.info(f"Validating schema version exists: {version}")
        if not self.schema_exists(normalized):
            logger.error(f"Schema version not found: {version}")
            raise ValueError(f"Schema version {version} not found")

        logger.info(f"Loading schema if not already cached: {normalized}")
        if normalized not in self.schemas:
            logger.debug("Checking custom schemas directory first if specified")
            success = False
            if self._custom_schemas_dir:
                custom_schema_path = os.path.join(
                    self._custom_schemas_dir, normalized, "openapi.yaml"
                )
                if os.path.exists(custom_schema_path):
                    success = self._load_schema_from_path(
                        custom_schema_path, normalized, "custom"
                    )

            logger.debug("Trying built-in path if not loaded from custom path")
            if not success:
                builtin_schema_path = os.path.join(
                    self._builtin_schemas_dir, normalized, "openapi.yaml"
                )
                if not self._load_schema_from_path(
                    builtin_schema_path, normalized, "built-in"
                ):
                    raise ValueError(f"Error loading schema {normalized}")

        if not component:
            logger.debug("Returning full schema")
            return self.schemas[normalized]

        logger.info(
            f"Checking if component path requires special handling: {component}"
        )
        if component.startswith("components.schemas."):
            # Schema names may themselves contain dots (e.g. "Flow.Router"), so
            # everything after the prefix is treated as ONE key instead of
            # being split on "." like a generic path.
            logger.debug("Using special handling for components.schemas.X path")
            schema_name = component[len("components.schemas.") :]
            logger.debug(f"Extracted schema name: {schema_name}")

            logger.debug(f"Getting schemas dictionary for {normalized}")
            try:
                schemas = self.schemas[normalized]["components"]["schemas"]
            except (KeyError, TypeError) as e:
                # TypeError covers documents where "components" is present but
                # is not a mapping (e.g. "components: null").
                error_msg = f"Error accessing components.schemas: {str(e)}"
                logger.error(error_msg)
                raise ValueError(error_msg) from e

            if not isinstance(schemas, dict):
                error_msg = (
                    "Error accessing components.schemas: "
                    f"expected a mapping, got {type(schemas).__name__}"
                )
                logger.error(error_msg)
                raise ValueError(error_msg)

            logger.debug(f"Checking if schema {schema_name} exists directly")
            if schema_name in schemas:
                logger.info(f"Found schema {schema_name}")
                return schemas[schema_name]

            error_msg = f"Schema {schema_name} not found in components.schemas"
            logger.error(error_msg)
            raise ValueError(error_msg)

        logger.info("Using standard navigation through component path")
        logger.info(f"Navigating to component: {component}")
        components = component.split(".")
        result = self.schemas[normalized]

        try:
            for comp in components:
                if comp in result:
                    result = result[comp]
                else:
                    error_msg = f"Component {comp} not found in path {component}"
                    logger.error(error_msg)
                    raise ValueError(error_msg)
        except (TypeError, KeyError) as e:
            # Raised when the path walks into a value that cannot be indexed
            # by a string key (a scalar, a string, ...).
            error_msg = f"Invalid component path {component}: {str(e)}"
            logger.error(error_msg)
            raise ValueError(error_msg) from e

        logger.info(f"Successfully retrieved component {component}")
        return result

    def _get_parsed_versions(self, available_versions: List[str]) -> List[tuple]:
        """
        Parse a list of version strings into a list of (version_string, version_tuple) pairs.

        Versions that do not parse to a non-empty tuple are left out.

        Args:
            available_versions: List of version strings

        Returns:
            List of tuples (version_string, version_tuple)
        """
        parsed_versions = []
        for version in available_versions:
            ver_tuple = self._parse_version(version)
            if ver_tuple:
                logger.debug("Including version tuple as it was successfully parsed")
                parsed_versions.append((version, ver_tuple))
        return parsed_versions

    @staticmethod
    def _highest_version(candidates: List[tuple]) -> str:
        """
        Return the version string with the highest parsed version tuple.

        Args:
            candidates: Non-empty list of (version_string, version_tuple) pairs

        Returns:
            The version string of the highest entry; on ties, the entry that
            appears last in ``candidates`` wins
        """
        # A stable sort followed by [-1] keeps the "last one wins" tie rule.
        return sorted(candidates, key=lambda pair: pair[1])[-1][0]

    def find_closest_schema_version(self, requested_version: str) -> str:
        """
        Find closest matching schema version from available schemas.

        Logic:
        1. Exact match if available
        2. Same major.minor with equal or lower patch
        3. Same major with highest available minor
        4. Latest available version as fallback

        A requested version with no numeric parts resolves to the latest
        available version.

        Args:
            requested_version: The version to find a match for

        Returns:
            The closest matching available schema version

        Raises:
            ValueError: If no schemas are available
        """
        logger.info(f"Finding closest schema version to: {requested_version}")
        available_versions = self.get_available_schemas()

        if not available_versions:
            error_msg = "No schema versions available"
            logger.error(error_msg)
            raise ValueError(error_msg)

        logger.debug("Checking for exact schema version match first")
        normalized = self._normalize_version(requested_version)
        if normalized in available_versions:
            logger.info(
                f"Found exact schema match for {requested_version}: {normalized}"
            )
            return normalized

        logger.debug("Parsing the requested version")
        req_version = self._parse_version(requested_version)
        if not req_version:
            logger.debug("Unable to parse version, returning latest schema version")
            return self.get_latest_schema_version()

        logger.debug(
            "Ensuring requested version has at least 3 components (major.minor.patch)"
        )
        if len(req_version) < 3:
            logger.debug("Padding requested version with zeros for missing components")
            req_version = req_version + (0,) * (3 - len(req_version))

        logger.debug("Getting all parsed versions")
        parsed_versions = self._get_parsed_versions(available_versions)
        if not parsed_versions:
            error_msg = "No valid schema versions available"
            logger.error(error_msg)
            raise ValueError(error_msg)

        logger.debug(
            "Finding schema versions with same major.minor and equal or lower patch"
        )
        same_major_minor = [
            (version, ver)
            for version, ver in parsed_versions
            if len(ver) >= 3
            and ver[0] == req_version[0]
            and ver[1] == req_version[1]
            and ver[2] <= req_version[2]
        ]

        if same_major_minor:
            logger.debug("Sorting by version tuple and taking the highest")
            closest = self._highest_version(same_major_minor)
            logger.info(
                f"Using version {closest} with same major.minor as {requested_version}"
            )
            return closest

        logger.debug("Finding schema versions with same major version")
        same_major = [
            (version, ver)
            for version, ver in parsed_versions
            if ver and ver[0] == req_version[0]
        ]

        if same_major:
            logger.debug("Sorting by version tuple and taking the highest")
            closest = self._highest_version(same_major)
            logger.info(
                f"Using version {closest} with same major as {requested_version}"
            )
            return closest

        logger.debug("Fallback to latest overall schema version")
        latest = self._highest_version(parsed_versions)
        logger.info(
            f"No matching version found, falling back to latest version {latest}"
        )
        return latest

    def get_latest_schema_version(self) -> str:
        """
        Get the latest available schema version.

        Returns:
            The latest schema version

        Raises:
            ValueError: If no schemas are available, or none of the available
                version names contains a numeric part
        """
        logger.info("Getting latest schema version")
        available_versions = self.get_available_schemas()

        if not available_versions:
            error_msg = "No schema versions available"
            logger.error(error_msg)
            raise ValueError(error_msg)

        logger.debug("Parsing and sorting versions using helper method")
        parsed_versions = self._get_parsed_versions(available_versions)

        if not parsed_versions:
            error_msg = "No valid schema versions available"
            logger.error(error_msg)
            raise ValueError(error_msg)

        logger.debug("Sorting by version tuple and taking the highest version")
        latest = self._highest_version(parsed_versions)
        logger.info(f"Latest available schema version: {latest}")
        return latest

470 latest = sorted(parsed_versions, key=lambda x: x[1])[-1][0] 

471 logger.info(f"Latest available schema version: {latest}") 

472 return latest