Coverage for src/alprina_cli/services/sbom_generator.py: 12%

171 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2025-11-14 11:27 +0100

1""" 

2SBOM Generator - Software Bill of Materials generation. 

3Supports CycloneDX (security-focused) and SPDX (compliance-focused) formats. 

4""" 

5 

6import subprocess 

7import json 

8import os 

9from pathlib import Path 

10from typing import Dict, Optional, List 

11from loguru import logger 

12 

13 

class SBOMGenerator:
    """
    Generate Software Bill of Materials in multiple formats.

    Supports:
    - CycloneDX 1.5 (OWASP, security-focused)
    - SPDX 2.3 (ISO/IEC 5962:2021, compliance-focused)

    Generation is delegated to external command-line tools: ``cdxgen`` for
    CycloneDX and ``syft`` for SPDX. Their availability is probed once when
    the instance is created and stored in ``has_cdxgen`` / ``has_syft``.
    """

    def __init__(self):
        """Initialize SBOM generator and probe for the external tools."""
        self._check_tools()

    def _check_tools(self):
        """Record which SBOM tools are installed and log install hints if none are."""
        self.has_cdxgen = self._check_command("cdxgen")
        self.has_syft = self._check_command("syft")

        if not self.has_cdxgen and not self.has_syft:
            logger.warning("No SBOM tools found. Install cdxgen or syft for SBOM generation")
            logger.info("Install cdxgen: npm install -g @cyclonedx/cdxgen")
            logger.info("Install syft: curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sh")

    def _check_command(self, command: str) -> bool:
        """
        Check if a command is available on the PATH.

        Args:
            command: Executable name to look up

        Returns:
            True when the executable can be located, False otherwise
        """
        # Imported locally so this method does not depend on a file-level
        # import. shutil.which performs the PATH lookup in-process, so it also
        # works on platforms that have no `which` executable.
        import shutil

        try:
            return shutil.which(command) is not None
        except Exception:
            # Best-effort probe: any failure is treated as "not installed".
            return False

    def generate_cyclonedx(
        self,
        project_path: str,
        output_path: Optional[str] = None,
        output_format: str = "json"
    ) -> Dict:
        """
        Generate CycloneDX SBOM (security-focused).

        Args:
            project_path: Path to project directory
            output_path: Output file path (optional); defaults to
                ``sbom-cyclonedx.<output_format>`` inside the project directory
            output_format: json or xml

        Returns:
            Dict with SBOM data and metadata. On failure the dict has
            ``success`` set to False and an ``error`` message; when cdxgen is
            missing it is the installation guide instead.
        """
        if not self.has_cdxgen:
            return self._install_guide_cyclonedx()

        project_path = Path(project_path).resolve()

        if output_path is None:
            output_path = project_path / f"sbom-cyclonedx.{output_format}"

        logger.info(f"Generating CycloneDX SBOM for: {project_path}")

        try:
            # Build cdxgen command
            cmd = [
                "cdxgen",
                str(project_path),
                "--output", str(output_path),
                "--spec-version", "1.5"
            ]

            if output_format == "xml":
                cmd.append("--output-format")
                cmd.append("xml")

            # Run cdxgen
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=300  # 5 minute timeout
            )

            if result.returncode != 0:
                logger.error(f"cdxgen failed: {result.stderr}")
                return {
                    "success": False,
                    "error": result.stderr,
                    "tool": "cdxgen"
                }

            # Read generated SBOM ({} if the file cannot be read or parsed)
            sbom_data = self._read_sbom(output_path, output_format)

            # Extract summary
            summary = self._analyze_cyclonedx(sbom_data)

            logger.info(f"CycloneDX SBOM generated: {output_path}")

            return {
                "success": True,
                "format": "CycloneDX",
                "version": "1.5",
                "output_file": str(output_path),
                "output_format": output_format,
                "summary": summary,
                "sbom_data": sbom_data
            }

        except subprocess.TimeoutExpired:
            logger.error("SBOM generation timed out")
            return {
                "success": False,
                "error": "Generation timed out (>5 minutes)",
                "tool": "cdxgen"
            }
        except Exception as e:
            logger.error(f"Error generating CycloneDX SBOM: {e}")
            return {
                "success": False,
                "error": str(e),
                "tool": "cdxgen"
            }

    def generate_spdx(
        self,
        project_path: str,
        output_path: Optional[str] = None,
        output_format: str = "json"
    ) -> Dict:
        """
        Generate SPDX SBOM (compliance-focused, ISO standard).

        Args:
            project_path: Path to project directory
            output_path: Output file path (optional); defaults to
                ``sbom-spdx.json`` for json and ``sbom-spdx.spdx`` otherwise,
                inside the project directory
            output_format: json, yaml, or tag-value

        Returns:
            Dict with SBOM data and metadata. On failure the dict has
            ``success`` set to False and an ``error`` message; when syft is
            missing it is the installation guide instead.
        """
        if not self.has_syft:
            return self._install_guide_spdx()

        project_path = Path(project_path).resolve()

        if output_path is None:
            ext = "json" if output_format == "json" else "spdx"
            output_path = project_path / f"sbom-spdx.{ext}"

        logger.info(f"Generating SPDX SBOM for: {project_path}")

        try:
            # Build syft command
            format_map = {
                "json": "spdx-json",
                "yaml": "spdx",
                "tag-value": "spdx-tag-value"
            }

            # Unrecognised formats fall back to spdx-json.
            syft_format = format_map.get(output_format, "spdx-json")

            cmd = [
                "syft",
                f"dir:{project_path}",
                "--output", f"{syft_format}={output_path}"
            ]

            # Run syft
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=300  # 5 minute timeout
            )

            if result.returncode != 0:
                logger.error(f"syft failed: {result.stderr}")
                return {
                    "success": False,
                    "error": result.stderr,
                    "tool": "syft"
                }

            # Read generated SBOM ({} if the file cannot be read or parsed)
            sbom_data = self._read_sbom(output_path, output_format)

            # Extract summary
            summary = self._analyze_spdx(sbom_data)

            logger.info(f"SPDX SBOM generated: {output_path}")

            return {
                "success": True,
                "format": "SPDX",
                "version": "2.3",
                "output_file": str(output_path),
                "output_format": output_format,
                "summary": summary,
                "sbom_data": sbom_data,
                "iso_standard": "ISO/IEC 5962:2021"
            }

        except subprocess.TimeoutExpired:
            logger.error("SBOM generation timed out")
            return {
                "success": False,
                "error": "Generation timed out (>5 minutes)",
                "tool": "syft"
            }
        except Exception as e:
            logger.error(f"Error generating SPDX SBOM: {e}")
            return {
                "success": False,
                "error": str(e),
                "tool": "syft"
            }

    def generate_both(
        self,
        project_path: str,
        output_dir: Optional[str] = None
    ) -> Dict:
        """
        Generate both CycloneDX and SPDX SBOMs.

        Args:
            project_path: Path to project directory
            output_dir: Output directory (optional); created if missing,
                defaults to the project directory

        Returns:
            Dict with ``success`` (False if either format failed or was
            skipped) and ``formats``, a list holding the CycloneDX result
            followed by the SPDX result
        """
        project_path = Path(project_path).resolve()

        if output_dir is None:
            output_dir = project_path
        else:
            output_dir = Path(output_dir)
            output_dir.mkdir(parents=True, exist_ok=True)

        logger.info("Generating both CycloneDX and SPDX SBOMs...")

        results = {
            "success": True,
            "formats": []
        }

        # Generate CycloneDX
        if self.has_cdxgen:
            cyclonedx_output = output_dir / "sbom-cyclonedx.json"
            cyclonedx_result = self.generate_cyclonedx(
                str(project_path),
                str(cyclonedx_output)
            )
            results["formats"].append(cyclonedx_result)
            if not cyclonedx_result["success"]:
                results["success"] = False
        else:
            logger.warning("Skipping CycloneDX (cdxgen not installed)")
            results["formats"].append({
                "success": False,
                "format": "CycloneDX",
                "error": "cdxgen not installed"
            })

        # Generate SPDX
        if self.has_syft:
            spdx_output = output_dir / "sbom-spdx.json"
            spdx_result = self.generate_spdx(
                str(project_path),
                str(spdx_output)
            )
            results["formats"].append(spdx_result)
            if not spdx_result["success"]:
                results["success"] = False
        else:
            logger.warning("Skipping SPDX (syft not installed)")
            results["formats"].append({
                "success": False,
                "format": "SPDX",
                "error": "syft not installed"
            })

        return results

    def _read_sbom(self, file_path: str, format: str) -> Dict:
        """
        Read SBOM file and return data.

        Args:
            file_path: Location of the generated SBOM
            format: Output format the SBOM was written in

        Returns:
            Parsed JSON for the json format, ``{"file": <path>}`` for any
            other format, or ``{}`` when reading or parsing fails
        """
        try:
            if format == "json":
                # Explicit encoding so parsing does not depend on the locale.
                with open(file_path, 'r', encoding="utf-8") as f:
                    return json.load(f)
            else:
                # For XML/YAML, just return file path
                return {"file": str(file_path)}
        except Exception as e:
            logger.error(f"Error reading SBOM file: {e}")
            return {}

    @staticmethod
    def _count_cyclonedx_dependencies(sbom_data: Dict) -> tuple:
        """
        Return ``(direct, transitive)`` dependency counts for a CycloneDX SBOM.

        When the root component (``metadata.component.bom-ref``) has an entry
        in the dependency graph, direct dependencies are the refs it depends
        on and transitive ones are every other non-root ref in the graph.
        Without an identifiable root, fall back to the historical heuristic:
        entries with a non-empty ``dependsOn`` count as direct, the rest as
        transitive.
        """
        entries = [
            entry for entry in (sbom_data.get("dependencies") or [])
            if isinstance(entry, dict)
        ]
        graph = {entry.get("ref"): entry.get("dependsOn") or [] for entry in entries}

        metadata = sbom_data.get("metadata") or {}
        root_ref = (metadata.get("component") or {}).get("bom-ref")

        if root_ref is not None and root_ref in graph:
            direct = set(graph[root_ref])
            direct.discard(root_ref)

            known_refs = set(graph)
            for targets in graph.values():
                known_refs.update(targets)
            known_refs.discard(root_ref)

            return len(direct), len(known_refs - direct)

        with_children = sum(1 for entry in entries if entry.get("dependsOn"))
        return with_children, len(entries) - with_children

    def _analyze_cyclonedx(self, sbom_data: Dict) -> Dict:
        """
        Analyze CycloneDX SBOM and extract summary.

        Args:
            sbom_data: Parsed CycloneDX document (may be empty)

        Returns:
            Dict with component, dependency and vulnerability counts plus a
            sorted ``licenses`` list and its length in ``unique_licenses``.
            The same keys and value types are returned for empty input.
        """
        summary = {
            "total_components": 0,
            "direct_dependencies": 0,
            "transitive_dependencies": 0,
            "vulnerabilities": 0,
            "licenses": [],
            "unique_licenses": 0
        }

        if not sbom_data:
            return summary

        components = sbom_data.get("components") or []
        summary["total_components"] = len(components)

        # Count dependencies
        direct, transitive = self._count_cyclonedx_dependencies(sbom_data)
        summary["direct_dependencies"] = direct
        summary["transitive_dependencies"] = transitive

        # Count vulnerabilities
        summary["vulnerabilities"] = len(sbom_data.get("vulnerabilities") or [])

        # Collect licenses: each entry holds either a `license` object
        # (identified by `id` or `name`) or a license `expression` string.
        found = set()
        for component in components:
            for lic in component.get("licenses") or []:
                if "license" in lic:
                    lic_data = lic["license"]
                    if "id" in lic_data:
                        found.add(lic_data["id"])
                    elif "name" in lic_data:
                        found.add(lic_data["name"])
                elif lic.get("expression"):
                    found.add(lic["expression"])

        # Sorted so the summary is deterministic between runs.
        summary["licenses"] = sorted(found)
        summary["unique_licenses"] = len(summary["licenses"])

        return summary

    def _analyze_spdx(self, sbom_data: Dict) -> Dict:
        """
        Analyze SPDX SBOM and extract summary.

        Args:
            sbom_data: Parsed SPDX document (may be empty)

        Returns:
            Dict with package, file and relationship counts plus a sorted
            ``licenses`` list and its length in ``unique_licenses``. The same
            keys and value types are returned for empty input.
        """
        summary = {
            "total_packages": 0,
            "files_analyzed": 0,
            "licenses": [],
            "relationships": 0,
            "unique_licenses": 0
        }

        if not sbom_data:
            return summary

        packages = sbom_data.get("packages") or []
        summary["total_packages"] = len(packages)

        # Count files
        summary["files_analyzed"] = len(sbom_data.get("files") or [])

        # Count relationships
        summary["relationships"] = len(sbom_data.get("relationships") or [])

        # Collect licenses, skipping the "NOASSERTION" placeholder
        found = set()
        for package in packages:
            for key in ("licenseConcluded", "licenseDeclared"):
                value = package.get(key)
                if value and value != "NOASSERTION":
                    found.add(value)

        # Sorted so the summary is deterministic between runs.
        summary["licenses"] = sorted(found)
        summary["unique_licenses"] = len(summary["licenses"])

        return summary

    def _install_guide_cyclonedx(self) -> Dict:
        """Return installation guide for CycloneDX tools."""
        return {
            "success": False,
            "error": "cdxgen not installed",
            "install_command": "npm install -g @cyclonedx/cdxgen",
            "install_url": "https://github.com/CycloneDX/cdxgen",
            "description": "CycloneDX generator for OWASP security-focused SBOMs"
        }

    def _install_guide_spdx(self) -> Dict:
        """Return installation guide for SPDX tools."""
        return {
            "success": False,
            "error": "syft not installed",
            "install_command": "curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sh",
            "install_url": "https://github.com/anchore/syft",
            "description": "Syft for ISO-standard SPDX SBOMs"
        }

414 

415 

# Module-wide SBOMGenerator singleton; stays None until first requested.
_sbom_generator = None


def get_sbom_generator() -> SBOMGenerator:
    """
    Return the shared SBOMGenerator, building it on the first call.

    Returns:
        The module-level SBOMGenerator instance
    """
    global _sbom_generator
    if _sbom_generator is not None:
        return _sbom_generator

    _sbom_generator = SBOMGenerator()
    return _sbom_generator

426 

427 

428# Convenience functions 

def generate_sbom(
    project_path: str,
    format: str = "cyclonedx",
    output_path: Optional[str] = None
) -> Dict:
    """
    Generate an SBOM through the shared generator instance.

    Args:
        project_path: Path to project
        format: cyclonedx, spdx, or both (matched case-insensitively)
        output_path: Output file path; not forwarded when format is "both"

    Returns:
        Dict with SBOM data and metadata, or a failure dict naming the
        accepted values when the format is not recognised
    """
    generator = get_sbom_generator()
    requested = format.lower()

    if requested == "cyclonedx":
        return generator.generate_cyclonedx(project_path, output_path)

    if requested == "spdx":
        return generator.generate_spdx(project_path, output_path)

    if requested == "both":
        return generator.generate_both(project_path)

    return {
        "success": False,
        "error": f"Unknown format: {format}. Use 'cyclonedx', 'spdx', or 'both'"
    }