Coverage for src/alprina_cli/services/container_scanner.py: 12%

181 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2025-11-14 11:27 +0100

1""" 

2Container Scanner - Docker image and Kubernetes security scanning. 

3Uses Trivy (Aqua Security) for comprehensive container vulnerability detection. 

4""" 

5 

6import subprocess 

7import json 

8import os 

9from pathlib import Path 

10from typing import Dict, Optional, List 

11from loguru import logger 

12 

13 

class ContainerScanner:
    """
    Scan Docker images, Kubernetes manifests and directories with Trivy.

    Every public method shells out to the ``trivy`` CLI and returns a plain
    dict. The dict always carries a ``success`` flag; failures additionally
    carry an ``error`` string and the scanned target. No public method raises
    for a failed scan.

    Uses Trivy for:
    - OS package vulnerabilities
    - Language-specific dependencies
    - Secrets in images
    - Misconfigurations
    - IaC security issues
    """

    # Severities requested from Trivy when the caller passes none.
    _DEFAULT_SEVERITIES = ("CRITICAL", "HIGH", "MEDIUM", "LOW")

    # Shown to the user whenever the trivy binary cannot be used.
    _INSTALL_COMMAND = (
        "curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/"
        "main/contrib/install.sh | sh"
    )

    # Upper bound (seconds) for the ``trivy --version`` probe so a hung
    # binary cannot block construction forever.
    _VERSION_TIMEOUT = 30

    def __init__(self):
        """Probe for the Trivy binary once and remember the outcome."""
        self.has_trivy = self._check_trivy()
        if not self.has_trivy:
            logger.warning("Trivy not installed. Install for container scanning.")
            logger.info(f"Install: {self._INSTALL_COMMAND}")

    def _check_trivy(self) -> bool:
        """
        Check if Trivy is installed and runnable.

        Returns:
            True when ``trivy --version`` exits with status 0. False when the
            binary is missing, cannot be executed, or does not answer within
            the probe timeout.
        """
        try:
            result = subprocess.run(
                ["trivy", "--version"],
                capture_output=True,
                text=True,
                timeout=self._VERSION_TIMEOUT,
            )
        except (OSError, subprocess.TimeoutExpired):
            # OSError covers FileNotFoundError (not installed) as well as
            # PermissionError and similar launch failures.
            return False
        return result.returncode == 0

    def scan_image(
        self,
        image: str,
        severity: Optional[List[str]] = None,
        output_format: str = "json",
        include_secrets: bool = True
    ) -> Dict:
        """
        Scan a Docker image for vulnerabilities.

        Args:
            image: Docker image name (e.g., 'nginx:latest', 'myapp:1.0')
            severity: List of severities to report (CRITICAL, HIGH, MEDIUM,
                LOW). ``None`` or an empty list selects all four.
            output_format: json, table, or sarif
            include_secrets: Whether to scan for secrets

        Returns:
            Dict with scan results and vulnerability summary. Counts in
            ``summary`` are only computed for ``output_format="json"``; for
            other formats the raw report is returned under
            ``scan_data["raw_output"]`` and the summary stays zeroed.
        """
        if not self.has_trivy:
            return self._install_guide()

        logger.info(f"Scanning Docker image: {image}")

        try:
            cmd = [
                "trivy", "image",
                "--format", output_format,
                "--severity", self._severity_arg(severity),
                "--scanners", "vuln,secret" if include_secrets else "vuln",
                image,
            ]

            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=300  # 5 minute timeout
            )

            # A non-zero exit only counts as failure when nothing was written
            # to stdout; any output is still parsed below.
            if result.returncode != 0 and not result.stdout:
                logger.error(f"Trivy scan failed: {result.stderr}")
                return self._failure(result.stderr, image=image)

            if output_format == "json":
                scan_data = json.loads(result.stdout) if result.stdout else {}
                summary = self._analyze_image_results(scan_data)
                recommendations = self._generate_recommendations(summary, image)
                logger.info(
                    f"Scan complete. Found {summary['total_vulnerabilities']} vulnerabilities"
                )
            else:
                # Non-JSON reports cannot be analysed here. Keep the summary
                # shape (all zeros) for callers, but do not claim the image
                # is clean on the basis of counts that were never computed.
                scan_data = {"raw_output": result.stdout}
                summary = self._analyze_image_results({})
                recommendations = [
                    "Structured vulnerability counts are only available with "
                    "output_format='json'; review raw_output for findings."
                ]
                logger.info(
                    f"Scan complete. Raw {output_format} report returned without analysis"
                )

            return {
                "success": True,
                "image": image,
                "scan_type": "docker_image",
                "summary": summary,
                "scan_data": scan_data,
                "recommendations": recommendations
            }

        except subprocess.TimeoutExpired:
            logger.error("Container scan timed out")
            return self._failure("Scan timed out (>5 minutes)", image=image)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse Trivy output: {e}")
            return self._failure(f"Invalid JSON from Trivy: {e}", image=image)
        except Exception as e:
            # Boundary handler: scan methods report failures, never raise.
            logger.error(f"Error scanning image: {e}")
            return self._failure(str(e), image=image)

    def scan_kubernetes(
        self,
        manifest_path: str,
        severity: Optional[List[str]] = None
    ) -> Dict:
        """
        Scan Kubernetes manifests for misconfigurations.

        Args:
            manifest_path: Path to K8s YAML file or directory
            severity: List of severities to report. ``None`` or an empty
                list selects CRITICAL, HIGH, MEDIUM and LOW.

        Returns:
            Dict with scan results
        """
        if not self.has_trivy:
            return self._install_guide()

        logger.info(f"Scanning Kubernetes manifest: {manifest_path}")

        try:
            cmd = [
                "trivy", "config",
                "--format", "json",
                "--severity", self._severity_arg(severity),
                manifest_path,
            ]

            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=120
            )

            if result.returncode != 0 and not result.stdout:
                logger.error(f"Trivy config scan failed: {result.stderr}")
                return self._failure(result.stderr, manifest=manifest_path)

            scan_data = json.loads(result.stdout) if result.stdout else {}
            summary = self._analyze_config_results(scan_data)

            logger.info(
                f"K8s scan complete. Found {summary['total_misconfigurations']} issues"
            )

            return {
                "success": True,
                "manifest": manifest_path,
                "scan_type": "kubernetes",
                "summary": summary,
                "scan_data": scan_data
            }

        except subprocess.TimeoutExpired:
            logger.error("Kubernetes scan timed out")
            return self._failure("Scan timed out (>2 minutes)", manifest=manifest_path)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse Trivy output: {e}")
            return self._failure(f"Invalid JSON from Trivy: {e}", manifest=manifest_path)
        except Exception as e:
            logger.error(f"Error scanning Kubernetes manifest: {e}")
            return self._failure(str(e), manifest=manifest_path)

    def scan_filesystem(
        self,
        path: str,
        severity: Optional[List[str]] = None
    ) -> Dict:
        """
        Scan a filesystem or directory for vulnerabilities.

        Runs the vulnerability, secret and misconfiguration scanners.

        Args:
            path: Path to scan
            severity: List of severities to report. ``None`` or an empty
                list selects CRITICAL, HIGH, MEDIUM and LOW.

        Returns:
            Dict with scan results
        """
        if not self.has_trivy:
            return self._install_guide()

        logger.info(f"Scanning filesystem: {path}")

        try:
            cmd = [
                "trivy", "fs",
                "--format", "json",
                "--severity", self._severity_arg(severity),
                "--scanners", "vuln,secret,misconfig",
                path,
            ]

            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=300
            )

            if result.returncode != 0 and not result.stdout:
                logger.error(f"Trivy filesystem scan failed: {result.stderr}")
                return self._failure(result.stderr, path=path)

            scan_data = json.loads(result.stdout) if result.stdout else {}
            summary = self._analyze_filesystem_results(scan_data)

            return {
                "success": True,
                "path": path,
                "scan_type": "filesystem",
                "summary": summary,
                "scan_data": scan_data
            }

        except subprocess.TimeoutExpired:
            logger.error("Filesystem scan timed out")
            return self._failure("Scan timed out", path=path)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse Trivy output: {e}")
            return self._failure(f"Invalid JSON from Trivy: {e}", path=path)
        except Exception as e:
            logger.error(f"Error scanning filesystem: {e}")
            return self._failure(str(e), path=path)

    def generate_sbom(
        self,
        image: str,
        output_file: Optional[str] = None
    ) -> Dict:
        """
        Generate a CycloneDX SBOM for a container image.

        Args:
            image: Docker image name
            output_file: Output file path (optional). When given, Trivy is
                asked to write the SBOM there and ``sbom_data`` is ``None``;
                otherwise the SBOM text captured from stdout is returned in
                ``sbom_data``.

        Returns:
            Dict with SBOM generation results
        """
        if not self.has_trivy:
            return self._install_guide()

        logger.info(f"Generating SBOM for image: {image}")

        try:
            cmd = ["trivy", "image", "--format", "cyclonedx"]
            if output_file:
                cmd.extend(["--output", output_file])
            # Flags first, target last.
            cmd.append(image)

            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=300
            )

            if result.returncode != 0:
                logger.error(f"Trivy SBOM generation failed: {result.stderr}")
                return self._failure(result.stderr, image=image)

            return {
                "success": True,
                "image": image,
                "format": "CycloneDX",
                "output_file": output_file,
                "sbom_data": None if output_file else result.stdout
            }

        except subprocess.TimeoutExpired:
            logger.error("SBOM generation timed out")
            return self._failure("SBOM generation timed out (>5 minutes)", image=image)
        except Exception as e:
            logger.error(f"Error generating SBOM: {e}")
            return self._failure(str(e), image=image)

    def _severity_arg(self, severity: Optional[List[str]]) -> str:
        """Build the comma-separated ``--severity`` value, defaulting when empty."""
        return ",".join(severity or self._DEFAULT_SEVERITIES)

    @staticmethod
    def _failure(error: str, **target) -> Dict:
        """Build the failure dict: ``success``, ``error``, then the scanned target."""
        return {"success": False, "error": error, **target}

    @staticmethod
    def _result_entries(scan_data) -> List[Dict]:
        """
        Extract the per-target result entries from parsed Trivy JSON.

        Accepts the usual object with a ``Results`` key and also tolerates a
        bare list of results. A missing or ``null`` ``Results`` yields an
        empty list, and entries that are not dicts are skipped.
        """
        entries = scan_data.get("Results") if isinstance(scan_data, dict) else scan_data
        if not isinstance(entries, list):
            return []
        return [entry for entry in entries if isinstance(entry, dict)]

    @staticmethod
    def _tally_severities(findings: List, buckets: Dict[str, int]) -> None:
        """
        Add one count per finding to ``buckets`` (which must contain UNKNOWN).

        Missing or unrecognised severities are counted as UNKNOWN so that the
        buckets always add up to the number of findings.
        """
        for finding in findings:
            level = finding.get("Severity", "UNKNOWN") if isinstance(finding, dict) else "UNKNOWN"
            if level not in buckets:
                level = "UNKNOWN"
            buckets[level] += 1

    def _analyze_image_results(self, scan_data: Dict) -> Dict:
        """
        Analyze Trivy image scan results.

        Returns:
            Dict with ``total_vulnerabilities``, ``by_severity`` (CRITICAL,
            HIGH, MEDIUM, LOW, UNKNOWN), ``secrets_found``,
            ``packages_scanned`` and ``layers`` (length of
            ``Metadata.ImageConfig.history`` when present, else 0).
        """
        summary = {
            "total_vulnerabilities": 0,
            "by_severity": {
                "CRITICAL": 0,
                "HIGH": 0,
                "MEDIUM": 0,
                "LOW": 0,
                "UNKNOWN": 0
            },
            "secrets_found": 0,
            "packages_scanned": 0,
            "layers": 0
        }

        for entry in self._result_entries(scan_data):
            # ``or []`` guards against keys that are present but null.
            vulnerabilities = entry.get("Vulnerabilities") or []
            summary["total_vulnerabilities"] += len(vulnerabilities)
            self._tally_severities(vulnerabilities, summary["by_severity"])

            summary["secrets_found"] += len(entry.get("Secrets") or [])
            summary["packages_scanned"] += len(entry.get("Packages") or [])

        # Layer count comes from the image history in the report metadata.
        metadata = scan_data.get("Metadata") if isinstance(scan_data, dict) else None
        image_config = metadata.get("ImageConfig") if isinstance(metadata, dict) else None
        history = image_config.get("history") if isinstance(image_config, dict) else None
        if isinstance(history, list):
            summary["layers"] = len(history)

        return summary

    def _analyze_config_results(self, scan_data: Dict) -> Dict:
        """
        Analyze Trivy config scan results.

        Returns:
            Dict with ``total_misconfigurations``, ``by_severity`` (CRITICAL,
            HIGH, MEDIUM, LOW, UNKNOWN) and ``files_scanned`` (number of
            result entries in the report).
        """
        summary = {
            "total_misconfigurations": 0,
            "by_severity": {
                "CRITICAL": 0,
                "HIGH": 0,
                "MEDIUM": 0,
                "LOW": 0,
                "UNKNOWN": 0
            },
            "files_scanned": 0
        }

        entries = self._result_entries(scan_data)
        summary["files_scanned"] = len(entries)

        for entry in entries:
            misconfigs = entry.get("Misconfigurations") or []
            summary["total_misconfigurations"] += len(misconfigs)
            self._tally_severities(misconfigs, summary["by_severity"])

        return summary

    def _analyze_filesystem_results(self, scan_data: Dict) -> Dict:
        """
        Analyze Trivy filesystem scan results.

        Returns:
            Dict with ``total_vulnerabilities``, ``total_secrets``,
            ``total_misconfigurations`` and ``by_severity``. The severity
            buckets count vulnerabilities only; secrets and misconfigurations
            contribute to their totals but not to ``by_severity``.
        """
        summary = {
            "total_vulnerabilities": 0,
            "total_secrets": 0,
            "total_misconfigurations": 0,
            "by_severity": {
                "CRITICAL": 0,
                "HIGH": 0,
                "MEDIUM": 0,
                "LOW": 0,
                "UNKNOWN": 0
            }
        }

        for entry in self._result_entries(scan_data):
            vulns = entry.get("Vulnerabilities") or []
            summary["total_vulnerabilities"] += len(vulns)
            self._tally_severities(vulns, summary["by_severity"])

            summary["total_secrets"] += len(entry.get("Secrets") or [])
            summary["total_misconfigurations"] += len(entry.get("Misconfigurations") or [])

        return summary

    @staticmethod
    def _uses_latest_tag(image: str) -> bool:
        """
        Tell whether ``image`` resolves to the mutable ``latest`` tag.

        A reference pinned by digest (``name@sha256:...``) never does. The
        tag is looked up only in the last path component, so a registry port
        such as ``localhost:5000/app`` is not mistaken for a tag. A reference
        with no tag at all counts as ``latest``.
        """
        name, _, digest = image.partition("@")
        if digest:
            return False
        last_component = name.rsplit("/", 1)[-1]
        _, colon, tag = last_component.rpartition(":")
        return not colon or tag == "latest"

    def _generate_recommendations(self, summary: Dict, image: str) -> List[str]:
        """
        Generate recommendations based on scan results.

        Args:
            summary: Summary dict as produced by ``_analyze_image_results``;
                ``by_severity`` and ``total_vulnerabilities`` are required.
            image: Image reference that was scanned.

        Returns:
            Human-readable recommendation strings; never empty.
        """
        recommendations = []

        critical = summary["by_severity"].get("CRITICAL", 0)
        high = summary["by_severity"].get("HIGH", 0)

        if critical > 0:
            recommendations.append(
                f"🚨 URGENT: {critical} CRITICAL vulnerabilities found. Update image immediately."
            )

        if high > 0:
            recommendations.append(
                f"⚠️ {high} HIGH severity vulnerabilities. Plan updates within 1 week."
            )

        # Base image recommendations
        if self._uses_latest_tag(image):
            recommendations.append(
                "💡 Avoid 'latest' tag. Use specific versions for reproducibility."
            )

        # Secrets found
        if summary.get("secrets_found", 0) > 0:
            recommendations.append(
                "🔐 Secrets detected in image. Remove hardcoded credentials immediately."
            )

        # General recommendations
        if summary["total_vulnerabilities"] > 50:
            recommendations.append(
                "📦 Consider using a minimal base image (alpine, distroless) to reduce attack surface."
            )

        if not recommendations:
            recommendations.append(
                "✅ No critical issues found. Continue monitoring for new vulnerabilities."
            )

        return recommendations

    def _install_guide(self) -> Dict:
        """Return the failure dict handed out when Trivy is unavailable."""
        return {
            "success": False,
            "error": "Trivy not installed",
            "install_command": self._INSTALL_COMMAND,
            "install_url": "https://github.com/aquasecurity/trivy",
            "description": "Trivy - comprehensive container security scanner by Aqua Security"
        }

514 

515 

# Process-wide scanner, built lazily by get_container_scanner().
_container_scanner = None


def get_container_scanner() -> ContainerScanner:
    """Return the shared ContainerScanner, constructing it on first use."""
    global _container_scanner
    shared = _container_scanner
    if shared is None:
        # First call: build the scanner and cache it for later callers.
        shared = _container_scanner = ContainerScanner()
    return shared

526 

527 

528# Convenience functions 

def scan_docker_image(image: str, severity: List[str] = None) -> Dict:
    """
    Scan a Docker image using the shared scanner instance.

    Args:
        image: Docker image name
        severity: List of severities to report; passed through unchanged
            to ``ContainerScanner.scan_image``

    Returns:
        Dict with scan results
    """
    return get_container_scanner().scan_image(image, severity)

542 

543 

def scan_k8s_manifest(manifest_path: str, severity: Optional[List[str]] = None) -> Dict:
    """
    Convenience function to scan Kubernetes manifest.

    Args:
        manifest_path: Path to K8s YAML
        severity: List of severities to report; passed through to
            ``ContainerScanner.scan_kubernetes``. Omit it to keep that
            method's default severities.

    Returns:
        Dict with scan results
    """
    scanner = get_container_scanner()
    return scanner.scan_kubernetes(manifest_path, severity)