Coverage for src/alprina_cli/services/container_scanner.py: 12%
181 statements
« prev ^ index » next coverage.py v7.11.3, created at 2025-11-14 11:27 +0100
« prev ^ index » next coverage.py v7.11.3, created at 2025-11-14 11:27 +0100
1"""
2Container Scanner - Docker image and Kubernetes security scanning.
3Uses Trivy (Aqua Security) for comprehensive container vulnerability detection.
4"""
6import subprocess
7import json
8import os
9from pathlib import Path
10from typing import Dict, Optional, List
11from loguru import logger
class ContainerScanner:
    """
    Scan Docker images and Kubernetes manifests for vulnerabilities.

    Uses Trivy for:
    - OS package vulnerabilities
    - Language-specific dependencies
    - Secrets in images
    - Misconfigurations
    - IaC security issues

    Every public method returns a plain dict carrying a boolean ``success``
    key. Failures (Trivy missing, non-zero exit, timeout, unparsable output)
    are reported through an ``error`` key instead of being raised.
    """

    # Shell one-liner shown to the user when Trivy is missing.
    _INSTALL_COMMAND = (
        "curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/"
        "main/contrib/install.sh | sh"
    )
    # Severities requested from Trivy when the caller does not pass any.
    _DEFAULT_SEVERITIES = ("CRITICAL", "HIGH", "MEDIUM", "LOW")
    # Upper bound (seconds) for the `trivy --version` probe so that a hung
    # binary cannot block construction of the scanner forever.
    _VERSION_CHECK_TIMEOUT = 15

    def __init__(self):
        """Initialize container scanner and probe for the Trivy binary."""
        self.has_trivy = self._check_trivy()
        if not self.has_trivy:
            logger.warning("Trivy not installed. Install for container scanning.")
            logger.info(f"Install: {self._INSTALL_COMMAND}")

    def _check_trivy(self) -> bool:
        """
        Check if Trivy is installed.

        Returns:
            True when ``trivy --version`` runs and exits with status 0.
            False when the binary is missing, cannot be executed, or does
            not answer within ``_VERSION_CHECK_TIMEOUT`` seconds.
        """
        try:
            result = subprocess.run(
                ["trivy", "--version"],
                capture_output=True,
                text=True,
                timeout=self._VERSION_CHECK_TIMEOUT,
            )
        except (OSError, subprocess.TimeoutExpired):
            # OSError covers FileNotFoundError (not on PATH) as well as
            # PermissionError (present but not executable).
            return False
        return result.returncode == 0

    def scan_image(
        self,
        image: str,
        severity: Optional[List[str]] = None,
        output_format: str = "json",
        include_secrets: bool = True
    ) -> Dict:
        """
        Scan a Docker image for vulnerabilities.

        Args:
            image: Docker image name (e.g., 'nginx:latest', 'myapp:1.0')
            severity: List of severities to report (CRITICAL, HIGH, MEDIUM, LOW).
                Defaults to all four when None.
            output_format: json, table, or sarif
            include_secrets: Whether to scan for secrets

        Returns:
            Dict with scan results and vulnerability summary. On failure the
            dict has ``success=False`` and an ``error`` message.

        Note:
            The summary (and therefore the recommendations) is computed from
            parsed JSON only. For any other ``output_format`` the raw text is
            returned under ``scan_data["raw_output"]`` and all counts are 0.
        """
        if not self.has_trivy:
            return self._install_guide()

        logger.info(f"Scanning Docker image: {image}")

        if severity is None:
            severity = list(self._DEFAULT_SEVERITIES)

        try:
            # Build Trivy command
            cmd = [
                "trivy", "image",
                "--format", output_format,
                "--severity", ",".join(severity),
                "--scanners", "vuln,secret" if include_secrets else "vuln",
                image
            ]

            # Run Trivy
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=300  # 5 minute timeout
            )

            # A non-zero exit that still produced output is parsed as a
            # normal report; only "failed and printed nothing" is an error.
            if result.returncode != 0 and not result.stdout:
                logger.error(f"Trivy scan failed: {result.stderr}")
                return {
                    "success": False,
                    "error": result.stderr,
                    "image": image
                }

            # Parse results
            if output_format == "json":
                scan_data = json.loads(result.stdout) if result.stdout else {}
            else:
                scan_data = {"raw_output": result.stdout}

            # Analyze results
            summary = self._analyze_image_results(scan_data)

            logger.info(f"Scan complete. Found {summary['total_vulnerabilities']} vulnerabilities")

            return {
                "success": True,
                "image": image,
                "scan_type": "docker_image",
                "summary": summary,
                "scan_data": scan_data,
                "recommendations": self._generate_recommendations(summary, image)
            }

        except subprocess.TimeoutExpired:
            logger.error("Container scan timed out")
            return {
                "success": False,
                "error": "Scan timed out (>5 minutes)",
                "image": image
            }
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse Trivy output: {e}")
            return {
                "success": False,
                "error": f"Invalid JSON from Trivy: {e}",
                "image": image
            }
        except Exception as e:
            # Last-resort boundary: callers expect a result dict, never a raise.
            logger.error(f"Error scanning image: {e}")
            return {
                "success": False,
                "error": str(e),
                "image": image
            }

    def scan_kubernetes(
        self,
        manifest_path: str,
        severity: Optional[List[str]] = None
    ) -> Dict:
        """
        Scan Kubernetes manifests for misconfigurations.

        Args:
            manifest_path: Path to K8s YAML file or directory
            severity: List of severities to report. Defaults to
                CRITICAL, HIGH, MEDIUM and LOW when None.

        Returns:
            Dict with scan results. On failure the dict has
            ``success=False`` and an ``error`` message.
        """
        if not self.has_trivy:
            return self._install_guide()

        logger.info(f"Scanning Kubernetes manifest: {manifest_path}")

        if severity is None:
            severity = list(self._DEFAULT_SEVERITIES)

        try:
            # Build Trivy command
            cmd = [
                "trivy", "config",
                "--format", "json",
                "--severity", ",".join(severity),
                manifest_path
            ]

            # Run Trivy
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=120
            )

            if result.returncode != 0 and not result.stdout:
                logger.error(f"Trivy config scan failed: {result.stderr}")
                return {
                    "success": False,
                    "error": result.stderr,
                    "manifest": manifest_path
                }

            # Parse results
            scan_data = json.loads(result.stdout) if result.stdout else {}

            # Analyze results
            summary = self._analyze_config_results(scan_data)

            logger.info(f"K8s scan complete. Found {summary['total_misconfigurations']} issues")

            return {
                "success": True,
                "manifest": manifest_path,
                "scan_type": "kubernetes",
                "summary": summary,
                "scan_data": scan_data
            }

        except subprocess.TimeoutExpired:
            logger.error("Kubernetes scan timed out")
            return {
                "success": False,
                "error": "Scan timed out (>2 minutes)",
                "manifest": manifest_path
            }
        except Exception as e:
            # Also covers json.JSONDecodeError from malformed Trivy output.
            logger.error(f"Error scanning Kubernetes manifest: {e}")
            return {
                "success": False,
                "error": str(e),
                "manifest": manifest_path
            }

    def scan_filesystem(
        self,
        path: str,
        severity: Optional[List[str]] = None
    ) -> Dict:
        """
        Scan a filesystem or directory for vulnerabilities.

        Runs ``trivy fs`` with the vuln, secret and misconfig scanners.

        Args:
            path: Path to scan
            severity: List of severities to report. Defaults to
                CRITICAL, HIGH, MEDIUM and LOW when None.

        Returns:
            Dict with scan results. On failure the dict has
            ``success=False`` and an ``error`` message.
        """
        if not self.has_trivy:
            return self._install_guide()

        logger.info(f"Scanning filesystem: {path}")

        if severity is None:
            severity = list(self._DEFAULT_SEVERITIES)

        try:
            cmd = [
                "trivy", "fs",
                "--format", "json",
                "--severity", ",".join(severity),
                "--scanners", "vuln,secret,misconfig",
                path
            ]

            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=300
            )

            if result.returncode != 0 and not result.stdout:
                # Logged for parity with scan_image / scan_kubernetes.
                logger.error(f"Trivy filesystem scan failed: {result.stderr}")
                return {
                    "success": False,
                    "error": result.stderr,
                    "path": path
                }

            scan_data = json.loads(result.stdout) if result.stdout else {}
            summary = self._analyze_filesystem_results(scan_data)

            return {
                "success": True,
                "path": path,
                "scan_type": "filesystem",
                "summary": summary,
                "scan_data": scan_data
            }

        except subprocess.TimeoutExpired:
            logger.error("Filesystem scan timed out")
            return {
                "success": False,
                "error": "Scan timed out",
                "path": path
            }
        except Exception as e:
            # Also covers json.JSONDecodeError from malformed Trivy output.
            logger.error(f"Error scanning filesystem: {e}")
            return {
                "success": False,
                "error": str(e),
                "path": path
            }

    def generate_sbom(
        self,
        image: str,
        output_file: Optional[str] = None
    ) -> Dict:
        """
        Generate SBOM for a container image.

        Args:
            image: Docker image name
            output_file: Output file path (optional). When given, Trivy is
                asked to write the SBOM there and ``sbom_data`` is None.

        Returns:
            Dict with SBOM generation results. ``sbom_data`` holds Trivy's
            stdout when no ``output_file`` was requested. On failure the
            dict has ``success=False`` and an ``error`` message.
        """
        if not self.has_trivy:
            return self._install_guide()

        logger.info(f"Generating SBOM for image: {image}")

        try:
            cmd = [
                "trivy", "image",
                "--format", "cyclonedx",
                image
            ]

            if output_file:
                cmd.extend(["--output", output_file])

            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=300
            )

            if result.returncode != 0:
                return {
                    "success": False,
                    "error": result.stderr,
                    "image": image
                }

            sbom_data = result.stdout if not output_file else None

            return {
                "success": True,
                "image": image,
                "format": "CycloneDX",
                "output_file": output_file,
                "sbom_data": sbom_data
            }

        except subprocess.TimeoutExpired:
            # Handled explicitly so the message matches the other scan methods.
            logger.error("SBOM generation timed out")
            return {
                "success": False,
                "error": "SBOM generation timed out (>5 minutes)",
                "image": image
            }
        except Exception as e:
            logger.error(f"Error generating SBOM: {e}")
            return {
                "success": False,
                "error": str(e),
                "image": image
            }

    @staticmethod
    def _as_list(value) -> list:
        """Return ``value`` if it is a list, otherwise an empty list.

        JSON ``null`` decodes to None, which ``dict.get(key, [])`` passes
        straight through; this keeps ``len()`` and iteration safe.
        """
        return value if isinstance(value, list) else []

    def _iter_results(self, scan_data) -> List[Dict]:
        """Return the per-target result entries of a parsed Trivy report.

        Accepts the object form (``{"Results": [...]}``) and, defensively, a
        bare top-level list (believed to be the layout emitted by older Trivy
        releases -- verify against the Trivy version in use). Anything else
        yields an empty list. Non-dict entries are dropped.
        """
        if isinstance(scan_data, dict):
            raw_results = scan_data.get("Results")
        else:
            raw_results = scan_data
        return [entry for entry in self._as_list(raw_results) if isinstance(entry, dict)]

    @staticmethod
    def _tally_severities(findings: list, counts: Dict) -> None:
        """Increment ``counts`` in place for each finding's ``Severity``.

        A finding without a ``Severity`` is treated as "UNKNOWN". Severities
        that are not already keys of ``counts`` are ignored, so the caller's
        initial dict decides which buckets exist.
        """
        for finding in findings:
            if isinstance(finding, dict):
                level = finding.get("Severity", "UNKNOWN")
            else:
                level = "UNKNOWN"
            if level in counts:
                counts[level] += 1

    def _analyze_image_results(self, scan_data: Dict) -> Dict:
        """
        Analyze Trivy image scan results.

        Args:
            scan_data: Parsed Trivy JSON report (or any placeholder dict).

        Returns:
            Dict with ``total_vulnerabilities``, ``by_severity`` (CRITICAL,
            HIGH, MEDIUM, LOW, UNKNOWN), ``secrets_found``,
            ``packages_scanned`` and ``layers``. Missing or null sections
            count as zero.
        """
        summary = {
            "total_vulnerabilities": 0,
            "by_severity": {
                "CRITICAL": 0,
                "HIGH": 0,
                "MEDIUM": 0,
                "LOW": 0,
                "UNKNOWN": 0
            },
            "secrets_found": 0,
            "packages_scanned": 0,
            "layers": 0
        }

        for result in self._iter_results(scan_data):
            # Count vulnerabilities
            vulnerabilities = self._as_list(result.get("Vulnerabilities"))
            summary["total_vulnerabilities"] += len(vulnerabilities)
            self._tally_severities(vulnerabilities, summary["by_severity"])

            # Count secrets
            summary["secrets_found"] += len(self._as_list(result.get("Secrets")))

            # Count packages
            summary["packages_scanned"] += len(self._as_list(result.get("Packages")))

        # Get image metadata: layer count is the length of the image history.
        metadata = scan_data.get("Metadata") if isinstance(scan_data, dict) else None
        image_config = metadata.get("ImageConfig") if isinstance(metadata, dict) else None
        if isinstance(image_config, dict):
            summary["layers"] = len(self._as_list(image_config.get("history")))

        return summary

    def _analyze_config_results(self, scan_data: Dict) -> Dict:
        """
        Analyze Trivy config scan results.

        Args:
            scan_data: Parsed Trivy JSON report.

        Returns:
            Dict with ``total_misconfigurations``, ``by_severity`` (CRITICAL,
            HIGH, MEDIUM, LOW) and ``files_scanned`` (number of result
            entries). Findings with any other severity are included in the
            total but not in ``by_severity``.
        """
        summary = {
            "total_misconfigurations": 0,
            "by_severity": {
                "CRITICAL": 0,
                "HIGH": 0,
                "MEDIUM": 0,
                "LOW": 0
            },
            "files_scanned": 0
        }

        results = self._iter_results(scan_data)
        summary["files_scanned"] = len(results)

        for result in results:
            misconfigs = self._as_list(result.get("Misconfigurations"))
            summary["total_misconfigurations"] += len(misconfigs)
            self._tally_severities(misconfigs, summary["by_severity"])

        return summary

    def _analyze_filesystem_results(self, scan_data: Dict) -> Dict:
        """
        Analyze Trivy filesystem scan results.

        Args:
            scan_data: Parsed Trivy JSON report.

        Returns:
            Dict with ``total_vulnerabilities``, ``total_secrets``,
            ``total_misconfigurations`` and ``by_severity``. The severity
            breakdown covers vulnerabilities only; secrets and
            misconfigurations contribute to their totals alone.
        """
        summary = {
            "total_vulnerabilities": 0,
            "total_secrets": 0,
            "total_misconfigurations": 0,
            "by_severity": {
                "CRITICAL": 0,
                "HIGH": 0,
                "MEDIUM": 0,
                "LOW": 0
            }
        }

        for result in self._iter_results(scan_data):
            # Vulnerabilities
            vulns = self._as_list(result.get("Vulnerabilities"))
            summary["total_vulnerabilities"] += len(vulns)
            self._tally_severities(vulns, summary["by_severity"])

            # Secrets
            summary["total_secrets"] += len(self._as_list(result.get("Secrets")))

            # Misconfigurations
            summary["total_misconfigurations"] += len(
                self._as_list(result.get("Misconfigurations"))
            )

        return summary

    def _generate_recommendations(self, summary: Dict, image: str) -> List[str]:
        """
        Generate recommendations based on scan results.

        Args:
            summary: Summary dict as produced by ``_analyze_image_results``
                (needs ``by_severity`` and ``total_vulnerabilities``).
            image: Image reference that was scanned; used for the tag check.

        Returns:
            List of human-readable recommendation strings; never empty.
        """
        recommendations = []

        # Check for critical issues
        critical = summary["by_severity"].get("CRITICAL", 0)
        high = summary["by_severity"].get("HIGH", 0)

        if critical > 0:
            recommendations.append(
                f"🚨 URGENT: {critical} CRITICAL vulnerabilities found. Update image immediately."
            )

        if high > 0:
            recommendations.append(
                f"⚠️ {high} HIGH severity vulnerabilities. Plan updates within 1 week."
            )

        # Base image recommendations: flag only an explicit ':latest' tag,
        # i.e. the text after the last ':' is exactly 'latest'.
        if image.endswith(":latest"):
            recommendations.append(
                "💡 Avoid 'latest' tag. Use specific versions for reproducibility."
            )

        # Secrets found
        if summary.get("secrets_found", 0) > 0:
            recommendations.append(
                "🔐 Secrets detected in image. Remove hardcoded credentials immediately."
            )

        # General recommendations
        if summary["total_vulnerabilities"] > 50:
            recommendations.append(
                "📦 Consider using a minimal base image (alpine, distroless) to reduce attack surface."
            )

        if not recommendations:
            recommendations.append(
                "✅ No critical issues found. Continue monitoring for new vulnerabilities."
            )

        return recommendations

    def _install_guide(self) -> Dict:
        """Return the failure dict describing how to install Trivy."""
        return {
            "success": False,
            "error": "Trivy not installed",
            "install_command": self._INSTALL_COMMAND,
            "install_url": "https://github.com/aquasecurity/trivy",
            "description": "Trivy - comprehensive container security scanner by Aqua Security"
        }
# Lazily-created scanner instance shared across calls to get_container_scanner()
_container_scanner = None


def get_container_scanner() -> ContainerScanner:
    """Return the shared ContainerScanner, constructing it on first use."""
    global _container_scanner
    if _container_scanner is not None:
        return _container_scanner
    _container_scanner = ContainerScanner()
    return _container_scanner
528# Convenience functions
def scan_docker_image(image: str, severity: List[str] = None) -> Dict:
    """
    Scan a Docker image through the shared scanner instance.

    Args:
        image: Docker image name
        severity: List of severities to report; forwarded unchanged to
            ``scan_image``

    Returns:
        Dict with scan results, exactly as returned by ``scan_image``
    """
    return get_container_scanner().scan_image(image, severity)
def scan_k8s_manifest(manifest_path: str, severity: Optional[List[str]] = None) -> Dict:
    """
    Convenience function to scan Kubernetes manifest.

    Args:
        manifest_path: Path to K8s YAML
        severity: Optional list of severities to report. Forwarded to
            ``scan_kubernetes``; when None (the default, matching the
            previous one-argument behaviour) the scanner applies its own
            default set.

    Returns:
        Dict with scan results
    """
    scanner = get_container_scanner()
    return scanner.scan_kubernetes(manifest_path, severity)