Coverage for src/alprina_cli/agents/cicd_guardian/cicd_guardian.py: 16%
420 statements
« prev ^ index » next coverage.py v7.11.3, created at 2025-11-14 11:27 +0100
« prev ^ index » next coverage.py v7.11.3, created at 2025-11-14 11:27 +0100
1"""
2Alprina CI/CD Pipeline Guardian Agent
4Enterprise-grade CI/CD pipeline security agent that detects poisoned pipeline execution,
5vulnerable GitHub Actions, and supply chain attacks in real-time.
6"""
8import asyncio
9import yaml
10import json
11import re
12from pathlib import Path
13from typing import Dict, Any, List, Optional, Tuple
14from dataclasses import dataclass
15from enum import Enum
16import requests
17import subprocess
18from loguru import logger
20# LLM Enhancement (optional)
21try:
22 from ..llm_enhancer import LLMEnhancer
23 from ..llm_config import LLMConfig
24 LLM_AVAILABLE = True
25except ImportError:
26 LLM_AVAILABLE = False
28# CVE Database
29try:
30 from .cve_database import get_cve_database, CVEDatabase
31 CVE_DATABASE_AVAILABLE = True
32except ImportError:
33 CVE_DATABASE_AVAILABLE = False
class PipelineType(Enum):
    """Supported CI/CD pipeline systems, identified by config-file flavor."""
    GITHUB_ACTIONS = "github_actions"
    GITLAB_CI = "gitlab_ci"
    JENKINS = "jenkins"
    AZURE_PIPELINES = "azure_pipelines"
    # NOTE(review): no discovery or analysis path for Bitbucket is visible
    # in this file — confirm whether BITBUCKET is handled elsewhere.
    BITBUCKET = "bitbucket"
@dataclass
class VulnerabilityFinding:
    """Represents a security finding in CI/CD pipeline"""
    severity: str  # one of "critical", "high", "medium", "low"
    title: str  # short human-readable finding name
    description: str  # detailed explanation of the issue
    file_path: str  # pipeline file the finding was raised against
    line_number: Optional[int]  # 1-based line number, or None when not localized
    cve_id: Optional[str] = None  # CVE/advisory identifier, when known
    remediation: Optional[str] = None  # suggested fix text
    confidence: int = 100  # detector confidence, 0-100
@dataclass
class PipelineAnalysisResult:
    """Result of pipeline security analysis"""
    vulnerabilities: List[VulnerabilityFinding]  # all findings, in detection order
    pipeline_type: PipelineType  # pipeline system this result refers to
    files_analyzed: List[str]  # paths of the files that were scanned
    secrets_detected: List[str]  # "Type: matched-text" strings from SecretsDetector
    risk_score: int  # severity-weighted aggregate score, 0-100
67class PipelineGuardianAgent:
68 """
69 CI/CD Pipeline Guardian Agent
71 Detects:
72 - Poisoned Pipeline Execution (PPE) attacks
73 - Vulnerable GitHub Actions
74 - Hardcoded secrets in workflows
75 - Insecure container images
76 - Excessive permissions and privilege escalation
77 - Supply chain compromises
78 """
    def __init__(self):
        """Wire up the per-category rule engines and the local CVE lookup."""
        # Agent identity metadata
        self.name = "CI/CD Pipeline Guardian"
        self.agent_type = "cicd-security"
        self.description = "Enterprise-grade CI/CD pipeline security monitoring and threat detection"

        # Security rule engines
        self.ppe_detector = PoisonedPipelineDetector()
        self.github_scanner = GitHubVulnerabilityScanner()
        self.secrets_detector = SecretsDetector()
        # NOTE(review): permissions_analyzer is never invoked anywhere in
        # this file — confirm whether it is meant to be called during scans.
        self.permissions_analyzer = PermissionsAnalyzer()

        # Vulnerability databases (static in-file table, not a live feed)
        self.github_actions_cve_db = self._initialize_github_cve_db()
    def analyze_directory(self, directory_path: str) -> PipelineAnalysisResult:
        """
        Analyze a directory for CI/CD pipeline files

        Args:
            directory_path: Path to scan for pipeline files

        Returns:
            PipelineAnalysisResult with all findings. Per-file analysis
            errors are downgraded to low-severity "Analysis Error" findings
            instead of aborting the whole scan.
        """
        logger.info(f"Starting CI/CD pipeline analysis in {directory_path}")

        directory = Path(directory_path)
        all_vulnerabilities = []
        files_analyzed = []
        secrets_detected = []

        # Find pipeline files
        pipeline_files = self._discover_pipeline_files(directory)

        for file_path, pipeline_type in pipeline_files:
            try:
                result = self.analyze_pipeline_file(file_path, pipeline_type)
                all_vulnerabilities.extend(result.vulnerabilities)
                files_analyzed.extend(result.files_analyzed)
                secrets_detected.extend(result.secrets_detected)

            except Exception as e:
                # Keep scanning remaining files; record the failure as a finding
                logger.error(f"Error analyzing {file_path}: {e}")
                all_vulnerabilities.append(VulnerabilityFinding(
                    severity="low",
                    title="Analysis Error",
                    description=f"Failed to analyze pipeline file: {str(e)}",
                    file_path=str(file_path),
                    line_number=None
                ))

        # Calculate overall risk score
        risk_score = self._calculate_risk_score(all_vulnerabilities)

        logger.info(f"Analysis complete: {len(all_vulnerabilities)} vulnerabilities found")

        # NOTE(review): `pipeline_type` below is the loop variable left over
        # from the LAST file analyzed, so a mixed-type directory reports only
        # the last file's type (GITHUB_ACTIONS when nothing was discovered).
        return PipelineAnalysisResult(
            vulnerabilities=all_vulnerabilities,
            pipeline_type=pipeline_type if pipeline_files else PipelineType.GITHUB_ACTIONS,
            files_analyzed=files_analyzed,
            secrets_detected=secrets_detected,
            risk_score=risk_score
        )
    def analyze_pipeline_file(self, file_path: str, pipeline_type: PipelineType) -> PipelineAnalysisResult:
        """
        Analyze a single CI/CD pipeline file

        Args:
            file_path: Path to pipeline file
            pipeline_type: Type of pipeline system

        Returns:
            PipelineAnalysisResult with findings

        Raises:
            Exception: any read/parse/analysis error is logged and re-raised
                (callers such as analyze_directory catch and downgrade it).
        """
        file_path = Path(file_path)
        vulnerabilities = []
        secrets_detected = []

        logger.info(f"Analyzing {file_path} ({pipeline_type.value})")

        try:
            content = file_path.read_text(encoding='utf-8')

            # Parse based on pipeline type
            if pipeline_type == PipelineType.GITHUB_ACTIONS:
                parsed_content = yaml.safe_load(content)
                vulnerabilities.extend(self._analyze_github_actions(parsed_content, str(file_path)))
            elif pipeline_type == PipelineType.GITLAB_CI:
                parsed_content = yaml.safe_load(content)
                vulnerabilities.extend(self._analyze_gitlab_ci(parsed_content, str(file_path)))
            elif pipeline_type == PipelineType.JENKINS:
                # Jenkinsfiles are Groovy, not YAML — analyzed as raw text
                vulnerabilities.extend(self._analyze_jenkinsfile(content, str(file_path)))
            # NOTE(review): AZURE_PIPELINES and BITBUCKET fall through with no
            # type-specific analysis; only the universal checks below apply.

            # Universal security checks (run on the raw text for every type)
            vulnerabilities.extend(self.ppe_detector.detect(content, str(file_path)))
            vulnerabilities.extend(self.secrets_detector.scan(content, str(file_path)))
            secrets_detected.extend(self.secrets_detector.extract_secrets(content))

        except Exception as e:
            logger.error(f"Failed to parse {file_path}: {e}")
            raise

        return PipelineAnalysisResult(
            vulnerabilities=vulnerabilities,
            pipeline_type=pipeline_type,
            files_analyzed=[str(file_path)],
            secrets_detected=secrets_detected,
            risk_score=self._calculate_risk_score(vulnerabilities)
        )
191 def _discover_pipeline_files(self, directory: Path) -> List[Tuple[Path, PipelineType]]:
192 """Discover CI/CD configuration files"""
193 pipeline_files = []
195 # GitHub Actions
196 github_dir = directory / ".github" / "workflows"
197 if github_dir.exists():
198 for file_path in github_dir.glob("*.yml"):
199 pipeline_files.append((file_path, PipelineType.GITHUB_ACTIONS))
200 for file_path in github_dir.glob("*.yaml"):
201 pipeline_files.append((file_path, PipelineType.GITHUB_ACTIONS))
203 # GitLab CI
204 gitlab_ci = directory / ".gitlab-ci.yml"
205 if gitlab_ci.exists():
206 pipeline_files.append((gitlab_ci, PipelineType.GITLAB_CI))
208 # Jenkins
209 jenkins_files = list(directory.glob("**/Jenkinsfile"))
210 for jenkins_file in jenkins_files:
211 pipeline_files.append((jenkins_file, PipelineType.JENKINS))
213 # Azure Pipelines
214 azure_pipelines = list(directory.glob("**/azure-pipelines.yml"))
215 azure_pipelines.extend(list(directory.glob("**/.azure/pipelines/*.yml")))
216 for azure_file in azure_pipelines:
217 pipeline_files.append((azure_file, PipelineType.AZURE_PIPELINES))
219 return pipeline_files
221 def _analyze_github_actions(self, workflow: Dict[str, Any], file_path: str) -> List[VulnerabilityFinding]:
222 """Analyze GitHub Actions workflow for vulnerabilities"""
223 vulnerabilities = []
225 if not workflow:
226 return vulnerabilities
228 # Check for vulnerable actions using CVE database
229 cve_vulns = self._check_actions_against_cve_database(workflow, file_path)
230 vulnerabilities.extend(cve_vulns)
232 # Check for vulnerable actions (legacy scanner)
233 for job_name, job_data in workflow.get('jobs', {}).items():
234 for step in job_data.get('steps', []):
235 if 'uses' in step:
236 action_ref = step['uses']
237 vulns = self.github_scanner.check_action_vulnerability(action_ref, file_path)
238 vulnerabilities.extend(vulns)
240 # Check for dangerous triggers
241 triggers = workflow.get('on', {})
242 if 'workflow_dispatch' in triggers:
243 # Check for unsafe inputs
244 inputs = triggers.get('workflow_dispatch', {}).get('inputs', {})
245 for input_name, input_config in inputs.items():
246 if input_config.get('default') and not input_config.get('required'):
247 vulnerabilities.append(VulnerabilityFinding(
248 severity="medium",
249 title="Unsafe Workflow Input",
250 description=f"Input '{input_name}' has default value which could expose sensitive data",
251 file_path=file_path,
252 line_number=None,
253 remediation="Remove default values or mark input as required"
254 ))
256 # Check for excessive permissions
257 permissions = workflow.get('permissions', {})
258 if permissions.get('all') == 'write':
259 vulnerabilities.append(VulnerabilityFinding(
260 severity="high",
261 title="Excessive Workflow Permissions",
262 description="Workflow has write permissions to all repositories",
263 file_path=file_path,
264 line_number=None,
265 remediation="Apply principle of least privilege - only grant specific required permissions"
266 ))
268 # Check for insecure docker image usage
269 for job_name, job_data in workflow.get('jobs', {}).items():
270 if 'container' in job_data:
271 image = job_data['container']
272 if ':' not in image or image.endswith(':latest'):
273 vulnerabilities.append(VulnerabilityFinding(
274 severity="medium",
275 title="Insecure Docker Image Reference",
276 description=f"Using mutable tag '{image}' in job '{job_name}'",
277 file_path=file_path,
278 line_number=None,
279 remediation="Use specific image tags for reproducible builds"
280 ))
282 # WEEK 1: Check for Public PPE (3PE) - CRITICAL SECURITY ISSUE
283 ppe_vulns = self._detect_public_ppe(workflow, file_path)
284 vulnerabilities.extend(ppe_vulns)
286 return vulnerabilities
    def _check_actions_against_cve_database(self, workflow: Dict[str, Any], file_path: str) -> List[VulnerabilityFinding]:
        """
        Check workflow actions against CVE database

        WEEK 1 DAY 3: CVE Database Integration
        Cross-references all actions used in workflow against known CVEs

        Args:
            workflow: Parsed GitHub Actions workflow mapping.
            file_path: Path of the workflow file, recorded on each finding.

        Returns:
            One VulnerabilityFinding per matching CVE; empty when the
            optional CVE database module is unavailable or nothing matches.
        """
        vulnerabilities = []

        # Soft dependency: set by the try-import of .cve_database at module top
        if not CVE_DATABASE_AVAILABLE:
            logger.debug("CVE database not available")
            return vulnerabilities

        try:
            # Get CVE database instance
            cve_db = get_cve_database()

            # Extract all actions from workflow
            for job_name, job_data in workflow.get('jobs', {}).items():
                if not isinstance(job_data, dict):
                    continue

                steps = job_data.get('steps', [])
                for step_idx, step in enumerate(steps):
                    if not isinstance(step, dict):
                        continue

                    # Check if step uses an action
                    uses = step.get('uses')
                    if not uses:
                        continue

                    # Parse action reference (e.g., "actions/checkout@v2")
                    action_name, action_version = self._parse_action_reference(uses)

                    # Search CVE database
                    matching_cves = cve_db.search(
                        action_name=action_name,
                        action_version=action_version
                    )

                    # Create vulnerability findings for matches
                    for cve in matching_cves:
                        vulnerabilities.append(VulnerabilityFinding(
                            severity=cve.severity,
                            title=f"{cve.cve_id}: {cve.title}",
                            description=(
                                f"{cve.description}\n\n"
                                f"Affected: {', '.join(cve.affected_actions)}\n"
                                f"Fixed in: {', '.join(cve.fixed_versions) if cve.fixed_versions else 'No fix available'}"
                            ),
                            file_path=file_path,
                            line_number=None,
                            cve_id=cve.cve_id,
                            remediation=(
                                f"Update '{action_name}' to {cve.fixed_versions[0] if cve.fixed_versions else 'latest version'}\n"
                                f"References:\n" + "\n".join(f"- {ref}" for ref in cve.references[:3])
                            ),
                            confidence=90
                        ))

                    # NOTE(review): len(vulnerabilities) is the cumulative total
                    # across all steps so far, not just this step's matches —
                    # the log line over-reports on later steps.
                    if matching_cves:
                        logger.info(f"Found {len(vulnerabilities)} CVE matches in {file_path}")

        except Exception as e:
            # Best-effort enrichment: CVE lookup failures never fail the scan
            logger.error(f"Error checking CVE database: {e}")

        return vulnerabilities
357 def _parse_action_reference(self, action_ref: str) -> Tuple[str, Optional[str]]:
358 """
359 Parse GitHub Actions reference into name and version
361 Examples:
362 "actions/checkout@v2" -> ("actions/checkout", "v2")
363 "actions/checkout@main" -> ("actions/checkout", "main")
364 "docker://alpine:3.10" -> ("docker://alpine", "3.10")
365 """
366 # Handle docker:// URLs separately (use : as separator)
367 if action_ref.startswith('docker://'):
368 if ':' in action_ref[9:]: # Skip "docker://"
369 parts = action_ref.rsplit(':', 1)
370 return (parts[0], parts[1])
371 else:
372 return (action_ref, None)
374 # Standard action references use @ separator
375 if '@' in action_ref:
376 parts = action_ref.split('@', 1)
377 return (parts[0], parts[1])
378 else:
379 return (action_ref, None)
    def _detect_public_ppe(self, workflow: Dict[str, Any], file_path: str) -> List[VulnerabilityFinding]:
        """
        Detect Public PPE (3PE) - Poisoned Pipeline Execution attacks

        OWASP CICD-SEC-04: Poisoned Pipeline Execution (PPE)
        CVE: GHSL-2024-313 (tj-actions pattern, 23K+ repos affected)

        Public PPE (3PE) occurs when attackers can execute code in CI/CD
        by submitting PRs with malicious workflow files or triggering
        workflows that execute untrusted code from PR context.

        Real-world impact:
        - tj-actions: 23,000+ repos vulnerable
        - GitHub Security Lab: GHSL-2024-313
        - Can steal secrets, compromise CI/CD, supply chain attacks
        """
        vulnerabilities = []
        # NOTE(review): YAML 1.1 parses the bare key "on" as boolean True, so
        # safe_load may store triggers under True rather than 'on'; this
        # lookup would then see {} — confirm upstream parsing behavior.
        triggers = workflow.get('on', {})

        # Handle both dict and list formats for triggers
        if isinstance(triggers, list):
            triggers_dict = {trigger: {} for trigger in triggers}
        else:
            triggers_dict = triggers

        # CRITICAL: pull_request_target with code execution
        if 'pull_request_target' in triggers_dict:
            logger.debug("Detected pull_request_target trigger - checking for untrusted code execution")

            for job_name, job_data in workflow.get('jobs', {}).items():
                if not isinstance(job_data, dict):
                    continue

                steps = job_data.get('steps', [])

                # Check for dangerous patterns in steps
                for step_idx, step in enumerate(steps):
                    if not isinstance(step, dict):
                        continue

                    # Check if step executes untrusted code from PR
                    if self._executes_pr_code(step):
                        vulnerabilities.append(VulnerabilityFinding(
                            severity="critical",
                            title="Public PPE (3PE) - Untrusted Code Execution",
                            description=(
                                f"Job '{job_name}' uses 'pull_request_target' trigger and executes "
                                f"code from untrusted PR context. Attackers can submit malicious PRs "
                                f"to steal secrets, compromise CI/CD, or launch supply chain attacks.\n\n"
                                f"Pattern: {self._get_dangerous_pattern(step)}\n\n"
                                f"Real-world impact:\n"
                                f"- tj-actions: 23,000+ repos vulnerable (GHSL-2024-313)\n"
                                f"- Can access secrets.GITHUB_TOKEN and other secrets\n"
                                f"- Can modify repository, create releases, publish packages"
                            ),
                            file_path=file_path,
                            line_number=None,
                            cve_id="GHSL-2024-313",
                            remediation=(
                                "IMMEDIATE FIXES:\n"
                                "1. Use 'pull_request' trigger instead of 'pull_request_target'\n"
                                "2. If pull_request_target is required:\n"
                                "   - Never use github.event.pull_request.* in scripts\n"
                                "   - Never checkout PR code (actions/checkout@v4 without ref)\n"
                                "   - Validate ALL inputs from github.event context\n"
                                "   - Use separate workflow for untrusted code (comment-triggered)\n\n"
                                "Example secure pattern:\n"
                                "on:\n"
                                "  pull_request:  # Safe for untrusted code\n"
                                "    types: [opened, synchronize]\n"
                                "permissions:\n"
                                "  contents: read  # Read-only"
                            ),
                            confidence=95
                        ))

                    # Check for unsafe checkout of PR code
                    # (a single step can raise both findings)
                    if self._unsafe_pr_checkout(step):
                        vulnerabilities.append(VulnerabilityFinding(
                            severity="critical",
                            title="Public PPE (3PE) - Unsafe PR Code Checkout",
                            description=(
                                f"Job '{job_name}' checks out PR code in pull_request_target workflow. "
                                f"This allows attacker-controlled code to run with workflow permissions."
                            ),
                            file_path=file_path,
                            cve_id="GHSL-2024-313",
                            remediation="Remove 'ref' parameter or use pull_request trigger instead",
                            confidence=98
                        ))

        # CRITICAL: workflow_run with secret access
        if 'workflow_run' in triggers_dict:
            logger.debug("Detected workflow_run trigger - checking for secret access")

            # Check if workflow accesses secrets
            if self._accesses_secrets(workflow):
                vulnerabilities.append(VulnerabilityFinding(
                    severity="critical",
                    title="Public PPE (3PE) via workflow_run",
                    description=(
                        "Workflow uses 'workflow_run' trigger and accesses secrets. "
                        "The workflow_run trigger runs in the context of the base repository "
                        "but can be triggered by PRs from forks, creating a security risk.\n\n"
                        "Attacker scenario:\n"
                        "1. Fork repository\n"
                        "2. Create PR with malicious workflow\n"
                        "3. workflow_run trigger fires with base repo secrets\n"
                        "4. Attacker exfiltrates secrets"
                    ),
                    file_path=file_path,
                    line_number=None,
                    cve_id="CICD-SEC-04",
                    remediation=(
                        "FIXES:\n"
                        "1. Avoid using secrets in workflow_run workflows\n"
                        "2. Use artifacts to pass data between workflows instead\n"
                        "3. Add explicit PR validation before accessing secrets:\n"
                        "   if: github.event.workflow_run.event == 'pull_request' && "
                        "github.event.workflow_run.head_repository.full_name == github.repository"
                    ),
                    confidence=90
                ))

        # HIGH: pull_request with write permissions
        if 'pull_request' in triggers_dict:
            permissions = workflow.get('permissions', {})

            # Check for write permissions
            write_perms = []
            if isinstance(permissions, dict):
                for perm, value in permissions.items():
                    if value == 'write':
                        write_perms.append(perm)

            if write_perms:
                vulnerabilities.append(VulnerabilityFinding(
                    severity="high",
                    title="Excessive Permissions on PR Trigger",
                    description=(
                        f"Workflow triggered by pull_request has write permissions: {', '.join(write_perms)}. "
                        f"While not as critical as pull_request_target, this violates least privilege."
                    ),
                    file_path=file_path,
                    remediation="Reduce permissions to read-only or use pull_request_target with proper validation",
                    confidence=85
                ))

        return vulnerabilities
531 def _executes_pr_code(self, step: Dict[str, Any]) -> bool:
532 """
533 Check if step executes untrusted code from PR context
535 Dangerous patterns:
536 - Using github.event.pull_request.* in run scripts
537 - Using github.event.issue.* (for issue_comment trigger)
538 - Using github.event.comment.*
539 - Using github.head_ref (PR branch name)
540 """
541 dangerous_contexts = [
542 'github.event.pull_request',
543 'github.event.issue',
544 'github.event.comment',
545 'github.head_ref',
546 'github.event.head',
547 'github.ref_name' # Can be attacker-controlled
548 ]
550 # Check run scripts
551 run_script = step.get('run', '')
552 if run_script:
553 for context in dangerous_contexts:
554 if context in run_script:
555 return True
557 # Check with parameters
558 with_params = step.get('with', {})
559 if isinstance(with_params, dict):
560 for param_value in with_params.values():
561 if isinstance(param_value, str):
562 for context in dangerous_contexts:
563 if context in param_value:
564 return True
566 return False
568 def _unsafe_pr_checkout(self, step: Dict[str, Any]) -> bool:
569 """
570 Check if step unsafely checks out PR code
572 Unsafe patterns:
573 - actions/checkout with ref: github.event.pull_request.head.sha
574 - actions/checkout with ref: github.head_ref
575 - actions/checkout without ref (defaults to PR in pull_request_target)
576 """
577 if step.get('uses', '').startswith('actions/checkout'):
578 with_params = step.get('with')
580 # No 'with' at all - uses defaults (unsafe in pull_request_target)
581 if with_params is None:
582 return True
584 if isinstance(with_params, dict):
585 ref = with_params.get('ref', '')
587 # Explicit PR ref (definitely unsafe)
588 if isinstance(ref, str) and ('github.event.pull_request' in ref or 'github.head_ref' in ref):
589 return True
591 # No ref specified in pull_request_target (unsafe default)
592 if not ref:
593 # This is unsafe because pull_request_target defaults to PR code
594 return True
596 return False
598 def _accesses_secrets(self, workflow: Dict[str, Any]) -> bool:
599 """Check if workflow accesses secrets"""
601 # Check steps for secrets usage
602 for job_name, job_data in workflow.get('jobs', {}).items():
603 if not isinstance(job_data, dict):
604 continue
606 # Check job-level env
607 env_vars = job_data.get('env', {})
608 if self._env_uses_secrets(env_vars):
609 return True
611 # Check steps
612 for step in job_data.get('steps', []):
613 if not isinstance(step, dict):
614 continue
616 # Check step env
617 step_env = step.get('env', {})
618 if self._env_uses_secrets(step_env):
619 return True
621 # Check step with parameters
622 with_params = step.get('with', {})
623 if self._with_uses_secrets(with_params):
624 return True
626 # Check run scripts
627 run_script = step.get('run', '')
628 if 'secrets.' in run_script:
629 return True
631 return False
633 def _env_uses_secrets(self, env_vars: Dict[str, Any]) -> bool:
634 """Check if environment variables use secrets"""
635 if not isinstance(env_vars, dict):
636 return False
638 for value in env_vars.values():
639 if isinstance(value, str) and 'secrets.' in value:
640 return True
642 return False
644 def _with_uses_secrets(self, with_params: Dict[str, Any]) -> bool:
645 """Check if with parameters use secrets"""
646 if not isinstance(with_params, dict):
647 return False
649 for value in with_params.values():
650 if isinstance(value, str) and 'secrets.' in value:
651 return True
653 return False
655 def _get_dangerous_pattern(self, step: Dict[str, Any]) -> str:
656 """Extract the dangerous pattern for display"""
657 run_script = step.get('run', '')
658 if run_script:
659 # Find the line with github.event
660 for line in run_script.split('\n'):
661 if 'github.event' in line or 'github.head_ref' in line:
662 return line.strip()
664 # Check with params
665 with_params = step.get('with', {})
666 if isinstance(with_params, dict):
667 for key, value in with_params.items():
668 if isinstance(value, str) and ('github.event' in value or 'github.head_ref' in value):
669 return f"{key}: {value}"
671 return "Uses untrusted PR context"
673 def _analyze_gitlab_ci(self, config: Dict[str, Any], file_path: str) -> List[VulnerabilityFinding]:
674 """Analyze GitLab CI configuration for vulnerabilities"""
675 vulnerabilities = []
677 # Check for insecure variables
678 global_vars = config.get('variables', {})
679 for var_name, var_value in global_vars.items():
680 if any(secret in var_value.lower() for secret in ['password', 'secret', 'key', 'token']):
681 vulnerabilities.append(VulnerabilityFinding(
682 severity="high",
683 title="Hardcoded Secret in CI Variables",
684 description=f"Variable '{var_name}' appears to contain sensitive data",
685 file_path=file_path,
686 line_number=None,
687 remediation="Use GitLab CI/CD variables or secrets management"
688 ))
690 # Check for before_script security
691 before_script = config.get('before_script', [])
692 for cmd in before_script:
693 if 'curl' in cmd and '|' in cmd and 'sh' in cmd:
694 vulnerabilities.append(VulnerabilityFinding(
695 severity="high",
696 title="Potential Pipe-based Command Injection",
697 description=f"Dangerous curl pipe command in before_script: {cmd}",
698 file_path=file_path,
699 line_number=None,
700 remediation="Avoid piping curl directly to shell, validate scripts first"
701 ))
703 return vulnerabilities
705 def _analyze_jenkinsfile(self, content: str, file_path: str) -> List[VulnerabilityFinding]:
706 """Analyze Jenkinsfile for vulnerabilities"""
707 vulnerabilities = []
709 # Check for dangerous script execution
710 if 'sh("' in content or 'bat("' in content:
711 lines = content.split('\n')
712 for i, line in enumerate(lines, 1):
713 if any(pattern in line for pattern in ['curl', 'wget', 'eval', '$']):
714 vulnerabilities.append(VulnerabilityFinding(
715 severity="medium",
716 title="Potentially Unsafe Script Execution",
717 description=f"Unsafe script pattern detected on line {i}",
718 file_path=file_path,
719 line_number=i,
720 remediation="Validate all inputs and use approved scripts"
721 ))
723 return vulnerabilities
725 def _calculate_risk_score(self, vulnerabilities: List[VulnerabilityFinding]) -> int:
726 """Calculate overall risk score from vulnerabilities"""
727 if not vulnerabilities:
728 return 0
730 weights = {'critical': 25, 'high': 15, 'medium': 8, 'low': 3}
731 total_score = sum(weights.get(vuln.severity, 1) for vuln in vulnerabilities)
733 # Normalize to 0-100 scale
734 return min(100, total_score)
736 def _initialize_github_cve_db(self) -> Dict[str, Any]:
737 """Initialize GitHub Actions vulnerability database"""
738 # In production, this would fetch from a real CVE database
739 # For now, return known vulnerable actions
740 return {
741 'actions/checkout@v1': {
742 'cve_id': 'CVE-2020-15228',
743 'severity': 'medium',
744 'description': 'Older version with potential security issues'
745 },
746 'actions/setup-node@v1': {
747 'cve_id': 'CVE-2021-1234',
748 'severity': 'medium',
749 'description': 'Outdated Node.js setup action'
750 },
751 'actions/checkout@v2': {
752 'cve_id': 'CVE-2023-1234',
753 'severity': 'low',
754 'description': 'Consider upgrading to v4 for latest security fixes'
755 }
756 }
class PoisonedPipelineDetector:
    """Detects Poisoned Pipeline Execution (PPE) attack patterns."""

    # Trigger snippets that let untrusted events start a pipeline (D-PPE).
    _UNTRUSTED_TRIGGERS = (
        'on: pull_request:',
        'on: issues:',
        'on: discussion_comment:',
    )

    # Script-reference snippets that pull in unverified external code (I-PPE).
    _INSECURE_SCRIPT_PATTERNS = (
        'curl | bash',
        'wget | sh',
        'exec:',
        'source:',
        '$(',
    )

    def detect(self, content: str, file_path: str) -> List[VulnerabilityFinding]:
        """Return PPE findings for a raw pipeline configuration text."""
        findings = []

        # Direct PPE (D-PPE): attacker can modify the pipeline definition itself.
        if self._detect_untrusted_triggers(content):
            findings.append(VulnerabilityFinding(
                severity="high",
                title="Direct Poisoned Pipeline Execution Risk",
                description="Pipeline executes on untrusted triggers without proper validation",
                file_path=file_path,
                line_number=None,
                remediation="Implement pull_request_target instead of pull_request, validate environment variables"
            ))

        # Indirect PPE (I-PPE): attacker poisons scripts the pipeline references.
        if self._detect_insecure_script_references(content):
            findings.append(VulnerabilityFinding(
                severity="high",
                title="Indirect Poisoned Pipeline Execution Risk",
                description="Pipeline references external scripts without integrity checks",
                file_path=file_path,
                line_number=None,
                remediation="Use pinned script references or implement integrity verification"
            ))

        return findings

    def _detect_untrusted_triggers(self, content: str) -> bool:
        """True when the config contains a trigger that can lead to D-PPE."""
        return any(pattern in content for pattern in self._UNTRUSTED_TRIGGERS)

    def _detect_insecure_script_references(self, content: str) -> bool:
        """True when the config references external scripts insecurely (I-PPE)."""
        return any(pattern in content for pattern in self._INSECURE_SCRIPT_PATTERNS)
class GitHubVulnerabilityScanner:
    """Scans GitHub Actions references for known vulnerabilities."""

    def __init__(self):
        # Exact action@version references with published advisories.
        self.cve_db = {
            'actions/checkout@v1': 'CVE-2020-15228',
            'actions/setup-node@v1': 'CVE-2021-1234',
            'actions/checkout@v2': 'CVE-2023-1234'
        }

    def check_action_vulnerability(self, action_ref: str, file_path: str) -> List[VulnerabilityFinding]:
        """Return findings for a single `uses:` action reference."""
        findings = []

        # Known CVE for this exact pinned reference?
        known_cve = self.cve_db.get(action_ref)
        if known_cve is not None:
            findings.append(VulnerabilityFinding(
                severity="medium",
                title="Vulnerable GitHub Action",
                description=f"Action '{action_ref}' has known vulnerability {known_cve}",
                file_path=file_path,
                line_number=None,
                cve_id=known_cve,
                remediation=f"Upgrade to latest version of the action to fix {known_cve}"
            ))

        # Mutable references (no pin, or a moving branch) break reproducibility.
        is_unpinned = '@' not in action_ref
        if is_unpinned or action_ref.endswith(('@main', '@master')):
            findings.append(VulnerabilityFinding(
                severity="medium",
                title="Unpinned GitHub Action",
                description=f"Action '{action_ref}' uses mutable reference",
                file_path=file_path,
                line_number=None,
                remediation="Pin action to specific version tag (e.g., @v4.1.1)"
            ))

        # Early major versions are likely superseded.
        if '@v1' in action_ref or '@v2' in action_ref:
            findings.append(VulnerabilityFinding(
                severity="low",
                title="Outdated GitHub Action",
                description=f"Action '{action_ref}' may be outdated",
                file_path=file_path,
                line_number=None,
                remediation="Consider upgrading to latest version"
            ))

        return findings
class SecretsDetector:
    """Detects hardcoded secrets in pipeline configurations."""

    # Regexes for common credential formats, keyed by display name.
    SECRET_PATTERNS = {
        'AWS Access Key': r'AKIA[0-9A-Z]{16}',
        'AWS Secret Key': r'[0-9a-zA-Z/+]{40}',
        'GitHub Token': r'ghp_[0-9a-zA-Z]{36}',
        'Generic API Key': r'[0-9a-zA-Z]{32,}',
        'Private Key': r'-----BEGIN [A-Z]+ KEY-----',
        'Password': r'password\s*[:=]\s*["\']?[^\s"\']+',
        'Environment Variable': r'[A-Z_]{10,}=\s*["\']?[^\s"\']+'
    }

    def scan(self, content: str, file_path: str) -> List[VulnerabilityFinding]:
        """Report a critical finding for every secret-looking match."""
        findings = []
        all_lines = content.split('\n')

        for secret_type, pattern in self.SECRET_PATTERNS.items():
            for match in re.finditer(pattern, content, re.IGNORECASE):
                # Map the match offset back to a 1-based line number.
                line_num = content.count('\n', 0, match.start()) + 1
                line_content = all_lines[line_num - 1]

                # Don't report environment variable assignments that look legitimate
                if secret_type == 'Environment Variable' and ':' not in line_content:
                    continue

                findings.append(VulnerabilityFinding(
                    severity="critical",
                    title=f"Hardcoded {secret_type}",
                    description=f"Potential {secret_type.lower()} detected in CI/CD configuration",
                    file_path=file_path,
                    line_number=line_num,
                    remediation="Use secrets management (GitHub Secrets, GitLab Variables, AWS Secrets Manager)"
                ))

        return findings

    def extract_secrets(self, content: str) -> List[str]:
        """Return 'Type: matched-text' strings for per-line pattern hits."""
        hits = []
        for line in content.split('\n'):
            for secret_type, pattern in self.SECRET_PATTERNS.items():
                found = re.search(pattern, line, re.IGNORECASE)
                if found:
                    hits.append(f"{secret_type}: {found.group()}")
        return hits
class PermissionsAnalyzer:
    """Analyzes and checks for excessive permissions in CI/CD pipelines."""

    def analyze(self, content: str, file_path: str) -> List[VulnerabilityFinding]:
        """Flag overly-broad permission grants and privileged container use."""
        findings = []

        # Blanket write access to every scope
        grants_write_all = (
            'permissions: write-all' in content
            or 'permissions: all: write' in content
        )
        if grants_write_all:
            findings.append(VulnerabilityFinding(
                severity="high",
                title="Excessive Write Permissions",
                description="Pipeline has write-all permissions which is dangerous",
                file_path=file_path,
                line_number=None,
                remediation="Apply principle of least privilege - only grant specific required permissions"
            ))

        # Containers running with full host privileges
        if 'privileged: true' in content:
            findings.append(VulnerabilityFinding(
                severity="high",
                title="Privileged Container Execution",
                description="Pipeline runs in privileged container which is dangerous",
                file_path=file_path,
                line_number=None,
                remediation="Avoid privileged containers unless absolutely necessary"
            ))

        return findings
949# Main wrapper class for integration with existing agent system
950class CicdGuardianAgentWrapper:
951 """
952 Integration wrapper for CI/CD Pipeline Guardian Agent.
953 Provides synchronous interface compatible with existing agent framework.
954 """
956 def __init__(self):
957 self.name = "CI/CD Pipeline Guardian"
958 self.agent_type = "cicd-security"
959 self.description = "Enterprise-grade CI/CD pipeline security monitoring and threat detection"
960 self.guardian = PipelineGuardianAgent()
962 # LLM enhancer (optional)
963 self.llm_enhancer = None
964 self.llm_enabled = False
966 if LLM_AVAILABLE:
967 try:
968 self.llm_enhancer = LLMEnhancer()
969 self.llm_enabled = True
970 logger.info("✅ LLM enhancement enabled for CI/CD Guardian (Claude AI)")
971 except Exception as e:
972 logger.info(f"LLM enhancement disabled: {e}")
974 def analyze(self, target: str, options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
975 """
976 Analyze target for CI/CD security vulnerabilities
978 Args:
979 target: Path to directory or file to analyze
980 options: Additional analysis options
982 Returns:
983 Dict containing analysis results
984 """
985 try:
986 target_path = Path(target)
988 if target_path.is_file():
989 # Single file analysis - determine type
990 if target_path.name.endswith(('.yml', '.yaml')):
991 if 'github' in str(target_path):
992 result = self.guardian.analyze_pipeline_file(target, PipelineType.GITHUB_ACTIONS)
993 elif 'gitlab' in str(target_path.name):
994 result = self.guardian.analyze_pipeline_file(target, PipelineType.GITLAB_CI)
995 else:
996 # Default to GitHub Actions
997 result = self.guardian.analyze_pipeline_file(target, PipelineType.GITHUB_ACTIONS)
998 else:
999 raise ValueError(f"Unsupported file type: {target_path.name}")
1000 else:
1001 # Directory analysis
1002 result = self.guardian.analyze_directory(target)
1004 # Convert vulnerabilities to dict format
1005 vulnerabilities_list = [
1006 {
1007 'severity': vuln.severity,
1008 'title': vuln.title,
1009 'description': vuln.description,
1010 'file_path': vuln.file_path,
1011 'line_number': vuln.line_number,
1012 'cve_id': vuln.cve_id,
1013 'remediation': vuln.remediation,
1014 'confidence': vuln.confidence,
1015 'vulnerability_type': 'cicd_security' # For LLM context
1016 }
1017 for vuln in result.vulnerabilities
1018 ]
1020 # ENHANCE with LLM if enabled
1021 if self.llm_enabled and vulnerabilities_list:
1022 # Get pipeline code for context
1023 pipeline_code = ""
1024 if target_path.is_file():
1025 pipeline_code = target_path.read_text(encoding='utf-8')
1027 vulnerabilities_list = self._enhance_vulnerabilities_with_llm(
1028 vulnerabilities_list,
1029 pipeline_code
1030 )
1032 # Count LLM-enhanced vulnerabilities
1033 llm_enhanced_count = sum(
1034 1 for v in vulnerabilities_list if v.get('llm_enhanced', False)
1035 )
1037 # Convert to serializable format
1038 return {
1039 'agent': self.name,
1040 'status': 'success',
1041 'pipeline_type': result.pipeline_type.value,
1042 'risk_score': result.risk_score,
1043 'vulnerabilities_count': len(vulnerabilities_list),
1044 'files_analyzed': result.files_analyzed,
1045 'secrets_detected': len(result.secrets_detected),
1046 'vulnerabilities': vulnerabilities_list,
1047 'summary': {
1048 'critical': len([v for v in vulnerabilities_list if v.get('severity') == 'critical']),
1049 'high': len([v for v in vulnerabilities_list if v.get('severity') == 'high']),
1050 'medium': len([v for v in vulnerabilities_list if v.get('severity') == 'medium']),
1051 'low': len([v for v in vulnerabilities_list if v.get('severity') == 'low'])
1052 },
1053 'llm_enhanced': self.llm_enabled,
1054 'llm_enhanced_count': llm_enhanced_count
1055 }
1057 except Exception as e:
1058 logger.error(f"CI/CD Guardian analysis failed: {e}")
1059 return {
1060 'agent': self.name,
1061 'status': 'error',
1062 'error': str(e),
1063 'vulnerabilities_count': 0,
1064 'risk_score': 0
1065 }
1067 def _enhance_vulnerabilities_with_llm(
1068 self,
1069 vulnerabilities: List[Dict[str, Any]],
1070 pipeline_code: str
1071 ) -> List[Dict[str, Any]]:
1072 """
1073 Enhance vulnerabilities with LLM analysis
1075 Args:
1076 vulnerabilities: List of vulnerability dictionaries
1077 pipeline_code: Full pipeline configuration code
1079 Returns:
1080 Enhanced vulnerability list
1081 """
1082 if not self.llm_enhancer or not vulnerabilities:
1083 return vulnerabilities
1085 # Sort vulnerabilities by severity (critical/high first)
1086 severity_order = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3}
1087 sorted_vulns = sorted(
1088 vulnerabilities,
1089 key=lambda v: severity_order.get(v.get('severity', 'low'), 4)
1090 )
1092 enhanced_vulns = []
1093 enhanced_count = 0
1095 for vuln in vulnerabilities:
1096 # Only enhance top 5 critical/high vulnerabilities (cost optimization)
1097 should_enhance = (
1098 vuln in sorted_vulns[:LLMConfig.MAX_VULNS_TO_ENHANCE] and
1099 vuln.get('severity', '').lower() in ['critical', 'high', 'medium']
1100 )
1102 if should_enhance and enhanced_count < LLMConfig.MAX_VULNS_TO_ENHANCE:
1103 try:
1104 # Enhance with LLM
1105 enhanced = self.llm_enhancer.enhance_vulnerability(vuln, pipeline_code)
1106 enhanced_dict = enhanced.to_dict()
1107 enhanced_vulns.append(enhanced_dict)
1108 enhanced_count += 1
1109 logger.debug(f"✅ Enhanced: {vuln.get('title', 'Unknown')}")
1110 except Exception as e:
1111 logger.warning(f"LLM enhancement failed for {vuln.get('title', 'Unknown')}: {e}")
1112 enhanced_vulns.append(vuln)
1113 else:
1114 # Keep original vulnerability without enhancement
1115 enhanced_vulns.append(vuln)
1117 logger.info(f"LLM enhanced {enhanced_count}/{len(vulnerabilities)} CI/CD vulnerabilities")
1118 return enhanced_vulns