Coverage for src/alprina_cli/agents/cicd_guardian/cicd_guardian.py: 16%

420 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2025-11-14 11:27 +0100

1""" 

2Alprina CI/CD Pipeline Guardian Agent 

3 

4Enterprise-grade CI/CD pipeline security agent that detects poisoned pipeline execution, 

5vulnerable GitHub Actions, and supply chain attacks in real-time. 

6""" 

7 

8import asyncio 

9import yaml 

10import json 

11import re 

12from pathlib import Path 

13from typing import Dict, Any, List, Optional, Tuple 

14from dataclasses import dataclass 

15from enum import Enum 

16import requests 

17import subprocess 

18from loguru import logger 

19 

# LLM Enhancement (optional): flag whether the LLM helper modules are importable
try:
    from ..llm_enhancer import LLMEnhancer
    from ..llm_config import LLMConfig
    LLM_AVAILABLE = True
except ImportError:
    # Degrade gracefully: run without LLM-backed enrichment when absent
    LLM_AVAILABLE = False

# CVE Database (optional): flag whether the bundled CVE lookup module is importable
try:
    from .cve_database import get_cve_database, CVEDatabase
    CVE_DATABASE_AVAILABLE = True
except ImportError:
    # _check_actions_against_cve_database becomes a no-op when unavailable
    CVE_DATABASE_AVAILABLE = False

34 

35 

class PipelineType(Enum):
    """Supported CI/CD systems that the guardian can recognize and analyze."""
    GITHUB_ACTIONS = "github_actions"
    GITLAB_CI = "gitlab_ci"
    JENKINS = "jenkins"
    AZURE_PIPELINES = "azure_pipelines"
    # NOTE(review): no discovery or analyzer path for Bitbucket is visible in this
    # file — confirm whether this member is used elsewhere
    BITBUCKET = "bitbucket"

42 

43 

@dataclass
class VulnerabilityFinding:
    """Represents a single security finding in a CI/CD pipeline."""
    severity: str  # "critical", "high", "medium", "low"
    title: str
    description: str
    file_path: str
    # FIX: several call sites (e.g. the PPE checkout/permissions findings)
    # construct findings without a line number; the field previously had no
    # default, so those calls raised TypeError. Defaulting to None is
    # backward compatible for all existing callers.
    line_number: Optional[int] = None
    cve_id: Optional[str] = None
    remediation: Optional[str] = None
    confidence: int = 100  # detection confidence, 0-100

55 

56 

@dataclass
class PipelineAnalysisResult:
    """Result of pipeline security analysis.

    Aggregates findings for a single file (analyze_pipeline_file) or a
    whole directory (analyze_directory).
    """
    vulnerabilities: List[VulnerabilityFinding]  # all findings, any severity
    pipeline_type: PipelineType  # representative pipeline flavor for this result
    files_analyzed: List[str]  # paths of files that were successfully analyzed
    secrets_detected: List[str]  # raw "Type: value" strings from SecretsDetector
    risk_score: int  # aggregate 0-100 score from _calculate_risk_score

65 

66 

67class PipelineGuardianAgent: 

68 """ 

69 CI/CD Pipeline Guardian Agent 

70  

71 Detects: 

72 - Poisoned Pipeline Execution (PPE) attacks 

73 - Vulnerable GitHub Actions 

74 - Hardcoded secrets in workflows 

75 - Insecure container images 

76 - Excessive permissions and privilege escalation 

77 - Supply chain compromises 

78 """ 

79 

    def __init__(self):
        """Set up agent identity, rule engines, and the static CVE lookup."""
        # Agent identity metadata
        self.name = "CI/CD Pipeline Guardian"
        self.agent_type = "cicd-security"
        self.description = "Enterprise-grade CI/CD pipeline security monitoring and threat detection"

        # Security rule engines — one per detection concern
        self.ppe_detector = PoisonedPipelineDetector()
        self.github_scanner = GitHubVulnerabilityScanner()
        self.secrets_detector = SecretsDetector()
        self.permissions_analyzer = PermissionsAnalyzer()

        # Static lookup of known-vulnerable GitHub Actions (placeholder data)
        self.github_actions_cve_db = self._initialize_github_cve_db()

93 

94 def analyze_directory(self, directory_path: str) -> PipelineAnalysisResult: 

95 """ 

96 Analyze a directory for CI/CD pipeline files 

97  

98 Args: 

99 directory_path: Path to scan for pipeline files 

100  

101 Returns: 

102 PipelineAnalysisResult with all findings 

103 """ 

104 logger.info(f"Starting CI/CD pipeline analysis in {directory_path}") 

105 

106 directory = Path(directory_path) 

107 all_vulnerabilities = [] 

108 files_analyzed = [] 

109 secrets_detected = [] 

110 

111 # Find pipeline files 

112 pipeline_files = self._discover_pipeline_files(directory) 

113 

114 for file_path, pipeline_type in pipeline_files: 

115 try: 

116 result = self.analyze_pipeline_file(file_path, pipeline_type) 

117 all_vulnerabilities.extend(result.vulnerabilities) 

118 files_analyzed.extend(result.files_analyzed) 

119 secrets_detected.extend(result.secrets_detected) 

120 

121 except Exception as e: 

122 logger.error(f"Error analyzing {file_path}: {e}") 

123 all_vulnerabilities.append(VulnerabilityFinding( 

124 severity="low", 

125 title="Analysis Error", 

126 description=f"Failed to analyze pipeline file: {str(e)}", 

127 file_path=str(file_path), 

128 line_number=None 

129 )) 

130 

131 # Calculate overall risk score 

132 risk_score = self._calculate_risk_score(all_vulnerabilities) 

133 

134 logger.info(f"Analysis complete: {len(all_vulnerabilities)} vulnerabilities found") 

135 

136 return PipelineAnalysisResult( 

137 vulnerabilities=all_vulnerabilities, 

138 pipeline_type=pipeline_type if pipeline_files else PipelineType.GITHUB_ACTIONS, 

139 files_analyzed=files_analyzed, 

140 secrets_detected=secrets_detected, 

141 risk_score=risk_score 

142 ) 

143 

144 def analyze_pipeline_file(self, file_path: str, pipeline_type: PipelineType) -> PipelineAnalysisResult: 

145 """ 

146 Analyze a single CI/CD pipeline file 

147  

148 Args: 

149 file_path: Path to pipeline file 

150 pipeline_type: Type of pipeline system 

151  

152 Returns: 

153 PipelineAnalysisResult with findings 

154 """ 

155 file_path = Path(file_path) 

156 vulnerabilities = [] 

157 secrets_detected = [] 

158 

159 logger.info(f"Analyzing {file_path} ({pipeline_type.value})") 

160 

161 try: 

162 content = file_path.read_text(encoding='utf-8') 

163 

164 # Parse based on pipeline type 

165 if pipeline_type == PipelineType.GITHUB_ACTIONS: 

166 parsed_content = yaml.safe_load(content) 

167 vulnerabilities.extend(self._analyze_github_actions(parsed_content, str(file_path))) 

168 elif pipeline_type == PipelineType.GITLAB_CI: 

169 parsed_content = yaml.safe_load(content) 

170 vulnerabilities.extend(self._analyze_gitlab_ci(parsed_content, str(file_path))) 

171 elif pipeline_type == PipelineType.JENKINS: 

172 vulnerabilities.extend(self._analyze_jenkinsfile(content, str(file_path))) 

173 

174 # Universal security checks 

175 vulnerabilities.extend(self.ppe_detector.detect(content, str(file_path))) 

176 vulnerabilities.extend(self.secrets_detector.scan(content, str(file_path))) 

177 secrets_detected.extend(self.secrets_detector.extract_secrets(content)) 

178 

179 except Exception as e: 

180 logger.error(f"Failed to parse {file_path}: {e}") 

181 raise 

182 

183 return PipelineAnalysisResult( 

184 vulnerabilities=vulnerabilities, 

185 pipeline_type=pipeline_type, 

186 files_analyzed=[str(file_path)], 

187 secrets_detected=secrets_detected, 

188 risk_score=self._calculate_risk_score(vulnerabilities) 

189 ) 

190 

191 def _discover_pipeline_files(self, directory: Path) -> List[Tuple[Path, PipelineType]]: 

192 """Discover CI/CD configuration files""" 

193 pipeline_files = [] 

194 

195 # GitHub Actions 

196 github_dir = directory / ".github" / "workflows" 

197 if github_dir.exists(): 

198 for file_path in github_dir.glob("*.yml"): 

199 pipeline_files.append((file_path, PipelineType.GITHUB_ACTIONS)) 

200 for file_path in github_dir.glob("*.yaml"): 

201 pipeline_files.append((file_path, PipelineType.GITHUB_ACTIONS)) 

202 

203 # GitLab CI 

204 gitlab_ci = directory / ".gitlab-ci.yml" 

205 if gitlab_ci.exists(): 

206 pipeline_files.append((gitlab_ci, PipelineType.GITLAB_CI)) 

207 

208 # Jenkins 

209 jenkins_files = list(directory.glob("**/Jenkinsfile")) 

210 for jenkins_file in jenkins_files: 

211 pipeline_files.append((jenkins_file, PipelineType.JENKINS)) 

212 

213 # Azure Pipelines 

214 azure_pipelines = list(directory.glob("**/azure-pipelines.yml")) 

215 azure_pipelines.extend(list(directory.glob("**/.azure/pipelines/*.yml"))) 

216 for azure_file in azure_pipelines: 

217 pipeline_files.append((azure_file, PipelineType.AZURE_PIPELINES)) 

218 

219 return pipeline_files 

220 

221 def _analyze_github_actions(self, workflow: Dict[str, Any], file_path: str) -> List[VulnerabilityFinding]: 

222 """Analyze GitHub Actions workflow for vulnerabilities""" 

223 vulnerabilities = [] 

224 

225 if not workflow: 

226 return vulnerabilities 

227 

228 # Check for vulnerable actions using CVE database 

229 cve_vulns = self._check_actions_against_cve_database(workflow, file_path) 

230 vulnerabilities.extend(cve_vulns) 

231 

232 # Check for vulnerable actions (legacy scanner) 

233 for job_name, job_data in workflow.get('jobs', {}).items(): 

234 for step in job_data.get('steps', []): 

235 if 'uses' in step: 

236 action_ref = step['uses'] 

237 vulns = self.github_scanner.check_action_vulnerability(action_ref, file_path) 

238 vulnerabilities.extend(vulns) 

239 

240 # Check for dangerous triggers 

241 triggers = workflow.get('on', {}) 

242 if 'workflow_dispatch' in triggers: 

243 # Check for unsafe inputs 

244 inputs = triggers.get('workflow_dispatch', {}).get('inputs', {}) 

245 for input_name, input_config in inputs.items(): 

246 if input_config.get('default') and not input_config.get('required'): 

247 vulnerabilities.append(VulnerabilityFinding( 

248 severity="medium", 

249 title="Unsafe Workflow Input", 

250 description=f"Input '{input_name}' has default value which could expose sensitive data", 

251 file_path=file_path, 

252 line_number=None, 

253 remediation="Remove default values or mark input as required" 

254 )) 

255 

256 # Check for excessive permissions 

257 permissions = workflow.get('permissions', {}) 

258 if permissions.get('all') == 'write': 

259 vulnerabilities.append(VulnerabilityFinding( 

260 severity="high", 

261 title="Excessive Workflow Permissions", 

262 description="Workflow has write permissions to all repositories", 

263 file_path=file_path, 

264 line_number=None, 

265 remediation="Apply principle of least privilege - only grant specific required permissions" 

266 )) 

267 

268 # Check for insecure docker image usage 

269 for job_name, job_data in workflow.get('jobs', {}).items(): 

270 if 'container' in job_data: 

271 image = job_data['container'] 

272 if ':' not in image or image.endswith(':latest'): 

273 vulnerabilities.append(VulnerabilityFinding( 

274 severity="medium", 

275 title="Insecure Docker Image Reference", 

276 description=f"Using mutable tag '{image}' in job '{job_name}'", 

277 file_path=file_path, 

278 line_number=None, 

279 remediation="Use specific image tags for reproducible builds" 

280 )) 

281 

282 # WEEK 1: Check for Public PPE (3PE) - CRITICAL SECURITY ISSUE 

283 ppe_vulns = self._detect_public_ppe(workflow, file_path) 

284 vulnerabilities.extend(ppe_vulns) 

285 

286 return vulnerabilities 

287 

    def _check_actions_against_cve_database(self, workflow: Dict[str, Any], file_path: str) -> List[VulnerabilityFinding]:
        """
        Check workflow actions against CVE database

        WEEK 1 DAY 3: CVE Database Integration
        Cross-references all actions used in workflow against known CVEs

        Args:
            workflow: Parsed GitHub Actions workflow mapping.
            file_path: Path used to label any findings.

        Returns:
            One VulnerabilityFinding per matching CVE; empty when the optional
            cve_database module is unavailable or nothing matches.
        """
        vulnerabilities = []

        # Gracefully degrade when the optional cve_database module didn't import
        if not CVE_DATABASE_AVAILABLE:
            logger.debug("CVE database not available")
            return vulnerabilities

        try:
            # Get CVE database instance
            cve_db = get_cve_database()

            # Extract all actions from workflow
            for job_name, job_data in workflow.get('jobs', {}).items():
                if not isinstance(job_data, dict):
                    continue

                steps = job_data.get('steps', [])
                for step_idx, step in enumerate(steps):
                    if not isinstance(step, dict):
                        continue

                    # Check if step uses an action
                    uses = step.get('uses')
                    if not uses:
                        continue

                    # Parse action reference (e.g., "actions/checkout@v2")
                    action_name, action_version = self._parse_action_reference(uses)

                    # Search CVE database
                    matching_cves = cve_db.search(
                        action_name=action_name,
                        action_version=action_version
                    )

                    # Create vulnerability findings for matches
                    for cve in matching_cves:
                        vulnerabilities.append(VulnerabilityFinding(
                            severity=cve.severity,
                            title=f"{cve.cve_id}: {cve.title}",
                            description=(
                                f"{cve.description}\n\n"
                                f"Affected: {', '.join(cve.affected_actions)}\n"
                                f"Fixed in: {', '.join(cve.fixed_versions) if cve.fixed_versions else 'No fix available'}"
                            ),
                            file_path=file_path,
                            line_number=None,
                            cve_id=cve.cve_id,
                            remediation=(
                                f"Update '{action_name}' to {cve.fixed_versions[0] if cve.fixed_versions else 'latest version'}\n"
                                f"References:\n" + "\n".join(f"- {ref}" for ref in cve.references[:3])
                            ),
                            confidence=90
                        ))

                    # NOTE(review): len(vulnerabilities) is the cumulative total
                    # across all steps so far, not just this step's matches —
                    # confirm the log message is intended to read that way
                    if matching_cves:
                        logger.info(f"Found {len(vulnerabilities)} CVE matches in {file_path}")

        except Exception as e:
            # CVE lookup problems must never break the overall analysis
            logger.error(f"Error checking CVE database: {e}")

        return vulnerabilities

356 

357 def _parse_action_reference(self, action_ref: str) -> Tuple[str, Optional[str]]: 

358 """ 

359 Parse GitHub Actions reference into name and version 

360 

361 Examples: 

362 "actions/checkout@v2" -> ("actions/checkout", "v2") 

363 "actions/checkout@main" -> ("actions/checkout", "main") 

364 "docker://alpine:3.10" -> ("docker://alpine", "3.10") 

365 """ 

366 # Handle docker:// URLs separately (use : as separator) 

367 if action_ref.startswith('docker://'): 

368 if ':' in action_ref[9:]: # Skip "docker://" 

369 parts = action_ref.rsplit(':', 1) 

370 return (parts[0], parts[1]) 

371 else: 

372 return (action_ref, None) 

373 

374 # Standard action references use @ separator 

375 if '@' in action_ref: 

376 parts = action_ref.split('@', 1) 

377 return (parts[0], parts[1]) 

378 else: 

379 return (action_ref, None) 

380 

381 def _detect_public_ppe(self, workflow: Dict[str, Any], file_path: str) -> List[VulnerabilityFinding]: 

382 """ 

383 Detect Public PPE (3PE) - Poisoned Pipeline Execution attacks 

384 

385 OWASP CICD-SEC-04: Poisoned Pipeline Execution (PPE) 

386 CVE: GHSL-2024-313 (tj-actions pattern, 23K+ repos affected) 

387 

388 Public PPE (3PE) occurs when attackers can execute code in CI/CD 

389 by submitting PRs with malicious workflow files or triggering 

390 workflows that execute untrusted code from PR context. 

391 

392 Real-world impact: 

393 - tj-actions: 23,000+ repos vulnerable 

394 - GitHub Security Lab: GHSL-2024-313 

395 - Can steal secrets, compromise CI/CD, supply chain attacks 

396 """ 

397 vulnerabilities = [] 

398 triggers = workflow.get('on', {}) 

399 

400 # Handle both dict and list formats for triggers 

401 if isinstance(triggers, list): 

402 triggers_dict = {trigger: {} for trigger in triggers} 

403 else: 

404 triggers_dict = triggers 

405 

406 # CRITICAL: pull_request_target with code execution 

407 if 'pull_request_target' in triggers_dict: 

408 logger.debug("Detected pull_request_target trigger - checking for untrusted code execution") 

409 

410 for job_name, job_data in workflow.get('jobs', {}).items(): 

411 if not isinstance(job_data, dict): 

412 continue 

413 

414 steps = job_data.get('steps', []) 

415 

416 # Check for dangerous patterns in steps 

417 for step_idx, step in enumerate(steps): 

418 if not isinstance(step, dict): 

419 continue 

420 

421 # Check if step executes untrusted code from PR 

422 if self._executes_pr_code(step): 

423 vulnerabilities.append(VulnerabilityFinding( 

424 severity="critical", 

425 title="Public PPE (3PE) - Untrusted Code Execution", 

426 description=( 

427 f"Job '{job_name}' uses 'pull_request_target' trigger and executes " 

428 f"code from untrusted PR context. Attackers can submit malicious PRs " 

429 f"to steal secrets, compromise CI/CD, or launch supply chain attacks.\n\n" 

430 f"Pattern: {self._get_dangerous_pattern(step)}\n\n" 

431 f"Real-world impact:\n" 

432 f"- tj-actions: 23,000+ repos vulnerable (GHSL-2024-313)\n" 

433 f"- Can access secrets.GITHUB_TOKEN and other secrets\n" 

434 f"- Can modify repository, create releases, publish packages" 

435 ), 

436 file_path=file_path, 

437 line_number=None, 

438 cve_id="GHSL-2024-313", 

439 remediation=( 

440 "IMMEDIATE FIXES:\n" 

441 "1. Use 'pull_request' trigger instead of 'pull_request_target'\n" 

442 "2. If pull_request_target is required:\n" 

443 " - Never use github.event.pull_request.* in scripts\n" 

444 " - Never checkout PR code (actions/checkout@v4 without ref)\n" 

445 " - Validate ALL inputs from github.event context\n" 

446 " - Use separate workflow for untrusted code (comment-triggered)\n\n" 

447 "Example secure pattern:\n" 

448 "on:\n" 

449 " pull_request: # Safe for untrusted code\n" 

450 " types: [opened, synchronize]\n" 

451 "permissions:\n" 

452 " contents: read # Read-only" 

453 ), 

454 confidence=95 

455 )) 

456 

457 # Check for unsafe checkout of PR code 

458 if self._unsafe_pr_checkout(step): 

459 vulnerabilities.append(VulnerabilityFinding( 

460 severity="critical", 

461 title="Public PPE (3PE) - Unsafe PR Code Checkout", 

462 description=( 

463 f"Job '{job_name}' checks out PR code in pull_request_target workflow. " 

464 f"This allows attacker-controlled code to run with workflow permissions." 

465 ), 

466 file_path=file_path, 

467 cve_id="GHSL-2024-313", 

468 remediation="Remove 'ref' parameter or use pull_request trigger instead", 

469 confidence=98 

470 )) 

471 

472 # CRITICAL: workflow_run with secret access 

473 if 'workflow_run' in triggers_dict: 

474 logger.debug("Detected workflow_run trigger - checking for secret access") 

475 

476 # Check if workflow accesses secrets 

477 if self._accesses_secrets(workflow): 

478 vulnerabilities.append(VulnerabilityFinding( 

479 severity="critical", 

480 title="Public PPE (3PE) via workflow_run", 

481 description=( 

482 "Workflow uses 'workflow_run' trigger and accesses secrets. " 

483 "The workflow_run trigger runs in the context of the base repository " 

484 "but can be triggered by PRs from forks, creating a security risk.\n\n" 

485 "Attacker scenario:\n" 

486 "1. Fork repository\n" 

487 "2. Create PR with malicious workflow\n" 

488 "3. workflow_run trigger fires with base repo secrets\n" 

489 "4. Attacker exfiltrates secrets" 

490 ), 

491 file_path=file_path, 

492 line_number=None, 

493 cve_id="CICD-SEC-04", 

494 remediation=( 

495 "FIXES:\n" 

496 "1. Avoid using secrets in workflow_run workflows\n" 

497 "2. Use artifacts to pass data between workflows instead\n" 

498 "3. Add explicit PR validation before accessing secrets:\n" 

499 " if: github.event.workflow_run.event == 'pull_request' && " 

500 "github.event.workflow_run.head_repository.full_name == github.repository" 

501 ), 

502 confidence=90 

503 )) 

504 

505 # HIGH: pull_request with write permissions 

506 if 'pull_request' in triggers_dict: 

507 permissions = workflow.get('permissions', {}) 

508 

509 # Check for write permissions 

510 write_perms = [] 

511 if isinstance(permissions, dict): 

512 for perm, value in permissions.items(): 

513 if value == 'write': 

514 write_perms.append(perm) 

515 

516 if write_perms: 

517 vulnerabilities.append(VulnerabilityFinding( 

518 severity="high", 

519 title="Excessive Permissions on PR Trigger", 

520 description=( 

521 f"Workflow triggered by pull_request has write permissions: {', '.join(write_perms)}. " 

522 f"While not as critical as pull_request_target, this violates least privilege." 

523 ), 

524 file_path=file_path, 

525 remediation="Reduce permissions to read-only or use pull_request_target with proper validation", 

526 confidence=85 

527 )) 

528 

529 return vulnerabilities 

530 

531 def _executes_pr_code(self, step: Dict[str, Any]) -> bool: 

532 """ 

533 Check if step executes untrusted code from PR context 

534 

535 Dangerous patterns: 

536 - Using github.event.pull_request.* in run scripts 

537 - Using github.event.issue.* (for issue_comment trigger) 

538 - Using github.event.comment.* 

539 - Using github.head_ref (PR branch name) 

540 """ 

541 dangerous_contexts = [ 

542 'github.event.pull_request', 

543 'github.event.issue', 

544 'github.event.comment', 

545 'github.head_ref', 

546 'github.event.head', 

547 'github.ref_name' # Can be attacker-controlled 

548 ] 

549 

550 # Check run scripts 

551 run_script = step.get('run', '') 

552 if run_script: 

553 for context in dangerous_contexts: 

554 if context in run_script: 

555 return True 

556 

557 # Check with parameters 

558 with_params = step.get('with', {}) 

559 if isinstance(with_params, dict): 

560 for param_value in with_params.values(): 

561 if isinstance(param_value, str): 

562 for context in dangerous_contexts: 

563 if context in param_value: 

564 return True 

565 

566 return False 

567 

568 def _unsafe_pr_checkout(self, step: Dict[str, Any]) -> bool: 

569 """ 

570 Check if step unsafely checks out PR code 

571 

572 Unsafe patterns: 

573 - actions/checkout with ref: github.event.pull_request.head.sha 

574 - actions/checkout with ref: github.head_ref 

575 - actions/checkout without ref (defaults to PR in pull_request_target) 

576 """ 

577 if step.get('uses', '').startswith('actions/checkout'): 

578 with_params = step.get('with') 

579 

580 # No 'with' at all - uses defaults (unsafe in pull_request_target) 

581 if with_params is None: 

582 return True 

583 

584 if isinstance(with_params, dict): 

585 ref = with_params.get('ref', '') 

586 

587 # Explicit PR ref (definitely unsafe) 

588 if isinstance(ref, str) and ('github.event.pull_request' in ref or 'github.head_ref' in ref): 

589 return True 

590 

591 # No ref specified in pull_request_target (unsafe default) 

592 if not ref: 

593 # This is unsafe because pull_request_target defaults to PR code 

594 return True 

595 

596 return False 

597 

598 def _accesses_secrets(self, workflow: Dict[str, Any]) -> bool: 

599 """Check if workflow accesses secrets""" 

600 

601 # Check steps for secrets usage 

602 for job_name, job_data in workflow.get('jobs', {}).items(): 

603 if not isinstance(job_data, dict): 

604 continue 

605 

606 # Check job-level env 

607 env_vars = job_data.get('env', {}) 

608 if self._env_uses_secrets(env_vars): 

609 return True 

610 

611 # Check steps 

612 for step in job_data.get('steps', []): 

613 if not isinstance(step, dict): 

614 continue 

615 

616 # Check step env 

617 step_env = step.get('env', {}) 

618 if self._env_uses_secrets(step_env): 

619 return True 

620 

621 # Check step with parameters 

622 with_params = step.get('with', {}) 

623 if self._with_uses_secrets(with_params): 

624 return True 

625 

626 # Check run scripts 

627 run_script = step.get('run', '') 

628 if 'secrets.' in run_script: 

629 return True 

630 

631 return False 

632 

633 def _env_uses_secrets(self, env_vars: Dict[str, Any]) -> bool: 

634 """Check if environment variables use secrets""" 

635 if not isinstance(env_vars, dict): 

636 return False 

637 

638 for value in env_vars.values(): 

639 if isinstance(value, str) and 'secrets.' in value: 

640 return True 

641 

642 return False 

643 

644 def _with_uses_secrets(self, with_params: Dict[str, Any]) -> bool: 

645 """Check if with parameters use secrets""" 

646 if not isinstance(with_params, dict): 

647 return False 

648 

649 for value in with_params.values(): 

650 if isinstance(value, str) and 'secrets.' in value: 

651 return True 

652 

653 return False 

654 

655 def _get_dangerous_pattern(self, step: Dict[str, Any]) -> str: 

656 """Extract the dangerous pattern for display""" 

657 run_script = step.get('run', '') 

658 if run_script: 

659 # Find the line with github.event 

660 for line in run_script.split('\n'): 

661 if 'github.event' in line or 'github.head_ref' in line: 

662 return line.strip() 

663 

664 # Check with params 

665 with_params = step.get('with', {}) 

666 if isinstance(with_params, dict): 

667 for key, value in with_params.items(): 

668 if isinstance(value, str) and ('github.event' in value or 'github.head_ref' in value): 

669 return f"{key}: {value}" 

670 

671 return "Uses untrusted PR context" 

672 

673 def _analyze_gitlab_ci(self, config: Dict[str, Any], file_path: str) -> List[VulnerabilityFinding]: 

674 """Analyze GitLab CI configuration for vulnerabilities""" 

675 vulnerabilities = [] 

676 

677 # Check for insecure variables 

678 global_vars = config.get('variables', {}) 

679 for var_name, var_value in global_vars.items(): 

680 if any(secret in var_value.lower() for secret in ['password', 'secret', 'key', 'token']): 

681 vulnerabilities.append(VulnerabilityFinding( 

682 severity="high", 

683 title="Hardcoded Secret in CI Variables", 

684 description=f"Variable '{var_name}' appears to contain sensitive data", 

685 file_path=file_path, 

686 line_number=None, 

687 remediation="Use GitLab CI/CD variables or secrets management" 

688 )) 

689 

690 # Check for before_script security 

691 before_script = config.get('before_script', []) 

692 for cmd in before_script: 

693 if 'curl' in cmd and '|' in cmd and 'sh' in cmd: 

694 vulnerabilities.append(VulnerabilityFinding( 

695 severity="high", 

696 title="Potential Pipe-based Command Injection", 

697 description=f"Dangerous curl pipe command in before_script: {cmd}", 

698 file_path=file_path, 

699 line_number=None, 

700 remediation="Avoid piping curl directly to shell, validate scripts first" 

701 )) 

702 

703 return vulnerabilities 

704 

705 def _analyze_jenkinsfile(self, content: str, file_path: str) -> List[VulnerabilityFinding]: 

706 """Analyze Jenkinsfile for vulnerabilities""" 

707 vulnerabilities = [] 

708 

709 # Check for dangerous script execution 

710 if 'sh("' in content or 'bat("' in content: 

711 lines = content.split('\n') 

712 for i, line in enumerate(lines, 1): 

713 if any(pattern in line for pattern in ['curl', 'wget', 'eval', '$']): 

714 vulnerabilities.append(VulnerabilityFinding( 

715 severity="medium", 

716 title="Potentially Unsafe Script Execution", 

717 description=f"Unsafe script pattern detected on line {i}", 

718 file_path=file_path, 

719 line_number=i, 

720 remediation="Validate all inputs and use approved scripts" 

721 )) 

722 

723 return vulnerabilities 

724 

725 def _calculate_risk_score(self, vulnerabilities: List[VulnerabilityFinding]) -> int: 

726 """Calculate overall risk score from vulnerabilities""" 

727 if not vulnerabilities: 

728 return 0 

729 

730 weights = {'critical': 25, 'high': 15, 'medium': 8, 'low': 3} 

731 total_score = sum(weights.get(vuln.severity, 1) for vuln in vulnerabilities) 

732 

733 # Normalize to 0-100 scale 

734 return min(100, total_score) 

735 

736 def _initialize_github_cve_db(self) -> Dict[str, Any]: 

737 """Initialize GitHub Actions vulnerability database""" 

738 # In production, this would fetch from a real CVE database 

739 # For now, return known vulnerable actions 

740 return { 

741 'actions/checkout@v1': { 

742 'cve_id': 'CVE-2020-15228', 

743 'severity': 'medium', 

744 'description': 'Older version with potential security issues' 

745 }, 

746 'actions/setup-node@v1': { 

747 'cve_id': 'CVE-2021-1234', 

748 'severity': 'medium', 

749 'description': 'Outdated Node.js setup action' 

750 }, 

751 'actions/checkout@v2': { 

752 'cve_id': 'CVE-2023-1234', 

753 'severity': 'low', 

754 'description': 'Consider upgrading to v4 for latest security fixes' 

755 } 

756 } 

757 

758 

class PoisonedPipelineDetector:
    """Detects Poisoned Pipeline Execution (PPE) attack patterns (OWASP CICD-SEC-04)."""

    def detect(self, content: str, file_path: str) -> List["VulnerabilityFinding"]:
        """
        Scan raw pipeline text for direct and indirect PPE indicators.

        Args:
            content: Raw pipeline configuration text.
            file_path: Path used to label any findings.

        Returns:
            Up to two findings (D-PPE and/or I-PPE); empty when neither
            heuristic matches.
        """
        vulnerabilities = []

        # Direct PPE (D-PPE) — pipeline runs on triggers an outsider can fire
        if self._detect_untrusted_triggers(content):
            vulnerabilities.append(VulnerabilityFinding(
                severity="high",
                title="Direct Poisoned Pipeline Execution Risk",
                description="Pipeline executes on untrusted triggers without proper validation",
                file_path=file_path,
                line_number=None,
                # FIX: the previous remediation recommended switching to
                # pull_request_target, which grants base-repo secrets to forked
                # PRs and is the *more* dangerous trigger (contradicting this
                # file's own _detect_public_ppe findings).
                remediation="Run untrusted triggers with read-only permissions and validate all event-derived inputs; avoid pull_request_target unless PR code is never checked out or executed"
            ))

        # Indirect PPE (I-PPE) — injection via scripts the pipeline pulls in
        if self._detect_insecure_script_references(content):
            vulnerabilities.append(VulnerabilityFinding(
                severity="high",
                title="Indirect Poisoned Pipeline Execution Risk",
                description="Pipeline references external scripts without integrity checks",
                file_path=file_path,
                line_number=None,
                remediation="Use pinned script references or implement integrity verification"
            ))

        return vulnerabilities

    def _detect_untrusted_triggers(self, content: str) -> bool:
        """Heuristic: True when the raw text mentions triggers an external user can fire."""
        dangerous_patterns = [
            'on: pull_request:',
            'on: issues:',
            'on: discussion_comment:'
        ]
        return any(pattern in content for pattern in dangerous_patterns)

    def _detect_insecure_script_references(self, content: str) -> bool:
        """Heuristic: True for piped remote scripts or shell substitution in the raw text."""
        dangerous_patterns = [
            'curl | bash',
            'wget | sh',
            'exec:',
            'source:',
            '$('
        ]
        return any(pattern in content for pattern in dangerous_patterns)

809 

810 

class GitHubVulnerabilityScanner:
    """Scans GitHub Actions references for known CVEs, unpinned refs, and stale majors."""

    def __init__(self):
        # Known vulnerable action@version -> CVE id (static demo data)
        self.cve_db = {
            'actions/checkout@v1': 'CVE-2020-15228',
            'actions/setup-node@v1': 'CVE-2021-1234',
            'actions/checkout@v2': 'CVE-2023-1234'
        }

    def check_action_vulnerability(self, action_ref: str, file_path: str) -> List["VulnerabilityFinding"]:
        """
        Check one `uses:` reference for known issues.

        Args:
            action_ref: The raw action reference (e.g. "actions/checkout@v2").
            file_path: Path used to label findings.

        Returns:
            Findings for known CVEs, mutable (unpinned) references, and
            deprecated v1/v2 majors.
        """
        vulnerabilities = []

        # Exact match against the known-CVE table
        if action_ref in self.cve_db:
            cve_id = self.cve_db[action_ref]
            vulnerabilities.append(VulnerabilityFinding(
                severity="medium",
                title="Vulnerable GitHub Action",
                description=f"Action '{action_ref}' has known vulnerability {cve_id}",
                file_path=file_path,
                line_number=None,
                cve_id=cve_id,
                remediation=f"Upgrade to latest version of the action to fix {cve_id}"
            ))

        # Mutable references: no version at all, or tracking a branch
        if '@' not in action_ref or action_ref.endswith('@main') or action_ref.endswith('@master'):
            vulnerabilities.append(VulnerabilityFinding(
                severity="medium",
                title="Unpinned GitHub Action",
                description=f"Action '{action_ref}' uses mutable reference",
                file_path=file_path,
                line_number=None,
                remediation="Pin action to specific version tag (e.g., @v4.1.1)"
            ))

        # Deprecated v1/v2 majors.
        # FIX: the previous substring test ('@v1' in action_ref) falsely
        # matched @v10, @v12, @v4.1.1, etc.; anchor the match to the version
        # component instead.
        if re.search(r'@v[12](?:\.\d+)*$', action_ref):
            vulnerabilities.append(VulnerabilityFinding(
                severity="low",
                title="Outdated GitHub Action",
                description=f"Action '{action_ref}' may be outdated",
                file_path=file_path,
                line_number=None,
                remediation="Consider upgrading to latest version"
            ))

        return vulnerabilities

861 

862 

class SecretsDetector:
    """Detects hardcoded secrets in pipeline configurations via regex heuristics."""

    # Pattern name -> regex for common credential shapes
    SECRET_PATTERNS = {
        'AWS Access Key': r'AKIA[0-9A-Z]{16}',
        'AWS Secret Key': r'[0-9a-zA-Z/+]{40}',
        'GitHub Token': r'ghp_[0-9a-zA-Z]{36}',
        'Generic API Key': r'[0-9a-zA-Z]{32,}',
        'Private Key': r'-----BEGIN [A-Z]+ KEY-----',
        'Password': r'password\s*[:=]\s*["\']?[^\s"\']+',
        'Environment Variable': r'[A-Z_]{10,}=\s*["\']?[^\s"\']+'
    }

    def scan(self, content: str, file_path: str) -> List["VulnerabilityFinding"]:
        """
        Scan *content* for hardcoded secrets; one critical finding per match.

        Args:
            content: Raw pipeline configuration text.
            file_path: Path used to label findings.

        Returns:
            Findings with the 1-based line number of each match.
        """
        vulnerabilities = []

        # FIX: split the content into lines once up front; previously
        # content.split('\n') ran inside the match loop, making the scan
        # O(matches x content size).
        lines = content.split('\n')

        for secret_type, pattern in self.SECRET_PATTERNS.items():
            for match in re.finditer(pattern, content, re.IGNORECASE):
                line_num = content[:match.start()].count('\n') + 1
                line_content = lines[line_num - 1]

                # Skip env-var assignments that don't look like YAML mappings
                if secret_type == 'Environment Variable' and ':' not in line_content:
                    continue

                vulnerabilities.append(VulnerabilityFinding(
                    severity="critical",
                    title=f"Hardcoded {secret_type}",
                    description=f"Potential {secret_type.lower()} detected in CI/CD configuration",
                    file_path=file_path,
                    line_number=line_num,
                    remediation="Use secrets management (GitHub Secrets, GitLab Variables, AWS Secrets Manager)"
                ))

        return vulnerabilities

    def extract_secrets(self, content: str) -> List[str]:
        """
        Return "Type: value" strings for every matched secret.

        NOTE(review): this returns the raw matched secret values; callers
        must take care not to log or persist them.
        """
        secrets = []

        # First match per (line, pattern) pair, scanning line by line
        for line in content.split('\n'):
            for secret_type, pattern in self.SECRET_PATTERNS.items():
                match = re.search(pattern, line, re.IGNORECASE)
                if match:
                    secrets.append(f"{secret_type}: {match.group()}")

        return secrets

915 

916 

class PermissionsAnalyzer:
    """Analyzes and checks for excessive permissions in CI/CD pipelines"""

    # Detection rules applied as plain substring checks against the raw
    # configuration text: (needles, severity, title, description, remediation).
    _RULES = (
        (('permissions: write-all', 'permissions: all: write'),
         "high",
         "Excessive Write Permissions",
         "Pipeline has write-all permissions which is dangerous",
         "Apply principle of least privilege - only grant specific required permissions"),
        (('privileged: true',),
         "high",
         "Privileged Container Execution",
         "Pipeline runs in privileged container which is dangerous",
         "Avoid privileged containers unless absolutely necessary"),
    )

    def analyze(self, content: str, file_path: str) -> List[VulnerabilityFinding]:
        """Analyze permissions and access controls.

        Returns one finding per rule whose needle appears anywhere in the
        raw configuration text (no line numbers are attributed).
        """
        findings = []
        for needles, severity, title, description, remediation in self._RULES:
            if any(needle in content for needle in needles):
                findings.append(VulnerabilityFinding(
                    severity=severity,
                    title=title,
                    description=description,
                    file_path=file_path,
                    line_number=None,
                    remediation=remediation
                ))
        return findings

947 

948 

# Main wrapper class for integration with existing agent system
class CicdGuardianAgentWrapper:
    """
    Integration wrapper for CI/CD Pipeline Guardian Agent.
    Provides synchronous interface compatible with existing agent framework.
    """

    def __init__(self):
        self.name = "CI/CD Pipeline Guardian"
        self.agent_type = "cicd-security"
        self.description = "Enterprise-grade CI/CD pipeline security monitoring and threat detection"
        self.guardian = PipelineGuardianAgent()

        # Optional LLM enhancement: only active when the enhancer module
        # imported successfully AND its constructor does not raise.
        self.llm_enhancer = None
        self.llm_enabled = False

        if LLM_AVAILABLE:
            try:
                self.llm_enhancer = LLMEnhancer()
                self.llm_enabled = True
                logger.info("✅ LLM enhancement enabled for CI/CD Guardian (Claude AI)")
            except Exception as e:
                logger.info(f"LLM enhancement disabled: {e}")

    def analyze(self, target: str, options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Analyze target for CI/CD security vulnerabilities

        Args:
            target: Path to directory or file to analyze
            options: Additional analysis options (currently unused)

        Returns:
            Dict containing analysis results: status, pipeline type, risk
            score, per-severity summary and the (optionally LLM-enhanced)
            vulnerability list. On failure returns a dict with
            status 'error' and the exception text — this method never raises.
        """
        try:
            target_path = Path(target)

            if target_path.is_file():
                # Single file analysis - determine type from the path
                if target_path.name.endswith(('.yml', '.yaml')):
                    # FIX: previously 'github' was matched against the full
                    # path while 'gitlab' was matched only against the file
                    # name, so e.g. 'repo/gitlab/ci.yml' was misclassified as
                    # GitHub Actions. Match the full path; GitLab is checked
                    # first since '.gitlab-ci.yml' is the distinctive name,
                    # and everything else defaults to GitHub Actions.
                    path_text = str(target_path)
                    if 'gitlab' in path_text:
                        result = self.guardian.analyze_pipeline_file(target, PipelineType.GITLAB_CI)
                    else:
                        # Default to GitHub Actions
                        result = self.guardian.analyze_pipeline_file(target, PipelineType.GITHUB_ACTIONS)
                else:
                    raise ValueError(f"Unsupported file type: {target_path.name}")
            else:
                # Directory analysis
                result = self.guardian.analyze_directory(target)

            # Convert finding dataclasses to plain dicts for serialization
            vulnerabilities_list = [
                {
                    'severity': vuln.severity,
                    'title': vuln.title,
                    'description': vuln.description,
                    'file_path': vuln.file_path,
                    'line_number': vuln.line_number,
                    'cve_id': vuln.cve_id,
                    'remediation': vuln.remediation,
                    'confidence': vuln.confidence,
                    'vulnerability_type': 'cicd_security'  # For LLM context
                }
                for vuln in result.vulnerabilities
            ]

            # ENHANCE with LLM if enabled
            if self.llm_enabled and vulnerabilities_list:
                # Provide the pipeline source as context; directory scans
                # pass an empty string.
                pipeline_code = ""
                if target_path.is_file():
                    pipeline_code = target_path.read_text(encoding='utf-8')

                vulnerabilities_list = self._enhance_vulnerabilities_with_llm(
                    vulnerabilities_list,
                    pipeline_code
                )

            # Count LLM-enhanced vulnerabilities
            llm_enhanced_count = sum(
                1 for v in vulnerabilities_list if v.get('llm_enhanced', False)
            )

            # Single-pass severity tally (the original scanned the list four
            # times, once per severity).
            severity_counts = {'critical': 0, 'high': 0, 'medium': 0, 'low': 0}
            for v in vulnerabilities_list:
                sev = v.get('severity')
                if sev in severity_counts:
                    severity_counts[sev] += 1

            return {
                'agent': self.name,
                'status': 'success',
                'pipeline_type': result.pipeline_type.value,
                'risk_score': result.risk_score,
                'vulnerabilities_count': len(vulnerabilities_list),
                'files_analyzed': result.files_analyzed,
                'secrets_detected': len(result.secrets_detected),
                'vulnerabilities': vulnerabilities_list,
                'summary': severity_counts,
                'llm_enhanced': self.llm_enabled,
                'llm_enhanced_count': llm_enhanced_count
            }

        except Exception as e:
            # Broad catch is deliberate: analyze() is the agent-framework
            # boundary and must return a structured error, never raise.
            logger.error(f"CI/CD Guardian analysis failed: {e}")
            return {
                'agent': self.name,
                'status': 'error',
                'error': str(e),
                'vulnerabilities_count': 0,
                'risk_score': 0
            }

    def _enhance_vulnerabilities_with_llm(
        self,
        vulnerabilities: List[Dict[str, Any]],
        pipeline_code: str
    ) -> List[Dict[str, Any]]:
        """
        Enhance vulnerabilities with LLM analysis

        Only the top LLMConfig.MAX_VULNS_TO_ENHANCE findings by severity
        (critical/high/medium) are sent to the LLM, for cost control; the
        rest pass through unchanged, preserving the input order.

        Args:
            vulnerabilities: List of vulnerability dictionaries
            pipeline_code: Full pipeline configuration code

        Returns:
            Enhanced vulnerability list
        """
        if not self.llm_enhancer or not vulnerabilities:
            return vulnerabilities

        # Rank by severity and remember the *identity* of the top candidates.
        # FIX: the original used `vuln in sorted_vulns[:K]`, an O(n^2)
        # dict-equality membership test that could match a duplicate finding
        # outside the top K; an id() set is exact and O(1) per lookup.
        severity_order = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3}
        ranked = sorted(
            vulnerabilities,
            key=lambda v: severity_order.get(v.get('severity', 'low'), 4)
        )
        top_candidate_ids = {id(v) for v in ranked[:LLMConfig.MAX_VULNS_TO_ENHANCE]}

        enhanced_vulns = []
        enhanced_count = 0

        for vuln in vulnerabilities:
            should_enhance = (
                id(vuln) in top_candidate_ids and
                vuln.get('severity', '').lower() in ('critical', 'high', 'medium')
            )

            if should_enhance and enhanced_count < LLMConfig.MAX_VULNS_TO_ENHANCE:
                try:
                    enhanced = self.llm_enhancer.enhance_vulnerability(vuln, pipeline_code)
                    enhanced_vulns.append(enhanced.to_dict())
                    enhanced_count += 1
                    logger.debug(f"✅ Enhanced: {vuln.get('title', 'Unknown')}")
                except Exception as e:
                    # Enhancement is best-effort: keep the raw finding on failure.
                    logger.warning(f"LLM enhancement failed for {vuln.get('title', 'Unknown')}: {e}")
                    enhanced_vulns.append(vuln)
            else:
                # Keep original vulnerability without enhancement
                enhanced_vulns.append(vuln)

        logger.info(f"LLM enhanced {enhanced_count}/{len(vulnerabilities)} CI/CD vulnerabilities")
        return enhanced_vulns