Coverage for src/alprina_cli/tools/security/vuln_scan.py: 20%

127 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2025-11-14 11:27 +0100

1""" 

2Vulnerability Scanning Tool 

3 

4Context Engineering: 

5- Identify security vulnerabilities in targets 

6- Returns structured vulnerability findings 

7- Configurable depth (quick, standard, deep) 

8- Token-efficient output with severity ranking 

9 

10Find vulnerabilities, not false positives. 

11""" 

12 

13from typing import Dict, Any, List, Literal 

14from pydantic import BaseModel, Field 

15from loguru import logger 

16from pathlib import Path 

17import re 

18 

19from alprina_cli.tools.base import AlprinaToolBase, ToolOk, ToolError 

20 

21 

class VulnScanParams(BaseModel):
    """
    Parameters for vulnerability scanning.

    Context: Focused schema for vulnerability detection.

    Fields:
        target: What to scan. A path that exists on disk is scanned as a
            local file/directory; anything else is handled as a remote target.
        depth: How many check families run ("quick", "standard" or "deep").
        categories: Finding categories to keep; the special value "all"
            disables category filtering.
        max_findings: Upper bound on the number of findings returned.
    """

    target: str = Field(
        description="Target to scan (file, directory, URL, or IP)"
    )
    depth: Literal["quick", "standard", "deep"] = Field(
        default="standard",
        description="Scan depth: quick (common vulns), standard (balanced), deep (comprehensive)"
    )
    categories: List[str] = Field(
        default_factory=lambda: ["all"],
        description="Vulnerability categories: all, injection, crypto, config, code, deps"
    )
    # ge=0: a negative limit would be used as a slice bound (findings[:-n]),
    # which drops the last n findings instead of capping the result size.
    max_findings: int = Field(
        default=50,
        ge=0,
        description="Maximum vulnerability findings to return"
    )

43 

44 

# ---------------------------------------------------------------------------
# Detection rules used by VulnScanTool.
#
# They live at module level (not on the class) so the regexes are compiled
# once per process instead of once per scanned file, and so that no
# un-annotated attributes are added to the tool class itself.
# ---------------------------------------------------------------------------

# Rank used to order findings; lower sorts first. Unknown severities are
# treated like INFO.
_SEVERITY_ORDER = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3, "INFO": 4}

# A "quick" directory scan stops once this many raw findings were collected.
_QUICK_SCAN_FINDING_CAP = 20

# (compiled pattern, finding title, severity) - matched against whole content.
_SECRET_RULES = tuple(
    (re.compile(pattern), title, severity)
    for pattern, title, severity in (
        (r"(?i)(password|passwd|pwd)\s*=\s*['\"][^'\"]{3,}['\"]", "Password in plaintext", "HIGH"),
        (r"(?i)(api[_-]?key|apikey)\s*=\s*['\"][^'\"]{10,}['\"]", "API key exposed", "CRITICAL"),
        (r"(?i)(secret[_-]?key|secretkey)\s*=\s*['\"][^'\"]{10,}['\"]", "Secret key exposed", "CRITICAL"),
        (r"(?i)(private[_-]?key|privatekey)\s*=\s*['\"][^'\"]{20,}['\"]", "Private key exposed", "CRITICAL"),
        (r"(?i)(token)\s*=\s*['\"][^'\"]{10,}['\"]", "Auth token exposed", "HIGH"),
        # PEM headers, including the OpenSSH / DSA / encrypted PKCS#8 variants.
        (
            r"-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----",
            "Private key in file",
            "CRITICAL",
        ),
    )
)

# (finding title, confidence, compiled patterns) - matched line by line.
# At most one finding per title is emitted for a given line.
_INJECTION_RULES = (
    (
        "Potential SQL injection",
        0.7,
        tuple(
            re.compile(pattern, re.IGNORECASE)
            for pattern in (
                r"execute\([^)]*\+[^)]*\)",         # String concatenation in SQL
                r'SELECT.*"\s*\+\s*',               # SQL with concatenation
                r"=\s*['\"]SELECT.*['\"].*\+",      # SQL query with concatenation
                r"\.format\(.*SELECT",              # format() with SQL argument
                r"SELECT.*['\"]\s*\.format\(",      # "SELECT ...".format(...)
            )
        ),
    ),
    (
        "Potential command injection",
        0.8,
        tuple(
            re.compile(pattern, re.IGNORECASE)
            for pattern in (
                r"os\.system\([^)]*\+[^)]*\)",      # os.system with concatenation
                # subprocess entry points with concatenation
                r"subprocess\.(?:call|run|Popen|check_call|check_output)\([^)]*\+[^)]*\)",
                # Bare eval()/exec() only: the lookbehind skips names that
                # merely end in "eval"/"exec" (ast.literal_eval, model.eval(),
                # regex.exec(...)), which are not code-execution sinks.
                r"(?<![\w.])eval\(",
                r"(?<![\w.])exec\(",
            )
        ),
    ),
)

# (compiled pattern, finding title, severity) - matched line by line.
_WEAK_CRYPTO_RULES = tuple(
    (re.compile(pattern), title, severity)
    for pattern, title, severity in (
        (r"hashlib\.md5", "MD5 usage (weak)", "MEDIUM"),
        (r"hashlib\.sha1", "SHA1 usage (weak)", "MEDIUM"),
        (r"DES\.new", "DES encryption (weak)", "HIGH"),
        (r"random\.random", "Insecure random (use secrets module)", "MEDIUM"),
    )
)

# "debug = True", "DEBUG=1", 'debug = "true"'. The \b keeps values that only
# start with 1/True (e.g. "debug = 10") from being reported.
_DEBUG_ENABLED = re.compile(r"(?i)debug\s*=\s*(?:(?:True|1)\b|\"true\")")

# Whole-word markers only, so words such as "hackathon" are not reported.
_ATTENTION_MARKER = re.compile(r"(?i)\b(TODO|FIXME|HACK|XXX)\b")


def _line_of_offset(content: str, offset: int) -> int:
    """Return the 1-based line number of character ``offset`` in ``content``."""
    return content.count("\n", 0, offset) + 1


class VulnScanTool(AlprinaToolBase[VulnScanParams]):
    """
    Vulnerability scanning tool.

    Context Engineering Benefits:
    - Returns ranked findings (CRITICAL → INFO)
    - Configurable depth for token control
    - Category filtering for focused scans
    - Max findings limit for context efficiency

    Scan Depths (local targets):
    - quick: secrets + config checks; a directory scan stops early once
      enough findings were collected
    - standard: quick checks + injection + weak-crypto checks
    - deep: standard checks + code-quality markers (TODO/FIXME/...)

    Categories:
    - injection: SQL, XSS, command injection
    - crypto: Weak crypto, exposed secrets
    - config: Misconfigurations, insecure defaults
    - code: Code quality issues, logic flaws
    - deps: Dependency vulnerabilities

    Usage:
    ```python
    tool = VulnScanTool()
    result = await tool.execute(VulnScanParams(
        target="./src",
        depth="standard",
        categories=["injection", "crypto"]
    ))
    ```
    """

    name: str = "VulnScan"
    description: str = """Vulnerability scanning for security issues.

Capabilities:
- Injection vulnerabilities (SQL, XSS, Command)
- Cryptographic weaknesses
- Configuration issues
- Code quality problems
- Dependency vulnerabilities

Returns: Ranked vulnerability findings (HIGH → LOW)"""
    params: type[VulnScanParams] = VulnScanParams

    async def execute(self, params: VulnScanParams) -> ToolOk | ToolError:
        """
        Execute vulnerability scan.

        Context: Returns limited, ranked findings.

        Args:
            params: Scan request (target, depth, categories, max_findings).

        Returns:
            ToolOk whose content holds "target", "depth", "findings" and a
            "summary" dict (total_findings, truncated, by_severity,
            target_type); ToolError if anything raised during the scan.
        """
        logger.info(f"VulnScan: {params.target} (depth={params.depth})")

        try:
            # Determine target type: anything that exists on disk is local.
            target_path = Path(params.target).expanduser()
            try:
                is_local = target_path.exists()
            except OSError:
                # Some Python versions let errors such as ENAMETOOLONG
                # escape exists() (e.g. for a very long URL). Such a target
                # cannot be a local path, so treat it as remote.
                is_local = False

            if is_local:
                findings = await self._scan_local(params, target_path)
            else:
                findings = await self._scan_remote(params)

            # Filter by categories
            if "all" not in params.categories:
                wanted = set(params.categories)
                findings = [f for f in findings if f.get("category") in wanted]

            # Sort by severity (CRITICAL → HIGH → MEDIUM → LOW → INFO).
            # list.sort is stable, so discovery order is kept within a level.
            findings.sort(
                key=lambda f: _SEVERITY_ORDER.get(
                    f.get("severity", "INFO"), _SEVERITY_ORDER["INFO"]
                )
            )

            # Limit findings. Clamp at 0: a negative limit used as a slice
            # bound would drop findings from the end instead of capping.
            limit = max(params.max_findings, 0)
            truncated = len(findings) > limit
            if truncated:
                findings = findings[:limit]

            # Calculate summary stats (over the findings actually returned)
            severity_counts: Dict[str, int] = {}
            for finding in findings:
                sev = finding.get("severity", "INFO")
                severity_counts[sev] = severity_counts.get(sev, 0) + 1

            # Prepare result
            result_content = {
                "target": params.target,
                "depth": params.depth,
                "findings": findings,
                "summary": {
                    "total_findings": len(findings),
                    "truncated": truncated,
                    "by_severity": severity_counts,
                    "target_type": "local" if is_local else "remote"
                }
            }

            # Store in memory if available
            # (memory_service is not defined in this class; presumably it is
            # provided by AlprinaToolBase.)
            if self.memory_service and self.memory_service.is_enabled():
                self.memory_service.add_scan_results(
                    tool_name="VulnScan",
                    target=params.target,
                    results=result_content
                )

            return ToolOk(content=result_content)

        except Exception as e:
            # Tool boundary: report the failure as a ToolError, never raise.
            logger.error(f"VulnScan failed: {e}")
            return ToolError(
                message=f"Vulnerability scan failed: {str(e)}",
                brief="VulnScan failed"
            )

    async def _scan_local(
        self,
        params: VulnScanParams,
        target_path: Path
    ) -> List[Dict[str, Any]]:
        """
        Scan local file or directory.

        Context: Returns structured vulnerability findings.

        A file is scanned directly; a directory is walked recursively and
        every regular file below it is scanned.
        """
        if target_path.is_file():
            return self._scan_file(target_path, params.depth)

        findings: List[Dict[str, Any]] = []
        # Iterate the walk lazily so a quick scan can stop without first
        # listing the whole tree.
        for file_path in target_path.rglob("*"):
            if not file_path.is_file():
                continue
            findings.extend(self._scan_file(file_path, params.depth))

            # For quick scan, stop once enough findings were collected
            if params.depth == "quick" and len(findings) >= _QUICK_SCAN_FINDING_CAP:
                break

        return findings

    def _scan_file(self, file_path: Path, depth: str) -> List[Dict[str, Any]]:
        """
        Scan individual file for vulnerabilities.

        Binary files are skipped. A failure while reading or checking the
        file is logged as a warning and the findings collected so far are
        returned, so one bad file does not abort a directory scan.
        """
        findings: List[Dict[str, Any]] = []

        try:
            # Skip binary files
            if self._is_binary(file_path):
                return findings

            # Fixed encoding so results do not depend on the host locale;
            # undecodable bytes are dropped.
            content = file_path.read_text(encoding="utf-8", errors="ignore")
            # Split on "\n" only (not splitlines()) so line numbers agree
            # with the "\n" counting used for whole-content matches, even if
            # the file contains form feeds or other exotic separators.
            lines = content.split("\n")

            # Check for secrets/credentials
            findings.extend(self._check_secrets(file_path, content, lines))

            # Injection and crypto checks are skipped for quick scans
            if depth in ("standard", "deep"):
                findings.extend(self._check_injection(file_path, content, lines))
                findings.extend(self._check_crypto(file_path, content, lines))

            # Check for config issues
            findings.extend(self._check_config(file_path, content, lines))

            # Deep scan: additional checks
            if depth == "deep":
                findings.extend(self._check_code_quality(file_path, content, lines))

        except Exception as e:
            logger.warning(f"Could not scan {file_path}: {e}")

        return findings

    def _check_secrets(
        self,
        file_path: Path,
        content: str,
        lines: List[str]
    ) -> List[Dict[str, Any]]:
        """
        Check for exposed secrets.

        Matches the secret patterns against the whole file content and emits
        one "crypto" finding per match. ``lines`` is unused; it is accepted
        so all ``_check_*`` methods share one signature.
        """
        findings = []

        for pattern, title, severity in _SECRET_RULES:
            for match in pattern.finditer(content):
                line_num = _line_of_offset(content, match.start())
                findings.append({
                    "category": "crypto",
                    "severity": severity,
                    "title": title,
                    "description": f"Found at line {line_num} in {file_path.name}",
                    "file": str(file_path),
                    "line_number": line_num,
                    "confidence": 0.9
                })

        return findings

    def _check_injection(
        self,
        file_path: Path,
        content: str,
        lines: List[str]
    ) -> List[Dict[str, Any]]:
        """
        Check for injection vulnerabilities.

        Works line by line. A line yields at most one SQL-injection finding
        and at most one command-injection finding, even when several patterns
        of the same kind match it.
        """
        findings = []

        for line_num, line in enumerate(lines, 1):
            for title, confidence, patterns in _INJECTION_RULES:
                if any(pattern.search(line) for pattern in patterns):
                    findings.append({
                        "category": "injection",
                        "severity": "HIGH",
                        "title": title,
                        "description": f"Line {line_num}: {line.strip()[:80]}",
                        "file": str(file_path),
                        "line_number": line_num,
                        "confidence": confidence
                    })

        return findings

    def _check_crypto(
        self,
        file_path: Path,
        content: str,
        lines: List[str]
    ) -> List[Dict[str, Any]]:
        """
        Check for cryptographic issues.

        Emits one "crypto" finding for every weak-crypto pattern that matches
        a line (a line may produce several findings with different titles).
        """
        findings = []

        for line_num, line in enumerate(lines, 1):
            for pattern, title, severity in _WEAK_CRYPTO_RULES:
                if pattern.search(line):
                    findings.append({
                        "category": "crypto",
                        "severity": severity,
                        "title": title,
                        "description": f"Line {line_num} in {file_path.name}",
                        "file": str(file_path),
                        "line_number": line_num,
                        "confidence": 0.9
                    })

        return findings

    def _check_config(
        self,
        file_path: Path,
        content: str,
        lines: List[str]
    ) -> List[Dict[str, Any]]:
        """
        Check for configuration issues.

        Reports at most one "debug mode enabled" finding per file (located at
        the first match) and flags non-empty files whose name contains
        ".env".
        """
        findings = []

        # Check for debug mode enabled
        debug_match = _DEBUG_ENABLED.search(content)
        if debug_match:
            findings.append({
                "category": "config",
                "severity": "MEDIUM",
                "title": "Debug mode enabled",
                "description": f"Debug mode found in {file_path.name}",
                "file": str(file_path),
                "line_number": _line_of_offset(content, debug_match.start()),
                "confidence": 0.8
            })

        # Check for insecure defaults
        if ".env" in file_path.name and file_path.stat().st_size > 0:
            findings.append({
                "category": "config",
                "severity": "HIGH",
                "title": "Environment file with contents",
                "description": ".env file may contain secrets",
                "file": str(file_path),
                "confidence": 0.7
            })

        return findings

    def _check_code_quality(
        self,
        file_path: Path,
        content: str,
        lines: List[str]
    ) -> List[Dict[str, Any]]:
        """
        Check for code quality issues (deep scan only).

        Emits an INFO finding for every line containing a TODO, FIXME, HACK
        or XXX marker as a whole word (case-insensitive).
        """
        findings = []

        for line_num, line in enumerate(lines, 1):
            if _ATTENTION_MARKER.search(line):
                findings.append({
                    "category": "code",
                    "severity": "INFO",
                    "title": "Code comment requires attention",
                    "description": f"Line {line_num}: {line.strip()[:80]}",
                    "file": str(file_path),
                    "line_number": line_num,
                    "confidence": 1.0
                })

        return findings

    async def _scan_remote(self, params: VulnScanParams) -> List[Dict[str, Any]]:
        """
        Scan remote target (URL or IP).

        Context: Basic remote vulnerability checks.

        No network traffic is generated: the only real check looks at the
        URL scheme, and an INFO finding is always appended to say that
        remote scanning is not fully implemented.
        """
        findings = []
        target = params.target

        # Check for HTTP (URL schemes are case-insensitive)
        if target.lower().startswith("http://"):
            findings.append({
                "category": "config",
                "severity": "MEDIUM",
                "title": "Insecure HTTP protocol",
                "description": "Target uses HTTP instead of HTTPS",
                "confidence": 1.0
            })

        # Placeholder for future remote scanning
        # (would integrate with tools like nmap, nikto, etc.)
        findings.append({
            "category": "info",
            "severity": "INFO",
            "title": "Remote scanning not fully implemented",
            "description": "Use local file/directory scanning for comprehensive results",
            "confidence": 1.0
        })

        return findings

    def _is_binary(self, file_path: Path) -> bool:
        """
        Check if file is binary.

        A file counts as binary when its first 8192 bytes contain a NUL
        byte. If the file cannot be opened or read, False is returned.
        """
        try:
            with open(file_path, 'rb') as f:
                chunk = f.read(8192)
        except OSError:
            return False
        return b'\x00' in chunk