Coverage for src/alprina_cli/tools/security/scan.py: 33%

111 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2025-11-14 11:27 +0100

1""" 

2Security Scan Tool 

3 

4Context Engineering: 

5- Unified scan tool for local and remote targets 

6- Returns compressed findings (not verbose logs) 

7- Configurable scan profiles 

8- Optional agent enhancement 

9 

10Based on: scanner.py + security_engine.py (refactored to tool pattern) 

11""" 

12 

13from pathlib import Path 

14from typing import Dict, Any, List, Literal 

15from pydantic import BaseModel, Field, field_validator 

16from loguru import logger 

17 

18from alprina_cli.tools.base import AlprinaToolBase, ToolOk, ToolError 

19 

20 

# The security agents are an optional dependency. AGENTS_AVAILABLE records
# whether all three entry points could be imported; when they cannot, only
# the built-in analysis is used.
try:
    from alprina_cli.agents.red_teamer import run_red_team_scan
    from alprina_cli.agents.blue_teamer import run_blue_team_scan
    from alprina_cli.agents.network_analyzer import run_network_analyzer_scan
except ImportError:
    AGENTS_AVAILABLE = False
    logger.debug("Security agents not available - using built-in analysis")
else:
    AGENTS_AVAILABLE = True

30 

31 

class ScanParams(BaseModel):
    """
    Validated input parameters for a security scan.

    Declared as pydantic fields so callers get type checking and a
    self-describing schema.
    """

    # What to scan. Surrounding whitespace is removed by ``validate_target``.
    target: str = Field(
        description="Target to scan (file path, directory, URL, or IP)"
    )
    # Closed set of profile names; anything else fails validation.
    profile: Literal[
        "code-audit",
        "web-recon",
        "vuln-scan",
        "secret-detection",
        "config-audit",
        "network-analysis",
        "default"
    ] = Field(
        default="default",
        description="Scan profile determines which checks to run"
    )
    safe_only: bool = Field(
        default=True,
        description="Only perform safe, non-intrusive scans"
    )
    # ge=0: a negative cap would make the truncation slice in the scan tool
    # mean "drop the last N findings" instead of "keep at most N".
    max_findings: int = Field(
        default=20,
        ge=0,
        description="Maximum findings to return (context efficiency)"
    )

    @field_validator('target')
    @classmethod
    def validate_target(cls, v: str) -> str:
        """
        Strip surrounding whitespace and reject blank targets.

        Raises:
            ValueError: if the target is empty or whitespace only.
        """
        stripped = v.strip()
        if not stripped:
            raise ValueError("Target cannot be empty")
        return stripped

69 

70 

class ScanTool(AlprinaToolBase[ScanParams]):
    """
    Single entry point for security scans of local paths and remote targets.

    Design notes:
    - One tool handles both local and remote targets
    - Results are compact findings, not raw scan logs
    - ``max_findings`` caps how many findings are returned
    - The checks to run are selected through a scan profile
    - A profile-to-agent mapping is prepared when the optional agents import

    Supported profiles:
    - code-audit: Static code analysis
    - web-recon: Web application reconnaissance
    - vuln-scan: Vulnerability detection
    - secret-detection: Find hardcoded secrets
    - config-audit: Configuration security
    - network-analysis: Network traffic analysis
    - default: the value used when no profile is given

    Example:
    ```python
    tool = ScanTool()
    result = await tool.execute(ScanParams(
        target="./src",
        profile="code-audit",
        safe_only=True
    ))
    ```
    """

    name: str = "Scan"
    description: str = """Perform security scans on local or remote targets.

Capabilities:
- Code analysis (SAST)
- Web reconnaissance
- Vulnerability detection
- Secret/credential detection
- Configuration auditing
- Network analysis

Returns: High-level summary with key findings (not full scan logs)"""
    params: type[ScanParams] = ScanParams

    def __init__(self, **kwargs):
        """Forward ``kwargs`` to the base tool and cache the profile-to-agent mapping."""
        super().__init__(**kwargs)
        self._agent_mapping = self._build_agent_mapping()

118 

def _build_agent_mapping(self) -> Dict[str, Any]:
    """
    Return the scan-profile -> agent entry point lookup table.

    The table is empty when the optional agent modules could not be
    imported (``AGENTS_AVAILABLE`` is false).
    """
    if AGENTS_AVAILABLE:
        offensive = run_red_team_scan
        defensive = run_blue_team_scan
        network = run_network_analyzer_scan
        return {
            "code-audit": offensive,
            "web-recon": network,
            "vuln-scan": offensive,
            "secret-detection": offensive,
            "config-audit": defensive,
            "network-analysis": network,
            "default": offensive
        }

    return {}

137 

async def execute(self, params: ScanParams) -> ToolOk | ToolError:
    """
    Run a scan against ``params.target`` and return the findings.

    The target is scanned as local when it exists on disk and as remote
    otherwise. When more than ``params.max_findings`` findings are
    produced, the list is cut to that length and the summary gains
    ``truncated=True`` plus ``total_found`` holding the count before
    truncation.

    Returns:
        ``ToolOk`` wrapping the results dict, or ``ToolError`` if any
        exception is raised while scanning.
    """
    logger.info(f"Scan: {params.target} (profile={params.profile}, safe={params.safe_only})")

    try:
        if self._is_local_target(params.target):
            results = await self._scan_local(params)
        else:
            results = await self._scan_remote(params)

        findings = results["findings"]
        # Capture the real count BEFORE slicing; measuring afterwards would
        # always report the cap instead of how many findings existed.
        total_found = len(findings)
        # A negative cap would slice from the end; treat it as zero.
        limit = max(params.max_findings, 0)
        if total_found > limit:
            results["findings"] = findings[:limit]
            results["summary"]["truncated"] = True
            results["summary"]["total_found"] = total_found

        return ToolOk(content=results)

    except Exception as e:
        # Tool boundary: report the failure as a ToolError, do not raise.
        logger.error(f"Scan failed: {e}")
        return ToolError(
            message=f"Scan failed: {str(e)}",
            brief="Scan failed"
        )

170 

171 async def _scan_local(self, params: ScanParams) -> Dict[str, Any]: 

172 """ 

173 Scan local file or directory. 

174 

175 Context: For code, config, or file analysis. 

176 """ 

177 target_path = Path(params.target) 

178 

179 if not target_path.exists(): 

180 raise FileNotFoundError(f"Target not found: {params.target}") 

181 

182 # Try agent-enhanced scan first (skip for now due to event loop conflicts) 

183 # TODO: Refactor old agents to proper async tools 

184 # if AGENTS_AVAILABLE and params.profile in self._agent_mapping: 

185 # agent_func = self._agent_mapping[params.profile] 

186 # result = agent_func(str(target_path), params.safe_only) 

187 # return {...} 

188 

189 # Use built-in scan (clean, async, no event loop conflicts) 

190 return await self._scan_local_builtin(params, target_path) 

191 

async def _scan_remote(self, params: ScanParams) -> Dict[str, Any]:
    """
    Scan a remote target such as a URL, IP address or domain.

    Raises:
        ValueError: if ``params.target`` is not accepted by
            ``_is_valid_remote_target``.
    """
    if self._is_valid_remote_target(params.target):
        # TODO: Refactor old agents to proper async tools.
        # Agent-enhanced scanning is skipped for now because of event loop
        # conflicts, so the built-in scanner handles every remote target.
        return await self._scan_remote_builtin(params)

    raise ValueError(f"Invalid remote target format: {params.target}")

211 

async def _scan_local_builtin(
    self,
    params: ScanParams,
    target_path: Path
) -> Dict[str, Any]:
    """
    Run the built-in analysis on a local path and package the result.

    Files go through ``_analyze_file`` and directories through
    ``_analyze_directory``; any other kind of path yields no findings.
    """
    if target_path.is_file():
        collected = list(self._analyze_file(target_path, params.profile))
    elif target_path.is_dir():
        collected = list(self._analyze_directory(target_path, params.profile))
    else:
        collected = []

    summary = {
        "total_findings": len(collected),
        "powered_by": "built-in",
        "safe_mode": params.safe_only
    }
    return {
        "target": params.target,
        "scan_type": "local",
        "profile": params.profile,
        "findings": collected,
        "summary": summary
    }

241 

242 async def _scan_remote_builtin(self, params: ScanParams) -> Dict[str, Any]: 

243 """ 

244 Built-in remote scan (fallback). 

245 

246 Context: Basic checks when agents unavailable. 

247 """ 

248 findings = [] 

249 

250 # Basic remote target analysis 

251 if params.target.startswith(("http://", "https://")): 

252 findings.append({ 

253 "type": "Web Target", 

254 "severity": "INFO", 

255 "title": "HTTP(S) Target Detected", 

256 "description": f"Target is a web application: {params.target}", 

257 "location": params.target, 

258 "confidence": 1.0 

259 }) 

260 

261 # Check for HTTP (not HTTPS) 

262 if params.target.startswith("http://"): 

263 findings.append({ 

264 "type": "Security Issue", 

265 "severity": "MEDIUM", 

266 "title": "Unencrypted HTTP Connection", 

267 "description": "Target uses HTTP instead of HTTPS. Data may be transmitted insecurely.", 

268 "location": params.target, 

269 "confidence": 0.9 

270 }) 

271 

272 return { 

273 "target": params.target, 

274 "scan_type": "remote", 

275 "profile": params.profile, 

276 "findings": findings, 

277 "summary": { 

278 "total_findings": len(findings), 

279 "powered_by": "built-in", 

280 "safe_mode": params.safe_only 

281 } 

282 } 

283 

284 def _analyze_file(self, file_path: Path, profile: str) -> List[Dict[str, Any]]: 

285 """Analyze individual file""" 

286 findings = [] 

287 

288 # File size check 

289 file_size = file_path.stat().st_size 

290 if file_size > 10 * 1024 * 1024: # 10MB 

291 findings.append({ 

292 "type": "Large File", 

293 "severity": "LOW", 

294 "title": f"Large File Detected: {file_path.name}", 

295 "description": f"File size: {file_size / (1024*1024):.2f}MB", 

296 "location": str(file_path), 

297 "confidence": 1.0 

298 }) 

299 

300 # Secret detection in code-audit profile 

301 if profile in ["code-audit", "secret-detection"]: 

302 secret_patterns = ["password", "api_key", "secret", "token"] 

303 try: 

304 content = file_path.read_text(errors="ignore") 

305 for pattern in secret_patterns: 

306 if pattern in content.lower(): 

307 findings.append({ 

308 "type": "Potential Secret", 

309 "severity": "MEDIUM", 

310 "title": f"Potential {pattern} found in {file_path.name}", 

311 "description": f"File may contain hardcoded secrets", 

312 "location": str(file_path), 

313 "confidence": 0.6 

314 }) 

315 break # Only report once per file 

316 except Exception: 

317 pass # Skip files that can't be read 

318 

319 return findings 

320 

321 def _analyze_directory(self, dir_path: Path, profile: str) -> List[Dict[str, Any]]: 

322 """Analyze directory""" 

323 findings = [] 

324 

325 # Count files 

326 files = list(dir_path.rglob("*")) 

327 file_count = len([f for f in files if f.is_file()]) 

328 

329 findings.append({ 

330 "type": "Directory Scan", 

331 "severity": "INFO", 

332 "title": f"Analyzed directory: {dir_path.name}", 

333 "description": f"Found {file_count} files to analyze", 

334 "location": str(dir_path), 

335 "confidence": 1.0 

336 }) 

337 

338 # Analyze subset of files (limit for context) 

339 analyzed = 0 

340 for file_path in files: 

341 if file_path.is_file() and analyzed < 10: # Limit to 10 files 

342 findings.extend(self._analyze_file(file_path, profile)) 

343 analyzed += 1 

344 

345 return findings 

346 

347 def _is_local_target(self, target: str) -> bool: 

348 """Check if target is local path""" 

349 return Path(target).exists() 

350 

351 def _is_valid_remote_target(self, target: str) -> bool: 

352 """Check if target is valid remote target""" 

353 # URL 

354 if target.startswith(("http://", "https://")): 

355 return True 

356 

357 # IP address 

358 parts = target.split(".") 

359 if len(parts) == 4 and all(p.isdigit() and 0 <= int(p) <= 255 for p in parts): 

360 return True 

361 

362 # Domain name (simple check) 

363 if "." in target and not target.startswith("/"): 

364 return True 

365 

366 return False