Coverage for src/alprina_cli/tools/security/recon.py: 22%

97 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2025-11-14 11:27 +0100

1""" 

2Reconnaissance Tool 

3 

4Context Engineering: 

5- Information gathering and target profiling 

6- Returns compressed intelligence summaries 

7- Safe, passive techniques by default 

8- Structured output for analysis 

9 

10Reconnaissance without the noise. 

11""" 

12 

13from typing import Dict, Any, List, Literal 

14from pydantic import BaseModel, Field 

15from loguru import logger 

16import socket 

17import re 

18from pathlib import Path 

19 

20from alprina_cli.tools.base import AlprinaToolBase, ToolOk, ToolError 

21 

22 

class ReconParams(BaseModel):
    """
    Input schema for a single reconnaissance run.

    Context: Deliberately small -- one target, one depth selector and a
    cap on how many findings are handed back.
    """

    # What to inspect: a domain, an IP address, a URL or a local path.
    target: str = Field(description="Target to reconnaissance (domain, IP, or file path)")

    # How deep to go; callers that say nothing get the passive scope.
    scope: Literal["passive", "active", "full"] = Field(
        description="Recon scope: passive (safe), active (probing), full (comprehensive)",
        default="passive",
    )

    # Upper bound on the number of findings returned to the caller.
    max_findings: int = Field(
        description="Maximum findings to return (context efficiency)",
        default=50,
    )

40 

41 

class ReconTool(AlprinaToolBase[ReconParams]):
    """
    Reconnaissance and information gathering tool.

    Context Engineering Benefits:
    - Returns structured intelligence (not raw data dumps)
    - Passive mode by default (safe)
    - Configurable scope for depth control
    - Max findings limit for context control

    A target is first classified by ``_is_network_target`` as either a
    network target (URL, hostname, IP) or a local file/directory.

    Scopes (network targets):
    - passive: DNS resolution of the host plus a check for a plain
      ``http://`` scheme
    - active: everything in passive, plus a TCP connect probe of a fixed
      list of common ports
    - full: everything in active, plus technology hints derived from
      substrings of the target URL

    File and directory targets are handled by ``_recon_file``, which does
    not read ``scope``.

    Usage:
    ```python
    tool = ReconTool()
    result = await tool.execute(ReconParams(
        target="example.com",
        scope="passive"
    ))
    ```
    """

    # Tool metadata; presumably consumed by AlprinaToolBase for
    # registration/display -- confirm against the base class.
    name: str = "Recon"
    # NOTE(review): this text advertises "Security header analysis", but no
    # method of this class visible in the file performs header analysis.
    description: str = """Reconnaissance and information gathering.

Capabilities:
- DNS resolution and info
- Service detection
- Technology fingerprinting
- Port scanning (active/full modes)
- Security header analysis

Returns: Structured intelligence summary"""
    # Parameter model that ``execute`` receives.
    params: type[ReconParams] = ReconParams

79 

80 async def execute(self, params: ReconParams) -> ToolOk | ToolError: 

81 """ 

82 Execute reconnaissance. 

83 

84 Context: Returns structured findings, not raw data. 

85 """ 

86 logger.info(f"Recon: {params.target} (scope={params.scope})") 

87 

88 try: 

89 # Determine target type 

90 is_network = self._is_network_target(params.target) 

91 

92 if is_network: 

93 findings = await self._recon_network(params) 

94 else: 

95 findings = await self._recon_file(params) 

96 

97 # Limit findings for context efficiency 

98 if len(findings) > params.max_findings: 

99 findings = findings[:params.max_findings] 

100 truncated = True 

101 else: 

102 truncated = False 

103 

104 return ToolOk( 

105 content={ 

106 "target": params.target, 

107 "scope": params.scope, 

108 "findings": findings, 

109 "summary": { 

110 "total_findings": len(findings), 

111 "truncated": truncated, 

112 "target_type": "network" if is_network else "file" 

113 } 

114 } 

115 ) 

116 

117 except Exception as e: 

118 logger.error(f"Recon failed: {e}") 

119 return ToolError( 

120 message=f"Reconnaissance failed: {str(e)}", 

121 brief="Recon failed" 

122 ) 

123 

124 async def _recon_network(self, params: ReconParams) -> List[Dict[str, Any]]: 

125 """ 

126 Network target reconnaissance. 

127 

128 Context: Returns high-level intelligence. 

129 """ 

130 findings = [] 

131 target = params.target 

132 

133 # Remove protocol if present 

134 target_clean = target.replace("https://", "").replace("http://", "").split("/")[0] 

135 

136 # Passive: DNS resolution 

137 try: 

138 ip_addr = socket.gethostbyname(target_clean) 

139 findings.append({ 

140 "type": "DNS Resolution", 

141 "severity": "INFO", 

142 "title": "IP Address Resolved", 

143 "description": f"{target_clean} resolves to {ip_addr}", 

144 "data": {"ip": ip_addr}, 

145 "confidence": 1.0 

146 }) 

147 except Exception as e: 

148 findings.append({ 

149 "type": "DNS Resolution", 

150 "severity": "LOW", 

151 "title": "DNS Resolution Failed", 

152 "description": f"Could not resolve {target_clean}: {str(e)}", 

153 "confidence": 0.8 

154 }) 

155 

156 # Passive: Protocol detection 

157 if target.startswith("http://"): 

158 findings.append({ 

159 "type": "Security", 

160 "severity": "MEDIUM", 

161 "title": "Insecure Protocol Detected", 

162 "description": "Target uses HTTP instead of HTTPS", 

163 "data": {"protocol": "http"}, 

164 "confidence": 1.0 

165 }) 

166 

167 # Active scope: Port scanning 

168 if params.scope in ["active", "full"]: 

169 port_findings = self._scan_common_ports(target_clean) 

170 findings.extend(port_findings) 

171 

172 # Full scope: Technology detection 

173 if params.scope == "full": 

174 tech_findings = self._detect_technologies(target) 

175 findings.extend(tech_findings) 

176 

177 return findings 

178 

179 async def _recon_file(self, params: ReconParams) -> List[Dict[str, Any]]: 

180 """ 

181 File/directory reconnaissance. 

182 

183 Context: Analyze local targets for security info. 

184 """ 

185 findings = [] 

186 target_path = Path(params.target) 

187 

188 if not target_path.exists(): 

189 return [{ 

190 "type": "Error", 

191 "severity": "HIGH", 

192 "title": "Target Not Found", 

193 "description": f"Path does not exist: {params.target}", 

194 "confidence": 1.0 

195 }] 

196 

197 # File/directory info 

198 if target_path.is_file(): 

199 findings.append({ 

200 "type": "File Info", 

201 "severity": "INFO", 

202 "title": f"File: {target_path.name}", 

203 "description": f"Size: {target_path.stat().st_size} bytes", 

204 "data": { 

205 "type": "file", 

206 "size": target_path.stat().st_size, 

207 "extension": target_path.suffix 

208 }, 

209 "confidence": 1.0 

210 }) 

211 

212 # Check for sensitive file types 

213 sensitive_extensions = [".env", ".key", ".pem", ".p12", ".pfx", ".crt"] 

214 if target_path.suffix in sensitive_extensions: 

215 findings.append({ 

216 "type": "Sensitive File", 

217 "severity": "HIGH", 

218 "title": "Potentially Sensitive File", 

219 "description": f"File has sensitive extension: {target_path.suffix}", 

220 "confidence": 0.8 

221 }) 

222 

223 elif target_path.is_dir(): 

224 # Directory reconnaissance 

225 files = list(target_path.rglob("*")) 

226 file_count = len([f for f in files if f.is_file()]) 

227 

228 findings.append({ 

229 "type": "Directory Info", 

230 "severity": "INFO", 

231 "title": f"Directory: {target_path.name}", 

232 "description": f"Contains {file_count} files", 

233 "data": {"file_count": file_count}, 

234 "confidence": 1.0 

235 }) 

236 

237 # Look for sensitive files 

238 sensitive_files = [ 

239 ".env", ".env.local", "credentials.json", 

240 "id_rsa", "id_dsa", "config.json", "secrets.yml" 

241 ] 

242 

243 for sensitive_name in sensitive_files: 

244 if (target_path / sensitive_name).exists(): 

245 findings.append({ 

246 "type": "Sensitive File", 

247 "severity": "HIGH", 

248 "title": f"Sensitive File Found: {sensitive_name}", 

249 "description": f"Directory contains {sensitive_name}", 

250 "confidence": 0.9 

251 }) 

252 

253 return findings 

254 

255 def _scan_common_ports(self, target: str) -> List[Dict[str, Any]]: 

256 """ 

257 Scan common ports. 

258 

259 Context: Quick scan of well-known ports. 

260 """ 

261 findings = [] 

262 common_ports = { 

263 22: "SSH", 

264 80: "HTTP", 

265 443: "HTTPS", 

266 3306: "MySQL", 

267 5432: "PostgreSQL", 

268 6379: "Redis", 

269 27017: "MongoDB" 

270 } 

271 

272 for port, service in common_ports.items(): 

273 try: 

274 sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 

275 sock.settimeout(0.5) 

276 result = sock.connect_ex((target, port)) 

277 sock.close() 

278 

279 if result == 0: 

280 findings.append({ 

281 "type": "Open Port", 

282 "severity": "MEDIUM" if port in [22, 3306, 5432, 6379, 27017] else "INFO", 

283 "title": f"Port {port} Open ({service})", 

284 "description": f"Service {service} detected on port {port}", 

285 "data": {"port": port, "service": service}, 

286 "confidence": 0.9 

287 }) 

288 except Exception: 

289 pass 

290 

291 return findings 

292 

293 def _detect_technologies(self, target: str) -> List[Dict[str, Any]]: 

294 """ 

295 Detect technologies. 

296 

297 Context: Simplified technology fingerprinting. 

298 """ 

299 findings = [] 

300 

301 # Simple heuristics based on URL patterns 

302 tech_patterns = { 

303 "/wp-": "WordPress", 

304 "/wp-admin": "WordPress", 

305 "/api/": "REST API", 

306 "/graphql": "GraphQL", 

307 ".php": "PHP", 

308 ".aspx": "ASP.NET", 

309 ".jsp": "Java/JSP" 

310 } 

311 

312 for pattern, tech in tech_patterns.items(): 

313 if pattern in target: 

314 findings.append({ 

315 "type": "Technology", 

316 "severity": "INFO", 

317 "title": f"Technology Detected: {tech}", 

318 "description": f"Target appears to use {tech}", 

319 "data": {"technology": tech}, 

320 "confidence": 0.7 

321 }) 

322 

323 return findings 

324 

325 def _is_network_target(self, target: str) -> bool: 

326 """Check if target is network-based""" 

327 # URL protocol = network 

328 if target.startswith(("http://", "https://")): 

329 return True 

330 

331 # Absolute path = file (even if doesn't exist) 

332 if target.startswith("/") or target.startswith("~"): 

333 return False 

334 

335 # Has dot and doesn't exist = likely network 

336 # No dot but doesn't exist = likely network (e.g., "localhost") 

337 target_path = Path(target) 

338 if not target_path.exists(): 

339 # If it looks like a domain or IP, treat as network 

340 return True 

341 

342 # Exists = file 

343 return False