Coverage for src/alprina_cli/tools/security/dfir.py: 19%

140 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2025-11-14 11:27 +0100

1""" 

2DFIR Tool (Digital Forensics and Incident Response) 

3 

4Context Engineering: 

5- Forensic analysis and evidence collection 

6- Timeline reconstruction 

7- Artifact preservation 

8- Memory-aware: Builds forensic knowledge base 

9 

10Preserve evidence, reconstruct events. 

11""" 

12 

import hashlib
import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, Any, List, Literal, Optional

from loguru import logger
from pydantic import BaseModel, Field

from alprina_cli.tools.base import AlprinaToolBase, ToolOk, ToolError

22 

23 

24class DFIRParams(BaseModel): 

25 """ 

26 Parameters for DFIR operations. 

27 

28 Context: Focused schema for forensic analysis. 

29 """ 

30 target: str = Field( 

31 description="Target for forensic analysis" 

32 ) 

33 operation: Literal["evidence_collection", "timeline_analysis", "artifact_extraction", "hash_verification", "full_forensics"] = Field( 

34 default="evidence_collection", 

35 description="Operation: evidence_collection, timeline_analysis, artifact_extraction, hash_verification, full_forensics" 

36 ) 

37 preserve_evidence: bool = Field( 

38 default=True, 

39 description="Preserve evidence chain of custody" 

40 ) 

41 max_artifacts: int = Field( 

42 default=100, 

43 description="Maximum artifacts to collect" 

44 ) 

45 

46 

47class DFIRTool(AlprinaToolBase[DFIRParams]): 

48 """ 

49 DFIR tool for digital forensics and incident response. 

50 

51 Context Engineering Benefits: 

52 - Structured forensic findings 

53 - Chain of custody tracking 

54 - Timeline reconstruction 

55 - Memory integration for case correlation 

56 

57 Operations: 

58 - evidence_collection: Collect and preserve evidence 

59 - timeline_analysis: Reconstruct event timeline 

60 - artifact_extraction: Extract forensic artifacts 

61 - hash_verification: Verify file integrity 

62 - full_forensics: Comprehensive forensic analysis 

63 

64 Usage: 

65 ```python 

66 tool = DFIRTool(memory_service=memory) 

67 result = await tool.execute(DFIRParams( 

68 target="/evidence", 

69 operation="evidence_collection", 

70 preserve_evidence=True 

71 )) 

72 ``` 

73 """ 

74 

75 name: str = "DFIR" 

76 description: str = """Digital Forensics and Incident Response. 

77 

78Capabilities: 

79- Evidence collection and preservation 

80- Timeline reconstruction 

81- Forensic artifact extraction 

82- File integrity verification 

83- Comprehensive forensic analysis 

84 

85Returns: Structured forensic findings with chain of custody""" 

86 params: type[DFIRParams] = DFIRParams 

87 

88 # Forensic artifact patterns 

89 FORENSIC_ARTIFACTS = { 

90 "browser_artifacts": ["*.sqlite", "*History*", "*Cookies*", "*Cache*"], 

91 "system_artifacts": ["*.log", "*.evt", "*.evtx", "*Registry*"], 

92 "persistence": ["*.lnk", "*.bat", "*.vbs", "*.ps1", "*startup*"], 

93 "user_activity": ["*.doc*", "*.pdf", "*.xls*", "*recent*"], 

94 } 

95 

96 async def execute(self, params: DFIRParams) -> ToolOk | ToolError: 

97 """ 

98 Execute DFIR operation. 

99 

100 Context: Returns structured forensic findings. 

101 """ 

102 logger.info(f"DFIR: {params.target} (op={params.operation})") 

103 

104 try: 

105 # Check memory for related cases 

106 if self.memory_service and self.memory_service.is_enabled(): 

107 related_cases = self.memory_service.get_tool_context("DFIR", limit=3) 

108 if related_cases: 

109 logger.info(f"Found {len(related_cases)} related forensic cases") 

110 

111 # Execute operation 

112 if params.operation == "evidence_collection": 

113 artifacts = await self._evidence_collection_operation(params) 

114 elif params.operation == "timeline_analysis": 

115 artifacts = await self._timeline_analysis_operation(params) 

116 elif params.operation == "artifact_extraction": 

117 artifacts = await self._artifact_extraction_operation(params) 

118 elif params.operation == "hash_verification": 

119 artifacts = await self._hash_verification_operation(params) 

120 else: # full_forensics 

121 artifacts = await self._full_forensics_operation(params) 

122 

123 # Limit artifacts 

124 if len(artifacts) > params.max_artifacts: 

125 artifacts = artifacts[:params.max_artifacts] 

126 truncated = True 

127 else: 

128 truncated = False 

129 

130 # Calculate forensic stats 

131 artifact_types = {} 

132 for artifact in artifacts: 

133 atype = artifact.get("artifact_type", "unknown") 

134 artifact_types[atype] = artifact_types.get(atype, 0) + 1 

135 

136 result_content = { 

137 "target": params.target, 

138 "operation": params.operation, 

139 "artifacts": artifacts, 

140 "summary": { 

141 "total_artifacts": len(artifacts), 

142 "by_type": artifact_types, 

143 "truncated": truncated, 

144 "timestamp": datetime.utcnow().isoformat(), 

145 "chain_of_custody": params.preserve_evidence 

146 }, 

147 "forensic_notes": "Evidence preserved with chain of custody" if params.preserve_evidence else "Analysis only, no preservation" 

148 } 

149 

150 # Store in memory 

151 if self.memory_service and self.memory_service.is_enabled(): 

152 self.memory_service.add_scan_results( 

153 tool_name="DFIR", 

154 target=params.target, 

155 results=result_content 

156 ) 

157 

158 return ToolOk(content=result_content) 

159 

160 except Exception as e: 

161 logger.error(f"DFIR operation failed: {e}") 

162 return ToolError( 

163 message=f"DFIR operation failed: {str(e)}", 

164 brief="Operation failed" 

165 ) 

166 

167 async def _evidence_collection_operation(self, params: DFIRParams) -> List[Dict[str, Any]]: 

168 """ 

169 Collect and preserve evidence. 

170 

171 Context: Evidence collection with chain of custody. 

172 """ 

173 artifacts = [] 

174 

175 target_path = Path(params.target).expanduser() 

176 

177 if not target_path.exists(): 

178 artifacts.append({ 

179 "artifact_type": "error", 

180 "description": f"Target does not exist: {params.target}", 

181 "severity": "HIGH" 

182 }) 

183 return artifacts 

184 

185 # Collect file metadata 

186 if target_path.is_file(): 

187 artifacts.append(self._collect_file_evidence(target_path, params.preserve_evidence)) 

188 else: 

189 # Collect directory evidence 

190 file_count = 0 

191 for file_path in target_path.rglob("*"): 

192 if file_path.is_file() and file_count < 50: 

193 artifacts.append(self._collect_file_evidence(file_path, params.preserve_evidence)) 

194 file_count += 1 

195 

196 # Add collection summary 

197 artifacts.append({ 

198 "artifact_type": "collection_summary", 

199 "description": f"Collected {file_count if target_path.is_dir() else 1} evidence items", 

200 "chain_of_custody": params.preserve_evidence, 

201 "timestamp": datetime.utcnow().isoformat() 

202 }) 

203 

204 return artifacts 

205 

206 def _collect_file_evidence(self, file_path: Path, preserve: bool) -> Dict[str, Any]: 

207 """Collect evidence for a single file""" 

208 stat = file_path.stat() 

209 

210 evidence = { 

211 "artifact_type": "file_evidence", 

212 "file": str(file_path), 

213 "file_name": file_path.name, 

214 "file_size": stat.st_size, 

215 "modified_time": datetime.fromtimestamp(stat.st_mtime).isoformat(), 

216 "accessed_time": datetime.fromtimestamp(stat.st_atime).isoformat(), 

217 "created_time": datetime.fromtimestamp(stat.st_ctime).isoformat(), 

218 } 

219 

220 # Calculate hash if preserving evidence 

221 if preserve: 

222 try: 

223 with open(file_path, 'rb') as f: 

224 content = f.read() 

225 evidence["md5_hash"] = hashlib.md5(content).hexdigest() 

226 evidence["sha256_hash"] = hashlib.sha256(content).hexdigest() 

227 except Exception as e: 

228 evidence["hash_error"] = str(e) 

229 

230 return evidence 

231 

232 async def _timeline_analysis_operation(self, params: DFIRParams) -> List[Dict[str, Any]]: 

233 """ 

234 Reconstruct event timeline. 

235 

236 Context: Timeline of file system events. 

237 """ 

238 artifacts = [] 

239 

240 target_path = Path(params.target).expanduser() 

241 

242 if target_path.exists(): 

243 # Collect timeline data 

244 timeline_events = [] 

245 

246 if target_path.is_dir(): 

247 for file_path in target_path.rglob("*"): 

248 if file_path.is_file(): 

249 stat = file_path.stat() 

250 timeline_events.append({ 

251 "file": str(file_path), 

252 "modified": stat.st_mtime, 

253 "accessed": stat.st_atime, 

254 "created": stat.st_ctime 

255 }) 

256 

257 if len(timeline_events) >= 50: 

258 break 

259 

260 # Sort by modification time 

261 timeline_events.sort(key=lambda x: x["modified"], reverse=True) 

262 

263 # Add recent events to artifacts 

264 for event in timeline_events[:20]: 

265 artifacts.append({ 

266 "artifact_type": "timeline_event", 

267 "file": event["file"], 

268 "modified": datetime.fromtimestamp(event["modified"]).isoformat(), 

269 "accessed": datetime.fromtimestamp(event["accessed"]).isoformat(), 

270 "created": datetime.fromtimestamp(event["created"]).isoformat() 

271 }) 

272 

273 artifacts.append({ 

274 "artifact_type": "timeline_summary", 

275 "description": f"Analyzed {len(timeline_events)} timeline events", 

276 "most_recent": datetime.fromtimestamp(timeline_events[0]["modified"]).isoformat() if timeline_events else None 

277 }) 

278 

279 return artifacts 

280 

281 async def _artifact_extraction_operation(self, params: DFIRParams) -> List[Dict[str, Any]]: 

282 """ 

283 Extract forensic artifacts. 

284 

285 Context: Extract common forensic artifacts. 

286 """ 

287 artifacts = [] 

288 

289 target_path = Path(params.target).expanduser() 

290 

291 if target_path.exists() and target_path.is_dir(): 

292 # Search for forensic artifacts 

293 for artifact_category, patterns in self.FORENSIC_ARTIFACTS.items(): 

294 found_artifacts = [] 

295 

296 for pattern in patterns: 

297 matches = list(target_path.rglob(pattern))[:10] 

298 found_artifacts.extend(matches) 

299 

300 if found_artifacts: 

301 artifacts.append({ 

302 "artifact_type": "forensic_artifact", 

303 "category": artifact_category, 

304 "description": f"Found {len(found_artifacts)} {artifact_category}", 

305 "artifacts": [str(f) for f in found_artifacts[:5]], 

306 "severity": "INFO" 

307 }) 

308 

309 # Check for suspicious persistence mechanisms 

310 persistence_locations = [ 

311 "*startup*", 

312 "*autorun*", 

313 "*.lnk", 

314 "*scheduled*" 

315 ] 

316 

317 for pattern in persistence_locations: 

318 suspicious = list(target_path.rglob(pattern))[:5] 

319 if suspicious: 

320 artifacts.append({ 

321 "artifact_type": "persistence_mechanism", 

322 "description": f"Potential persistence: {pattern}", 

323 "files": [str(f) for f in suspicious], 

324 "severity": "MEDIUM" 

325 }) 

326 

327 artifacts.append({ 

328 "artifact_type": "extraction_summary", 

329 "description": f"Extracted artifacts from {params.target}" 

330 }) 

331 

332 return artifacts 

333 

334 async def _hash_verification_operation(self, params: DFIRParams) -> List[Dict[str, Any]]: 

335 """ 

336 Verify file integrity with hashes. 

337 

338 Context: Hash-based file verification. 

339 """ 

340 artifacts = [] 

341 

342 target_path = Path(params.target).expanduser() 

343 

344 if target_path.exists(): 

345 files_hashed = 0 

346 

347 if target_path.is_file(): 

348 artifacts.append(self._hash_file(target_path)) 

349 files_hashed = 1 

350 else: 

351 # Hash multiple files 

352 for file_path in target_path.rglob("*"): 

353 if file_path.is_file() and files_hashed < 20: 

354 artifacts.append(self._hash_file(file_path)) 

355 files_hashed += 1 

356 

357 artifacts.append({ 

358 "artifact_type": "hash_summary", 

359 "description": f"Generated hashes for {files_hashed} files", 

360 "algorithm": "MD5, SHA256" 

361 }) 

362 

363 return artifacts 

364 

365 def _hash_file(self, file_path: Path) -> Dict[str, Any]: 

366 """Generate hashes for a file""" 

367 try: 

368 with open(file_path, 'rb') as f: 

369 content = f.read() 

370 

371 return { 

372 "artifact_type": "file_hash", 

373 "file": str(file_path), 

374 "md5": hashlib.md5(content).hexdigest(), 

375 "sha256": hashlib.sha256(content).hexdigest(), 

376 "size": len(content) 

377 } 

378 except Exception as e: 

379 return { 

380 "artifact_type": "hash_error", 

381 "file": str(file_path), 

382 "error": str(e) 

383 } 

384 

385 async def _full_forensics_operation(self, params: DFIRParams) -> List[Dict[str, Any]]: 

386 """ 

387 Comprehensive forensic analysis. 

388 

389 Context: Full forensic investigation. 

390 """ 

391 artifacts = [] 

392 

393 # Execute all forensic operations 

394 artifacts.extend(await self._evidence_collection_operation(params)) 

395 artifacts.extend(await self._timeline_analysis_operation(params)) 

396 artifacts.extend(await self._artifact_extraction_operation(params)) 

397 artifacts.extend(await self._hash_verification_operation(params)) 

398 

399 # Add comprehensive summary 

400 artifacts.append({ 

401 "artifact_type": "forensic_report", 

402 "description": "Comprehensive forensic analysis complete", 

403 "operations": ["evidence_collection", "timeline_analysis", "artifact_extraction", "hash_verification"], 

404 "timestamp": datetime.utcnow().isoformat(), 

405 "recommendation": "Review all artifacts and correlate with incident timeline" 

406 }) 

407 

408 return artifacts