Coverage for src/alprina_cli/tools/security/dfir.py: 19%
140 statements
« prev ^ index » next coverage.py v7.11.3, created at 2025-11-14 11:27 +0100
« prev ^ index » next coverage.py v7.11.3, created at 2025-11-14 11:27 +0100
"""
DFIR Tool (Digital Forensics and Incident Response)

Context Engineering:
- Forensic analysis and evidence collection
- Timeline reconstruction
- Artifact preservation
- Memory-aware: Builds forensic knowledge base

Preserve evidence, reconstruct events.
"""
import hashlib
import json
from datetime import datetime, timezone
from itertools import islice
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional

from loguru import logger
from pydantic import BaseModel, Field

from alprina_cli.tools.base import AlprinaToolBase, ToolError, ToolOk
class DFIRParams(BaseModel):
    """
    Parameters for DFIR operations.

    Context: Focused schema for forensic analysis.
    """

    # Subject of the investigation (file or directory path).
    target: str = Field(
        description="Target for forensic analysis"
    )
    # Which forensic workflow to run; defaults to plain evidence collection.
    operation: Literal[
        "evidence_collection",
        "timeline_analysis",
        "artifact_extraction",
        "hash_verification",
        "full_forensics",
    ] = Field(
        default="evidence_collection",
        description="Operation: evidence_collection, timeline_analysis, artifact_extraction, hash_verification, full_forensics"
    )
    # When True, file hashes are recorded so evidence integrity can be proven later.
    preserve_evidence: bool = Field(
        default=True,
        description="Preserve evidence chain of custody"
    )
    # Hard cap on how many artifacts a single run may return.
    max_artifacts: int = Field(
        default=100,
        description="Maximum artifacts to collect"
    )
class DFIRTool(AlprinaToolBase[DFIRParams]):
    """
    DFIR tool for digital forensics and incident response.

    Context Engineering Benefits:
    - Structured forensic findings
    - Chain of custody tracking
    - Timeline reconstruction
    - Memory integration for case correlation

    Operations:
    - evidence_collection: Collect and preserve evidence
    - timeline_analysis: Reconstruct event timeline
    - artifact_extraction: Extract forensic artifacts
    - hash_verification: Verify file integrity
    - full_forensics: Comprehensive forensic analysis

    Usage:
    ```python
    tool = DFIRTool(memory_service=memory)
    result = await tool.execute(DFIRParams(
        target="/evidence",
        operation="evidence_collection",
        preserve_evidence=True
    ))
    ```
    """

    name: str = "DFIR"
    description: str = """Digital Forensics and Incident Response.

Capabilities:
- Evidence collection and preservation
- Timeline reconstruction
- Forensic artifact extraction
- File integrity verification
- Comprehensive forensic analysis

Returns: Structured forensic findings with chain of custody"""
    params: type[DFIRParams] = DFIRParams

    # Glob patterns for common forensic artifact categories.
    FORENSIC_ARTIFACTS = {
        "browser_artifacts": ["*.sqlite", "*History*", "*Cookies*", "*Cache*"],
        "system_artifacts": ["*.log", "*.evt", "*.evtx", "*Registry*"],
        "persistence": ["*.lnk", "*.bat", "*.vbs", "*.ps1", "*startup*"],
        "user_activity": ["*.doc*", "*.pdf", "*.xls*", "*recent*"],
    }

    # Per-operation scan caps (kept small to bound runtime on large trees).
    _EVIDENCE_FILE_LIMIT = 50     # files collected per evidence run
    _TIMELINE_SCAN_LIMIT = 50     # files examined for the timeline
    _TIMELINE_REPORT_LIMIT = 20   # most-recent events actually reported
    _HASH_FILE_LIMIT = 20         # files hashed per verification run
    _MATCHES_PER_PATTERN = 10     # rglob matches kept per artifact pattern
    _PERSISTENCE_MATCH_LIMIT = 5  # rglob matches kept per persistence pattern

    async def execute(self, params: DFIRParams) -> ToolOk | ToolError:
        """
        Execute a DFIR operation.

        Args:
            params: Validated DFIR parameters (target, operation, caps).

        Returns:
            ToolOk whose content holds artifacts, a per-type summary and
            forensic notes; ToolError if any step raises.
        """
        logger.info(f"DFIR: {params.target} (op={params.operation})")

        try:
            # Surface related prior cases from memory (read-only context).
            if self.memory_service and self.memory_service.is_enabled():
                related_cases = self.memory_service.get_tool_context("DFIR", limit=3)
                if related_cases:
                    logger.info(f"Found {len(related_cases)} related forensic cases")

            # Dispatch to the requested operation; anything unmatched is
            # "full_forensics" (the Literal on params keeps this closed).
            handlers = {
                "evidence_collection": self._evidence_collection_operation,
                "timeline_analysis": self._timeline_analysis_operation,
                "artifact_extraction": self._artifact_extraction_operation,
                "hash_verification": self._hash_verification_operation,
            }
            handler = handlers.get(params.operation, self._full_forensics_operation)
            artifacts = await handler(params)

            # Enforce the caller-supplied artifact cap.
            truncated = len(artifacts) > params.max_artifacts
            if truncated:
                artifacts = artifacts[:params.max_artifacts]

            # Tally artifacts by type for the summary block.
            artifact_types: Dict[str, int] = {}
            for artifact in artifacts:
                atype = artifact.get("artifact_type", "unknown")
                artifact_types[atype] = artifact_types.get(atype, 0) + 1

            result_content = {
                "target": params.target,
                "operation": params.operation,
                "artifacts": artifacts,
                "summary": {
                    "total_artifacts": len(artifacts),
                    "by_type": artifact_types,
                    "truncated": truncated,
                    # Timezone-aware UTC stamp (datetime.utcnow() is deprecated
                    # since Python 3.12).
                    "timestamp": datetime.now(timezone.utc).isoformat(),
                    "chain_of_custody": params.preserve_evidence
                },
                "forensic_notes": "Evidence preserved with chain of custody" if params.preserve_evidence else "Analysis only, no preservation"
            }

            # Persist findings so future cases can be correlated.
            if self.memory_service and self.memory_service.is_enabled():
                self.memory_service.add_scan_results(
                    tool_name="DFIR",
                    target=params.target,
                    results=result_content
                )

            return ToolOk(content=result_content)

        except Exception as e:
            # Broad catch is deliberate: this is the tool's top-level boundary
            # and every failure must be folded into a ToolError.
            logger.error(f"DFIR operation failed: {e}")
            return ToolError(
                message=f"DFIR operation failed: {str(e)}",
                brief="Operation failed"
            )

    async def _evidence_collection_operation(self, params: DFIRParams) -> List[Dict[str, Any]]:
        """
        Collect and preserve evidence for the target file or directory.

        Directory walks stop as soon as _EVIDENCE_FILE_LIMIT files are
        collected (previously the whole tree was walked and extras merely
        skipped, which was O(tree) on large targets).
        """
        artifacts: List[Dict[str, Any]] = []

        target_path = Path(params.target).expanduser()

        if not target_path.exists():
            artifacts.append({
                "artifact_type": "error",
                "description": f"Target does not exist: {params.target}",
                "severity": "HIGH"
            })
            return artifacts

        if target_path.is_file():
            artifacts.append(self._collect_file_evidence(target_path, params.preserve_evidence))
            file_count = 1
        else:
            file_count = 0
            for file_path in target_path.rglob("*"):
                if file_count >= self._EVIDENCE_FILE_LIMIT:
                    break  # cap reached — stop walking the tree
                if file_path.is_file():
                    artifacts.append(self._collect_file_evidence(file_path, params.preserve_evidence))
                    file_count += 1

        artifacts.append({
            "artifact_type": "collection_summary",
            "description": f"Collected {file_count} evidence items",
            "chain_of_custody": params.preserve_evidence,
            "timestamp": datetime.now(timezone.utc).isoformat()
        })

        return artifacts

    def _collect_file_evidence(self, file_path: Path, preserve: bool) -> Dict[str, Any]:
        """
        Build an evidence record (metadata plus optional hashes) for one file.

        NOTE(review): st_ctime is inode-change time on POSIX and creation time
        only on Windows, so "created_time" is approximate on Linux/macOS.
        Timestamps are naive local time, matching the host clock.
        """
        stat = file_path.stat()

        evidence: Dict[str, Any] = {
            "artifact_type": "file_evidence",
            "file": str(file_path),
            "file_name": file_path.name,
            "file_size": stat.st_size,
            "modified_time": datetime.fromtimestamp(stat.st_mtime).isoformat(),
            "accessed_time": datetime.fromtimestamp(stat.st_atime).isoformat(),
            "created_time": datetime.fromtimestamp(stat.st_ctime).isoformat(),
        }

        # Hash only when preserving evidence — hashing is the expensive step.
        if preserve:
            try:
                content = file_path.read_bytes()
                # MD5 kept alongside SHA-256 for legacy forensic tooling compat.
                evidence["md5_hash"] = hashlib.md5(content).hexdigest()
                evidence["sha256_hash"] = hashlib.sha256(content).hexdigest()
            except Exception as e:
                # Record (not raise) read failures so collection can continue.
                evidence["hash_error"] = str(e)

        return evidence

    async def _timeline_analysis_operation(self, params: DFIRParams) -> List[Dict[str, Any]]:
        """
        Reconstruct a file-system event timeline for the target.

        Scans up to _TIMELINE_SCAN_LIMIT files and reports the
        _TIMELINE_REPORT_LIMIT most recently modified. A single-file target
        now contributes its own event (it previously yielded an empty,
        misleading "Analyzed 0 timeline events" summary).
        """
        artifacts: List[Dict[str, Any]] = []

        target_path = Path(params.target).expanduser()

        if target_path.exists():
            timeline_events: List[Dict[str, Any]] = []

            candidates = target_path.rglob("*") if target_path.is_dir() else [target_path]
            for file_path in candidates:
                if file_path.is_file():
                    stat = file_path.stat()
                    timeline_events.append({
                        "file": str(file_path),
                        "modified": stat.st_mtime,
                        "accessed": stat.st_atime,
                        "created": stat.st_ctime
                    })

                if len(timeline_events) >= self._TIMELINE_SCAN_LIMIT:
                    break

            # Newest modifications first.
            timeline_events.sort(key=lambda x: x["modified"], reverse=True)

            for event in timeline_events[:self._TIMELINE_REPORT_LIMIT]:
                artifacts.append({
                    "artifact_type": "timeline_event",
                    "file": event["file"],
                    "modified": datetime.fromtimestamp(event["modified"]).isoformat(),
                    "accessed": datetime.fromtimestamp(event["accessed"]).isoformat(),
                    "created": datetime.fromtimestamp(event["created"]).isoformat()
                })

            artifacts.append({
                "artifact_type": "timeline_summary",
                "description": f"Analyzed {len(timeline_events)} timeline events",
                "most_recent": datetime.fromtimestamp(timeline_events[0]["modified"]).isoformat() if timeline_events else None
            })

        return artifacts

    async def _artifact_extraction_operation(self, params: DFIRParams) -> List[Dict[str, Any]]:
        """
        Extract common forensic artifacts (browser, system, persistence,
        user-activity) plus likely persistence mechanisms.

        Only directory targets are searched; other targets produce just the
        extraction summary. islice caps each rglob walk instead of
        materializing the full match list.
        """
        artifacts: List[Dict[str, Any]] = []

        target_path = Path(params.target).expanduser()

        if target_path.exists() and target_path.is_dir():
            # One INFO finding per category that matched anything.
            for artifact_category, patterns in self.FORENSIC_ARTIFACTS.items():
                found_artifacts: List[Path] = []

                for pattern in patterns:
                    found_artifacts.extend(islice(target_path.rglob(pattern), self._MATCHES_PER_PATTERN))

                if found_artifacts:
                    artifacts.append({
                        "artifact_type": "forensic_artifact",
                        "category": artifact_category,
                        "description": f"Found {len(found_artifacts)} {artifact_category}",
                        "artifacts": [str(f) for f in found_artifacts[:5]],
                        "severity": "INFO"
                    })

            # Separate pass for persistence indicators, flagged MEDIUM.
            persistence_locations = [
                "*startup*",
                "*autorun*",
                "*.lnk",
                "*scheduled*"
            ]

            for pattern in persistence_locations:
                suspicious = list(islice(target_path.rglob(pattern), self._PERSISTENCE_MATCH_LIMIT))
                if suspicious:
                    artifacts.append({
                        "artifact_type": "persistence_mechanism",
                        "description": f"Potential persistence: {pattern}",
                        "files": [str(f) for f in suspicious],
                        "severity": "MEDIUM"
                    })

        artifacts.append({
            "artifact_type": "extraction_summary",
            "description": f"Extracted artifacts from {params.target}"
        })

        return artifacts

    async def _hash_verification_operation(self, params: DFIRParams) -> List[Dict[str, Any]]:
        """
        Generate MD5/SHA-256 integrity hashes for the target.

        Directory walks stop as soon as _HASH_FILE_LIMIT files are hashed
        (previously the whole tree was walked even after the cap was hit).
        """
        artifacts: List[Dict[str, Any]] = []

        target_path = Path(params.target).expanduser()

        if target_path.exists():
            if target_path.is_file():
                artifacts.append(self._hash_file(target_path))
                files_hashed = 1
            else:
                files_hashed = 0
                for file_path in target_path.rglob("*"):
                    if files_hashed >= self._HASH_FILE_LIMIT:
                        break  # cap reached — stop walking the tree
                    if file_path.is_file():
                        artifacts.append(self._hash_file(file_path))
                        files_hashed += 1

            artifacts.append({
                "artifact_type": "hash_summary",
                "description": f"Generated hashes for {files_hashed} files",
                "algorithm": "MD5, SHA256"
            })

        return artifacts

    def _hash_file(self, file_path: Path) -> Dict[str, Any]:
        """Return an MD5/SHA-256 hash record for a file, or a hash_error record."""
        try:
            content = file_path.read_bytes()

            return {
                "artifact_type": "file_hash",
                "file": str(file_path),
                "md5": hashlib.md5(content).hexdigest(),
                "sha256": hashlib.sha256(content).hexdigest(),
                "size": len(content)
            }
        except Exception as e:
            # Unreadable files become error artifacts rather than aborting the run.
            return {
                "artifact_type": "hash_error",
                "file": str(file_path),
                "error": str(e)
            }

    async def _full_forensics_operation(self, params: DFIRParams) -> List[Dict[str, Any]]:
        """Run every forensic operation in sequence and append a final report."""
        artifacts: List[Dict[str, Any]] = []

        artifacts.extend(await self._evidence_collection_operation(params))
        artifacts.extend(await self._timeline_analysis_operation(params))
        artifacts.extend(await self._artifact_extraction_operation(params))
        artifacts.extend(await self._hash_verification_operation(params))

        artifacts.append({
            "artifact_type": "forensic_report",
            "description": "Comprehensive forensic analysis complete",
            "operations": ["evidence_collection", "timeline_analysis", "artifact_extraction", "hash_verification"],
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "recommendation": "Review all artifacts and correlate with incident timeline"
        })

        return artifacts