Coverage for src/alprina_cli/quick_scanner.py: 27%
62 statements
« prev ^ index » next coverage.py v7.11.3, created at 2025-11-14 11:27 +0100
« prev ^ index » next coverage.py v7.11.3, created at 2025-11-14 11:27 +0100
1"""
Quick security scanner for critical patterns.
No LLM calls; pure line-by-line regex matching (the `ast` import is currently unused).
Designed to complete in <5 seconds.
5"""
7import re
8import ast
9from pathlib import Path
10from typing import List, Dict, Optional
11from dataclasses import dataclass
12import time
@dataclass
class QuickFinding:
    """A single match reported by the quick scan.

    Instances are created by ``QuickScanner._scan_file`` and converted to
    plain dicts by ``quick_scan``.
    """

    severity: str  # the scanner currently always sets this to "critical"
    title: str  # human-readable category title taken from CRITICAL_PATTERNS
    file: str  # str() of the scanned file's path
    line: int  # 1-based line number of the matching line
    code_snippet: str  # matching line, stripped and cut to 100 characters
    pattern: str  # CRITICAL_PATTERNS category key, e.g. "sql_injection"
    description: str  # category description taken from CRITICAL_PATTERNS
# Critical patterns to check (top 10 most dangerous).
#
# Each category maps to a title, a description and a list of regexes.
# QuickScanner applies every regex with re.search() to ONE source line at a
# time, so a pattern can never see a decorator, argument or sanitizer call
# that lives on a neighbouring line.
CRITICAL_PATTERNS = {
    "sql_injection": {
        "patterns": [
            r"execute\s*\(\s*f['\"].*?{.*?}.*?['\"]",  # f-string in SQL
            r"execute\s*\(\s*['\"].*?\+.*?['\"]",  # String concat in SQL
            # Old-style "%" formatting applied to the query itself: a quoted
            # literal (closed by the same quote) or a bare name followed by
            # the % operator. A "%s" placeholder inside the literal with
            # parameters passed separately is deliberately NOT matched.
            r"cursor\.execute\s*\(\s*(?:(['\"]).*?\1|\w+)\s*%",
        ],
        "title": "SQL Injection Vulnerability",
        "description": "SQL query uses unsanitized user input, allowing attackers to manipulate queries"
    },
    "hardcoded_secrets": {
        "patterns": [
            r"(?i)(password|secret|key|token|api_key)\s*=\s*['\"][^'\"]{8,}['\"]",
            r"(?i)jwt_secret\s*=\s*['\"][^'\"]+['\"]",
            r"(?i)aws_secret_access_key\s*=\s*['\"][^'\"]+['\"]",
        ],
        "title": "Hardcoded Secret/Credential",
        "description": "Credentials hardcoded in source code can be stolen by anyone with repo access"
    },
    "xss_vulnerability": {
        "patterns": [
            # innerHTML assignment (not a ==/=== comparison) whose right-hand
            # side does not mention "sanitize" anywhere on the same line.
            r"innerHTML\s*=(?!=)(?!.*sanitize)",
            r"dangerouslySetInnerHTML",  # React XSS vector
            r"document\.write\s*\(",  # document.write
        ],
        "title": "Cross-Site Scripting (XSS)",
        "description": "User input rendered without sanitization allows attackers to inject malicious scripts"
    },
    "command_injection": {
        "patterns": [
            r"os\.system\s*\(\s*f['\"]",
            # shell=True anywhere in the argument list, not only as the
            # first argument.
            r"subprocess\.(call|run|Popen)\s*\(.*?shell\s*=\s*True",
            r"eval\s*\(\s*.*?input.*?\)",  # eval with user input
        ],
        "title": "Command Injection Vulnerability",
        "description": "Unsanitized input passed to system commands allows arbitrary command execution"
    },
    "path_traversal": {
        "patterns": [
            r"open\s*\(\s*.*?\+.*?\)",  # Unsanitized path concat
            r"Path\s*\(\s*.*?input.*?\)",
            r"\.\.\/",  # Path traversal attempt
        ],
        "title": "Path Traversal Vulnerability",
        "description": "Unsanitized file paths allow attackers to read arbitrary files on the system"
    },
    "weak_crypto": {
        "patterns": [
            r"hashlib\.md5",
            r"hashlib\.sha1",
            # Weak cipher names as standalone tokens. The letter-only
            # lookarounds keep DES3 / DES_EDE3 / ARC4 matching while no
            # longer firing on words such as "description" or "nodes".
            r"(?i)(?<![a-z])(?:3?des3?|tripledes|a?rc4|a?rc2)(?![a-z])",
        ],
        "title": "Weak Cryptographic Algorithm",
        "description": "Using broken/weak crypto algorithms that can be easily cracked by attackers"
    },
    "insecure_random": {
        "patterns": [
            r"random\.random",  # Not cryptographically secure
            r"Math\.random\(",  # JS non-crypto random
        ],
        "title": "Insecure Random Number Generation",
        "description": "Using predictable random numbers for security-critical operations"
    },
    "missing_auth": {
        "patterns": [
            # NOTE: the trailing negative lookahead follows a lazy `.*?`, so
            # under re.search it can always be satisfied (at the latest at
            # end of line). In effect these flag every POST route and every
            # /admin route line; an auth decorator on another line is
            # invisible to a per-line scan.
            r"@app\.route.*?methods.*?POST.*?(?!@.*?auth)",
            r"@app\.route.*?/admin.*?(?!@.*?require)",
        ],
        "title": "Missing Authentication",
        "description": "Sensitive endpoints accessible without authentication"
    },
    "debug_enabled": {
        "patterns": [
            r"DEBUG\s*=\s*True",
            r"app\.debug\s*=\s*True",
            # Logging secrets. The alternation is grouped so that only
            # console.log lines are flagged, not every line that merely
            # contains "secret" or "key".
            r"console\.log.*?(?:password|secret|key)",
        ],
        "title": "Debug Mode Enabled",
        "description": "Debug mode exposes sensitive information and stack traces to attackers"
    },
    "exposed_endpoints": {
        "patterns": [
            # NOTE: same inert lookahead as in "missing_auth"; this matches
            # any line containing "/admin".
            r"/admin.*?(?!@.*?require)",
            r"@app\.route\(['\"].*?(secret|internal|private).*?['\"]",
        ],
        "title": "Exposed Sensitive Endpoint",
        "description": "Internal/admin endpoints accessible without proper authorization"
    },
}
class QuickScanner:
    """Fast scanner for critical security patterns.

    Walks a file or directory tree and applies every regex in
    ``CRITICAL_PATTERNS`` to each source line, collecting one
    ``QuickFinding`` per (file, line, category).
    """

    # Source-file suffixes picked up when scanning a directory tree.
    SCAN_EXTENSIONS = ('.py', '.js', '.ts', '.tsx', '.jsx', '.java', '.php', '.rb')

    # Failsafe: stop a directory scan once this many seconds have elapsed
    # since the scanner was constructed.
    MAX_SCAN_SECONDS = 30

    def __init__(self):
        self.findings: List[QuickFinding] = []
        self.files_scanned = 0
        self.start_time = time.time()
        # (file, line, category) keys already reported; gives O(1) duplicate
        # checks instead of re-walking self.findings for every match.
        self._seen = set()

    def scan_directory(self, target_path: str) -> "List[QuickFinding]":
        """Scan a file or directory for critical patterns.

        Args:
            target_path: Path to a single file (scanned unconditionally) or
                to a directory that is searched recursively for files with
                one of ``SCAN_EXTENSIONS``.

        Returns:
            The scanner's accumulated list of findings.
        """
        target = Path(target_path)

        if target.is_file():
            self._scan_file(target)
            return self.findings

        for ext in self.SCAN_EXTENSIONS:
            for file_path in target.rglob(f'*{ext}'):
                if self._should_skip(file_path, target):
                    continue
                self._scan_file(file_path)

                # Stop the WHOLE scan if taking too long; a plain `break`
                # would only leave the current extension's loop and carry
                # on with the next extension.
                if time.time() - self.start_time > self.MAX_SCAN_SECONDS:
                    return self.findings

        return self.findings

    def _should_skip(self, path: Path, root: Optional[Path] = None) -> bool:
        """Return True for files in vendored/build directories and test files.

        Args:
            path: Candidate file path.
            root: Scan root. When given and ``path`` lies beneath it, only
                the components below ``root`` are checked against the
                skip list, so a project that itself lives under e.g.
                ``.../build/...`` is not skipped wholesale. When omitted,
                every component of ``path`` is checked.
        """
        skip_dirs = {
            'node_modules', 'venv', '.venv', '.git', '__pycache__',
            'dist', 'build', '.next', 'coverage', 'vendor', 'target'
        }

        parts = path.parts
        if root is not None:
            try:
                parts = path.relative_to(root).parts
            except ValueError:
                # path is not under root: fall back to the full path.
                pass

        if any(part in skip_dirs for part in parts):
            return True

        name = path.name.lower()
        # 'test_' is a filename PREFIX convention; a substring test would
        # also skip names such as 'latest_results.py'.
        if name.startswith('test_'):
            return True
        return any(marker in name for marker in ('_test.', '.test.', '.spec.', '.min.'))

    def _scan_file(self, file_path: Path):
        """Scan a single file, appending new findings to ``self.findings``.

        Files that cannot be read are skipped (best effort) and do not
        count towards ``files_scanned``.
        """
        try:
            content = file_path.read_text(encoding='utf-8', errors='ignore')
        except OSError:
            # Unreadable file (permissions, directory named like a source
            # file, vanished mid-scan, ...): skip it.
            return

        lines = content.split('\n')
        self.files_scanned += 1
        file_name = str(file_path)

        # Iteration order (category -> pattern -> line) determines the
        # order of self.findings and is kept as-is.
        for pattern_name, pattern_config in CRITICAL_PATTERNS.items():
            for pattern in pattern_config['patterns']:
                regex = re.compile(pattern)
                for line_num, line in enumerate(lines, 1):
                    if not regex.search(line):
                        continue

                    # Report each (file, line, category) only once, even if
                    # several patterns of the category hit the same line.
                    key = (file_name, line_num, pattern_name)
                    if key in self._seen:
                        continue
                    self._seen.add(key)

                    self.findings.append(QuickFinding(
                        severity="critical",
                        title=pattern_config['title'],
                        file=file_name,
                        line=line_num,
                        code_snippet=line.strip()[:100],  # Limit length
                        pattern=pattern_name,
                        description=pattern_config['description']
                    ))

    def get_summary(self) -> Dict:
        """Return scan counters and the elapsed time since construction.

        Returns:
            Dict with ``total_files_scanned``, ``files_with_issues``
            (distinct files among the findings), ``critical`` (findings
            whose severity is "critical") and ``duration_ms``.
        """
        return {
            "total_files_scanned": self.files_scanned,
            "files_with_issues": len({f.file for f in self.findings}),
            "critical": sum(1 for f in self.findings if f.severity == "critical"),
            "duration_ms": int((time.time() - self.start_time) * 1000),
        }
def quick_scan(target: str) -> Dict:
    """
    Perform quick security scan.
    Returns results in <5 seconds for most projects.

    Args:
        target: Path to file or directory to scan

    Returns:
        Dict with the keys ``quick_scan`` (always True), ``duration_ms``,
        ``findings`` (one plain dict per finding) and ``summary`` (the
        scanner's summary dict).
    """
    scanner = QuickScanner()
    findings = scanner.scan_directory(target)
    summary = scanner.get_summary()

    # Flatten each finding into a plain dict so the result contains only
    # built-in types.
    serialized = []
    for finding in findings:
        serialized.append({
            "severity": finding.severity,
            "title": finding.title,
            "file": finding.file,
            "line": finding.line,
            "code_snippet": finding.code_snippet,
            "pattern": finding.pattern,
            "description": finding.description,
        })

    return {
        "quick_scan": True,
        "duration_ms": summary['duration_ms'],
        "findings": serialized,
        "summary": summary
    }
if __name__ == "__main__":
    # Quick manual test: scan the path given as the first CLI argument.
    import sys

    if len(sys.argv) < 2:
        # Previously a missing argument made the script exit silently.
        sys.exit(f"usage: {sys.argv[0]} <file-or-directory>")

    result = quick_scan(sys.argv[1])
    print(f"Scanned {result['summary']['total_files_scanned']} files in {result['duration_ms']}ms")
    print(f"Found {result['summary']['critical']} critical issues")