Coverage for src/alprina_cli/quick_scanner.py: 27%

62 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2025-11-14 11:27 +0100

1""" 

2Quick security scanner for critical patterns. 

3No LLM calls, pure regex + AST parsing. 

4Designed to complete in <5 seconds. 

5""" 

6 

7import re 

8import ast 

9from pathlib import Path 

10from typing import List, Dict, Optional 

11from dataclasses import dataclass 

12import time 

13 

@dataclass
class QuickFinding:
    """One issue reported by the quick scan.

    The field order below is also the positional order of the generated
    constructor, so it must stay as it is.
    """

    severity: str  # severity label; the scanner in this file always sets "critical"
    title: str  # headline of the pattern category that matched
    file: str  # path of the file containing the match, as a string
    line: int  # 1-based line number of the matching line
    code_snippet: str  # matching line, stripped and cut to 100 characters by the scanner
    pattern: str  # key of the CRITICAL_PATTERNS category that matched
    description: str  # explanation of the risk for this category

24 

# Critical patterns to check (top 10 most dangerous).
#
# Each category maps to:
#   "patterns":    regexes that are applied with re.search() to ONE source
#                  line at a time (so no pattern can see a neighbouring line)
#   "title":       headline used for every finding of the category
#   "description": one-sentence explanation of the risk
CRITICAL_PATTERNS = {
    "sql_injection": {
        "patterns": [
            r"execute\s*\(\s*f['\"].*?{.*?}.*?['\"]",  # f-string in SQL
            r"execute\s*\(\s*['\"].*?\+.*?['\"]",  # String concat in SQL
            # NOTE(review): the trailing (?!,) follows a lazy .*? and can
            # always be satisfied, so this flags every cursor.execute(...)
            # line containing '%', including parameterized queries.
            r"cursor\.execute\s*\(\s*.*?\%.*?(?!,)",  # Old-style format without params
        ],
        "title": "SQL Injection Vulnerability",
        "description": "SQL query uses unsanitized user input, allowing attackers to manipulate queries"
    },
    "hardcoded_secrets": {
        "patterns": [
            r"(?i)(password|secret|key|token|api_key)\s*=\s*['\"][^'\"]{8,}['\"]",
            r"(?i)jwt_secret\s*=\s*['\"][^'\"]+['\"]",
            r"(?i)aws_secret_access_key\s*=\s*['\"][^'\"]+['\"]",
        ],
        "title": "Hardcoded Secret/Credential",
        "description": "Credentials hardcoded in source code can be stolen by anyone with repo access"
    },
    "xss_vulnerability": {
        "patterns": [
            # The lookahead sits directly after '=' and inspects the rest of
            # the line; placed after a lazy .*? it could never reject anything.
            r"innerHTML\s*=(?!.*sanitize)",  # JS innerHTML without sanitize
            r"dangerouslySetInnerHTML",  # React XSS vector
            r"document\.write\s*\(",  # document.write
        ],
        "title": "Cross-Site Scripting (XSS)",
        "description": "User input rendered without sanitization allows attackers to inject malicious scripts"
    },
    "command_injection": {
        "patterns": [
            r"os\.system\s*\(\s*f['\"]",
            # shell=True may appear at any argument position, not only first.
            r"subprocess\.(call|run|Popen)\s*\(.*?shell\s*=\s*True",
            r"eval\s*\(\s*.*?input.*?\)",  # eval with user input
        ],
        "title": "Command Injection Vulnerability",
        "description": "Unsanitized input passed to system commands allows arbitrary command execution"
    },
    "path_traversal": {
        "patterns": [
            r"open\s*\(\s*.*?\+.*?\)",  # Unsanitized path concat
            r"Path\s*\(\s*.*?input.*?\)",
            r"\.\.\/",  # Path traversal attempt
        ],
        "title": "Path Traversal Vulnerability",
        "description": "Unsanitized file paths allow attackers to read arbitrary files on the system"
    },
    "weak_crypto": {
        "patterns": [
            r"hashlib\.md5",
            r"hashlib\.sha1",
            # Weak ciphers. The letter guards keep "des" from matching inside
            # ordinary words such as "description" or "nodes", while still
            # accepting DES, DES3, 3DES, TripleDES, RC2/RC4 and ARC2/ARC4.
            r"(?i)(?<![a-z])(?:triple)?(?:des|a?rc[24])(?![a-z])",
        ],
        "title": "Weak Cryptographic Algorithm",
        "description": "Using broken/weak crypto algorithms that can be easily cracked by attackers"
    },
    "insecure_random": {
        "patterns": [
            r"random\.random",  # Not cryptographically secure
            r"Math\.random\(",  # JS non-crypto random
        ],
        "title": "Insecure Random Number Generation",
        "description": "Using predictable random numbers for security-critical operations"
    },
    "missing_auth": {
        "patterns": [
            # NOTE(review): both lookaheads follow a lazy .*? and patterns are
            # matched per line, so a decorator on another line is never seen;
            # these flag every POST route / every route containing "/admin".
            r"@app\.route.*?methods.*?POST.*?(?!@.*?auth)",  # POST without auth
            r"@app\.route.*?/admin.*?(?!@.*?require)",  # Admin without protection
        ],
        "title": "Missing Authentication",
        "description": "Sensitive endpoints accessible without authentication"
    },
    "debug_enabled": {
        "patterns": [
            r"DEBUG\s*=\s*True",
            r"app\.debug\s*=\s*True",
            # Grouped so that "secret" / "key" only count after console.log;
            # ungrouped, the alternation flagged any line containing them.
            r"console\.log.*?(password|secret|key)",  # Logging secrets
        ],
        "title": "Debug Mode Enabled",
        "description": "Debug mode exposes sensitive information and stack traces to attackers"
    },
    "exposed_endpoints": {
        "patterns": [
            # NOTE(review): same no-op lookahead as in "missing_auth"; this
            # flags every line that contains "/admin".
            r"/admin.*?(?!@.*?require)",  # Admin routes without protection
            r"@app\.route\(['\"].*?(secret|internal|private).*?['\"]",
        ],
        "title": "Exposed Sensitive Endpoint",
        "description": "Internal/admin endpoints accessible without proper authorization"
    },
}

115 

class QuickScanner:
    """Fast scanner for critical security patterns.

    Every regex in ``CRITICAL_PATTERNS`` is applied line by line to each
    scanned file; matches are collected as ``QuickFinding`` objects.

    Attributes:
        findings: Findings collected so far, in discovery order.
        files_scanned: Number of files that were read successfully.
        start_time: ``time.time()`` at construction; the scan failsafe and
            the reported duration are both measured from this moment.
    """

    # Suffixes visited during a directory scan, in scan order.
    _EXTENSIONS = ('.py', '.js', '.ts', '.tsx', '.jsx', '.java', '.php', '.rb')

    # Failsafe: a directory scan stops once this many seconds have elapsed
    # since the scanner was constructed.
    _MAX_SECONDS = 30

    # Directory names whose contents are never scanned.
    _SKIP_DIRS = frozenset({
        'node_modules', 'venv', '.venv', '.git', '__pycache__',
        'dist', 'build', '.next', 'coverage', 'vendor', 'target',
    })

    # Substrings of a (lower-cased) file name that mark test/minified files.
    _SKIP_NAME_PARTS = ('test_', '_test.', '.test.', '.spec.', '.min.')

    def __init__(self):
        self.findings: List[QuickFinding] = []
        self.files_scanned = 0
        self.start_time = time.time()
        # (file, line, category) triples already reported. A set keeps the
        # duplicate check O(1) instead of rescanning ``findings`` per match.
        self._seen = set()

    def scan_directory(self, target_path: str) -> List[QuickFinding]:
        """Scan a file or a directory tree for critical patterns.

        Args:
            target_path: Path to a single file (scanned unconditionally) or
                to a directory (searched recursively for known suffixes).

        Returns:
            The accumulated list of findings (the same list object as
            ``self.findings``).
        """
        target = Path(target_path)

        if target.is_file():
            self._scan_file(target)
            return self.findings

        for ext in self._EXTENSIONS:
            for file_path in target.rglob(f'*{ext}'):
                # Judge only the part below the scan root, so a project that
                # itself lives under e.g. ".../build/" is not skipped wholesale.
                if self._should_skip(file_path.relative_to(target)):
                    continue
                self._scan_file(file_path)

                # Stop if taking too long (failsafe). Returning ends the whole
                # scan; a bare ``break`` would only leave the current suffix
                # and carry on with the next one.
                if time.time() - self.start_time > self._MAX_SECONDS:
                    return self.findings

        return self.findings

    def _should_skip(self, path: Path) -> bool:
        """Return True for files in skipped directories and for test/minified files.

        Args:
            path: Path to check. Every component is compared against the
                skipped directory names, so callers should pass a path that
                is relative to the scan root.
        """
        # Check if any path component is a skipped directory.
        if any(part in self._SKIP_DIRS for part in path.parts):
            return True

        # Check if the file name matches a skip pattern.
        name = path.name.lower()
        return any(marker in name for marker in self._SKIP_NAME_PARTS)

    def _scan_file(self, file_path: Path):
        """Scan a single file and append any new findings to ``self.findings``.

        Files that cannot be read are skipped silently (best effort). At most
        one finding is recorded per (file, line, category).
        """
        try:
            content = file_path.read_text(encoding='utf-8', errors='ignore')
        except (OSError, ValueError):
            # Unreadable entry (permissions, a directory whose name ends in a
            # scanned suffix, broken symlink, invalid path): skip it.
            return

        lines = content.split('\n')
        self.files_scanned += 1
        file_name = str(file_path)

        # Check each pattern category.
        for pattern_name, pattern_config in CRITICAL_PATTERNS.items():
            for pattern in pattern_config['patterns']:
                regex = re.compile(pattern)  # hoisted out of the line loop
                for line_num, line in enumerate(lines, 1):
                    if not regex.search(line):
                        continue

                    # Avoid duplicates when several patterns of one category
                    # hit the same line.
                    key = (file_name, line_num, pattern_name)
                    if key in self._seen:
                        continue
                    self._seen.add(key)

                    self.findings.append(QuickFinding(
                        severity="critical",
                        title=pattern_config['title'],
                        file=file_name,
                        line=line_num,
                        code_snippet=line.strip()[:100],  # Limit length
                        pattern=pattern_name,
                        description=pattern_config['description'],
                    ))

    def get_summary(self) -> Dict:
        """Return scan statistics.

        Returns:
            Dict with ``total_files_scanned``, ``files_with_issues``,
            ``critical`` (number of findings with severity "critical") and
            ``duration_ms`` (milliseconds elapsed since construction).
        """
        return {
            "total_files_scanned": self.files_scanned,
            "files_with_issues": len({f.file for f in self.findings}),
            "critical": sum(1 for f in self.findings if f.severity == "critical"),
            "duration_ms": int((time.time() - self.start_time) * 1000),
        }

200 

201 

def quick_scan(target: str) -> Dict:
    """
    Run the quick security scan and return a plain-dict report.
    Intended to finish in <5 seconds for most projects.

    Args:
        target: Path to file or directory to scan

    Returns:
        Dict with the keys "quick_scan" (always True), "duration_ms",
        "findings" (one dict per finding) and "summary"
    """
    scanner = QuickScanner()
    found = scanner.scan_directory(target)
    summary = scanner.get_summary()

    # Flatten each finding into a plain dict, field by field.
    serialized = []
    for item in found:
        serialized.append({
            "severity": item.severity,
            "title": item.title,
            "file": item.file,
            "line": item.line,
            "code_snippet": item.code_snippet,
            "pattern": item.pattern,
            "description": item.description,
        })

    report = {
        "quick_scan": True,
        "duration_ms": summary['duration_ms'],
        "findings": serialized,
        "summary": summary,
    }
    return report

234 

235 

if __name__ == "__main__":
    # Manual entry point for a quick test: pass one file or directory path.
    import sys

    if len(sys.argv) < 2:
        # Running without a target used to do nothing at all; report the
        # expected usage and exit with the usual status for a usage error.
        print(f"Usage: {sys.argv[0]} <file-or-directory>", file=sys.stderr)
        sys.exit(2)

    result = quick_scan(sys.argv[1])
    print(f"Scanned {result['summary']['total_files_scanned']} files in {result['duration_ms']}ms")
    print(f"Found {result['summary']['critical']} critical issues")