Coverage for src/alprina_cli/llm_provider.py: 16%

195 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2025-11-14 11:27 +0100

1""" 

2LLM provider module for Alprina CLI. 

3Handles connections to various LLM services (OpenAI, Anthropic, Ollama). 

4""" 

5 

6import os 

7from typing import Optional, Dict, Any 

8from enum import Enum 

9from loguru import logger 

10 

11 

class LLMProvider(Enum):
    """Closed set of LLM backends this module knows how to talk to.

    Each member's ``value`` is the lowercase provider name that
    ``LLMClient`` compares its ``provider`` string against.
    """

    # OpenAI backend.
    OPENAI = "openai"

    # Anthropic backend.
    ANTHROPIC = "anthropic"

    # Ollama backend.
    OLLAMA = "ollama"

17 

18 

class LLMClient:
    """
    Unified interface for different LLM providers.

    Wraps the OpenAI, Anthropic and Ollama Python clients behind one API for
    security analysis (``analyze_code``) and chat (``chat`` /
    ``chat_streaming``). When no backend could be initialized, ``client``
    stays ``None``: analysis falls back to keyword heuristics and the chat
    methods return an explanatory message instead of raising.
    """

    # Maximum number of source characters embedded in the analysis prompt.
    MAX_CODE_CHARS = 2000
    # Sampling settings used for every security-analysis request.
    ANALYSIS_TEMPERATURE = 0.3
    ANALYSIS_MAX_TOKENS = 2000

    def __init__(self, provider: Optional[str] = None):
        """
        Initialize LLM client.

        Args:
            provider: Provider name (openai, anthropic, ollama) or an
                ``LLMProvider`` member. Falls back to the
                ``ALPRINA_LLM_PROVIDER`` environment variable, then "openai".
                Names are matched case-insensitively.
        """
        chosen = provider or os.getenv("ALPRINA_LLM_PROVIDER", "openai")
        if isinstance(chosen, LLMProvider):
            chosen = chosen.value
        # Normalize so "OpenAI" / " anthropic " select the intended backend
        # instead of silently hitting the unknown-provider fallback.
        self.provider = str(chosen).strip().lower()
        self.api_key = None
        self.client = None
        # Always defined so attribute access is safe even when setup fails.
        self.model = None

        self._initialize_client()

    def _initialize_client(self):
        """Initialize the appropriate LLM client.

        Unknown provider names fall back to OpenAI; ``self.provider`` is
        updated to match so the later dispatch in ``analyze_code`` / ``chat``
        routes to the client that was actually created.
        """
        initializers = {
            LLMProvider.OPENAI.value: self._init_openai,
            LLMProvider.ANTHROPIC.value: self._init_anthropic,
            LLMProvider.OLLAMA.value: self._init_ollama,
        }
        initializer = initializers.get(self.provider)
        if initializer is None:
            logger.warning(f"Unknown provider: {self.provider}, falling back to OpenAI")
            self.provider = LLMProvider.OPENAI.value
            initializer = self._init_openai

        try:
            initializer()
        except ImportError as e:
            logger.error(f"Failed to initialize {self.provider}: {e}")
            logger.info("Install with: pip install openai anthropic ollama")

    def _init_openai(self):
        """Initialize OpenAI client.

        Reads ``OPENAI_API_KEY`` and ``OPENAI_MODEL`` (default "gpt-4o-mini").
        Leaves ``self.client`` as ``None`` when the key is unset or the
        ``openai`` package is missing.
        """
        try:
            from openai import OpenAI

            self.api_key = os.getenv("OPENAI_API_KEY")
            if not self.api_key:
                logger.warning("OPENAI_API_KEY not set. LLM features will be limited.")
                return

            self.client = OpenAI(api_key=self.api_key)
            self.model = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
            logger.info(f"Initialized OpenAI client with model: {self.model}")

        except ImportError:
            logger.error("OpenAI package not installed. Install with: pip install openai")

    def _init_anthropic(self):
        """Initialize Anthropic Claude client.

        Reads ``ANTHROPIC_API_KEY`` and ``ANTHROPIC_MODEL`` (default
        "claude-3-5-sonnet-20241022"). Leaves ``self.client`` as ``None`` when
        the key is unset or the ``anthropic`` package is missing.
        """
        try:
            from anthropic import Anthropic

            self.api_key = os.getenv("ANTHROPIC_API_KEY")
            if not self.api_key:
                logger.warning("ANTHROPIC_API_KEY not set.")
                return

            self.client = Anthropic(api_key=self.api_key)
            self.model = os.getenv("ANTHROPIC_MODEL", "claude-3-5-sonnet-20241022")
            logger.info(f"Initialized Anthropic client with model: {self.model}")

        except ImportError:
            logger.error("Anthropic package not installed. Install with: pip install anthropic")

    def _init_ollama(self):
        """Initialize Ollama local client.

        Uses the ``ollama`` module itself as the client object and reads
        ``OLLAMA_MODEL`` (default "llama3.2"). No API key is involved.
        """
        try:
            import ollama

            self.client = ollama
            self.model = os.getenv("OLLAMA_MODEL", "llama3.2")
            logger.info(f"Initialized Ollama client with model: {self.model}")

        except ImportError:
            logger.error("Ollama package not installed. Install with: pip install ollama")

    @staticmethod
    def _with_system(messages, system_prompt):
        """Return a new message list with an optional leading system message."""
        chat_messages = []
        if system_prompt:
            chat_messages.append({"role": "system", "content": system_prompt})
        chat_messages.extend(messages)
        return chat_messages

    @staticmethod
    def _ollama_options(max_tokens, temperature):
        """Translate generation settings into Ollama's ``options`` mapping."""
        return {"temperature": temperature, "num_predict": max_tokens}

    def analyze_code(self, code: str, filename: str, task: str = "security-scan") -> Dict[str, Any]:
        """
        Analyze code using LLM for security issues.

        Args:
            code: Source code to analyze
            filename: Name of the file
            task: Analysis task type

        Returns:
            Dict with a "findings" list. Results produced by the keyword
            fallback additionally carry ``"mock": True``; LLM results carry
            "raw_response".
        """
        if not self.client:
            return self._mock_analysis(code, filename)

        prompt = self._build_security_prompt(code, filename, task)

        try:
            if self.provider == LLMProvider.OPENAI.value:
                return self._analyze_with_openai(prompt)
            elif self.provider == LLMProvider.ANTHROPIC.value:
                return self._analyze_with_anthropic(prompt)
            elif self.provider == LLMProvider.OLLAMA.value:
                return self._analyze_with_ollama(prompt)
        except Exception as e:
            logger.error(f"LLM analysis failed: {e}")
            return self._mock_analysis(code, filename)

        # No branch matched (e.g. ``provider`` was reassigned after
        # construction): keep the documented Dict contract instead of
        # implicitly returning None.
        return self._mock_analysis(code, filename)

    def _build_security_prompt(self, code: str, filename: str, task: str) -> str:
        """Build security analysis prompt.

        Only the first ``MAX_CODE_CHARS`` characters of ``code`` are embedded.
        """
        # Truncate outside the template so no explanatory comment leaks into
        # the text that is sent to the model.
        snippet = code[:self.MAX_CODE_CHARS]
        return f"""You are a security expert analyzing code for vulnerabilities.

File: {filename}
Task: {task}

Analyze this code for security issues:

```
{snippet}
```

Identify:
1. Hardcoded secrets (API keys, passwords, tokens)
2. SQL injection vulnerabilities
3. XSS vulnerabilities
4. Insecure configurations
5. Outdated dependencies
6. Authentication issues
7. Authorization flaws
8. Data exposure risks

For each finding, provide:
- Severity (CRITICAL, HIGH, MEDIUM, LOW, INFO)
- Type (e.g., "Hardcoded Secret", "SQL Injection")
- Description (brief explanation)
- Line number (if applicable)
- Recommendation (how to fix)

Return ONLY a JSON array of findings, no other text:
[
  {{
    "severity": "HIGH",
    "type": "Hardcoded Secret",
    "description": "API key found in code",
    "line": 10,
    "recommendation": "Move to environment variable"
  }}
]

If no issues found, return: []
"""

    def _analyze_with_openai(self, prompt: str) -> Dict[str, Any]:
        """Analyze with OpenAI."""
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": "You are a security analysis expert. Return only valid JSON."},
                {"role": "user", "content": prompt}
            ],
            temperature=self.ANALYSIS_TEMPERATURE,
            max_tokens=self.ANALYSIS_MAX_TOKENS
        )

        content = response.choices[0].message.content
        return self._parse_llm_response(content)

    def _analyze_with_anthropic(self, prompt: str) -> Dict[str, Any]:
        """Analyze with Anthropic Claude."""
        response = self.client.messages.create(
            model=self.model,
            max_tokens=self.ANALYSIS_MAX_TOKENS,
            messages=[
                {"role": "user", "content": prompt}
            ],
            temperature=self.ANALYSIS_TEMPERATURE
        )

        content = response.content[0].text
        return self._parse_llm_response(content)

    def _analyze_with_ollama(self, prompt: str) -> Dict[str, Any]:
        """Analyze with Ollama local model.

        Uses the same temperature and token budget as the hosted providers.
        """
        response = self.client.chat(
            model=self.model,
            messages=[
                {"role": "system", "content": "You are a security analysis expert. Return only valid JSON."},
                {"role": "user", "content": prompt}
            ],
            options=self._ollama_options(self.ANALYSIS_MAX_TOKENS, self.ANALYSIS_TEMPERATURE)
        )

        content = response['message']['content']
        return self._parse_llm_response(content)

    def _parse_llm_response(self, content: str) -> Dict[str, Any]:
        """Parse LLM response into structured findings.

        Tries, in order: a JSON array inside a markdown code fence, the text
        as-is, and the outermost ``[...]`` span when the model wrapped the
        array in prose. If none parse, falls back to keyword extraction.

        Args:
            content: Raw model output; ``None`` is treated as empty text.

        Returns:
            Dict with "findings" (list) and "raw_response" (the text that was
            parsed, i.e. the fence contents when a fenced array was found).
        """
        import json
        import re

        text = content or ""

        # Try to extract JSON from markdown code blocks
        fenced = re.search(r'```(?:json)?\s*(\[.*?\])\s*```', text, re.DOTALL)
        if fenced:
            text = fenced.group(1)

        parse_error = None
        try:
            # Try direct JSON parse
            findings = json.loads(text)
            return {
                "findings": findings if isinstance(findings, list) else [findings],
                "raw_response": text
            }
        except json.JSONDecodeError as e:
            parse_error = e

        # Models often surround the array with prose; retry on the outermost
        # bracketed span, accepting it only if it is a list of objects so that
        # stray text such as "see [1]" is not mistaken for findings.
        start, end = text.find("["), text.rfind("]")
        if 0 <= start < end:
            try:
                findings = json.loads(text[start:end + 1])
            except json.JSONDecodeError:
                findings = None
            if isinstance(findings, list) and all(isinstance(item, dict) for item in findings):
                return {"findings": findings, "raw_response": text}

        logger.warning(f"Failed to parse LLM response as JSON: {parse_error}")
        logger.debug(f"Response content: {text[:500]}")

        # Fallback: extract findings from text
        return {
            "findings": self._extract_findings_from_text(text),
            "raw_response": text
        }

    def _extract_findings_from_text(self, text: str) -> list:
        """Extract findings from unstructured text.

        Keyword heuristic only: at most one secret finding and one SQL
        injection finding are produced.
        """
        findings = []
        lowered = text.lower()

        # Simple pattern matching for common issues
        if any(keyword in lowered for keyword in ("password", "api_key", "secret")):
            findings.append({
                "severity": "HIGH",
                "type": "Potential Hardcoded Secret",
                "description": "LLM detected potential secrets in code",
                "recommendation": "Review manually and move to secure storage"
            })

        if "sql" in lowered and "injection" in lowered:
            findings.append({
                "severity": "CRITICAL",
                "type": "SQL Injection Risk",
                "description": "LLM detected SQL injection vulnerability",
                "recommendation": "Use parameterized queries"
            })

        return findings

    def _mock_analysis(self, code: str, filename: str) -> Dict[str, Any]:
        """Fallback mock analysis when LLM is unavailable.

        Scans the lowercased code for a few keywords; ``filename`` is accepted
        for signature parity but not used. The result is tagged
        ``"mock": True``.
        """
        findings = []
        lowered = code.lower()

        # Simple pattern matching
        if "password" in lowered or "api_key" in lowered:
            findings.append({
                "severity": "HIGH",
                "type": "Hardcoded Secret",
                "description": "Potential hardcoded secret detected",
                "recommendation": "Move secrets to environment variables"
            })

        if "debug" in lowered and "true" in lowered:
            findings.append({
                "severity": "MEDIUM",
                "type": "Debug Mode",
                "description": "Debug mode appears to be enabled",
                "recommendation": "Disable debug mode in production"
            })

        return {"findings": findings, "mock": True}

    def chat(
        self,
        messages: list,
        system_prompt: Optional[str] = None,
        max_tokens: int = 4096,
        temperature: float = 0.7
    ) -> str:
        """
        Chat with LLM (non-streaming).

        Args:
            messages: List of message dicts with role and content
            system_prompt: System prompt to use
            max_tokens: Maximum tokens in response
            temperature: Temperature for generation

        Returns:
            Assistant's response text. Failures are reported as an
            "Error: ..." string rather than raised.
        """
        if not self.client:
            return "LLM client not initialized. Please set API keys."

        try:
            if self.provider == LLMProvider.OPENAI.value:
                return self._chat_openai(messages, system_prompt, max_tokens, temperature)
            elif self.provider == LLMProvider.ANTHROPIC.value:
                return self._chat_anthropic(messages, system_prompt, max_tokens, temperature)
            elif self.provider == LLMProvider.OLLAMA.value:
                return self._chat_ollama(messages, system_prompt, max_tokens, temperature)
            else:
                return "Unsupported provider for chat."
        except Exception as e:
            # loguru has no ``exc_info`` flag: extra kwargs make it call
            # str.format() on the message, which breaks on error text that
            # contains braces. opt(exception=True) attaches the traceback and
            # the "{}" placeholder keeps the error text out of the template.
            logger.opt(exception=True).error("Chat failed: {}", e)
            return f"Error: {e}"

    def chat_streaming(
        self,
        messages: list,
        system_prompt: Optional[str] = None,
        max_tokens: int = 4096,
        temperature: float = 0.7
    ):
        """
        Chat with LLM (streaming).

        Args:
            messages: List of message dicts with role and content
            system_prompt: System prompt to use
            max_tokens: Maximum tokens in response
            temperature: Temperature for generation

        Yields:
            Text chunks as they arrive. A failure mid-stream is yielded as a
            final "Error: ..." chunk rather than raised.
        """
        if not self.client:
            yield "LLM client not initialized. Please set API keys."
            return

        try:
            if self.provider == LLMProvider.OPENAI.value:
                yield from self._chat_streaming_openai(messages, system_prompt, max_tokens, temperature)
            elif self.provider == LLMProvider.ANTHROPIC.value:
                yield from self._chat_streaming_anthropic(messages, system_prompt, max_tokens, temperature)
            elif self.provider == LLMProvider.OLLAMA.value:
                yield from self._chat_streaming_ollama(messages, system_prompt, max_tokens, temperature)
            else:
                yield "Unsupported provider for streaming chat."
        except Exception as e:
            # See chat(): loguru needs opt(exception=True), not exc_info=True.
            logger.opt(exception=True).error("Streaming chat failed: {}", e)
            yield f"\n\nError: {e}"

    def _chat_openai(self, messages, system_prompt, max_tokens, temperature) -> str:
        """Chat with OpenAI (non-streaming)."""
        response = self.client.chat.completions.create(
            model=self.model,
            messages=self._with_system(messages, system_prompt),
            max_tokens=max_tokens,
            temperature=temperature
        )
        # ``content`` can be None; honor the ``str`` return type.
        return response.choices[0].message.content or ""

    def _chat_anthropic(self, messages, system_prompt, max_tokens, temperature) -> str:
        """Chat with Anthropic (non-streaming).

        The system prompt is passed via the dedicated ``system`` argument
        rather than as a message.
        """
        kwargs = {
            "model": self.model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature
        }
        if system_prompt:
            kwargs["system"] = system_prompt

        response = self.client.messages.create(**kwargs)
        return response.content[0].text

    def _chat_ollama(self, messages, system_prompt, max_tokens, temperature) -> str:
        """Chat with Ollama (non-streaming).

        ``max_tokens`` and ``temperature`` are forwarded through ``options``.
        """
        response = self.client.chat(
            model=self.model,
            messages=self._with_system(messages, system_prompt),
            options=self._ollama_options(max_tokens, temperature)
        )
        return response['message']['content']

    def _chat_streaming_openai(self, messages, system_prompt, max_tokens, temperature):
        """Chat with OpenAI (streaming). Yields each non-empty content delta."""
        stream = self.client.chat.completions.create(
            model=self.model,
            messages=self._with_system(messages, system_prompt),
            max_tokens=max_tokens,
            temperature=temperature,
            stream=True
        )

        for chunk in stream:
            # Some chunks carry no choices at all; skip them instead of
            # failing on choices[0].
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

    def _chat_streaming_anthropic(self, messages, system_prompt, max_tokens, temperature):
        """Chat with Anthropic (streaming). Yields text from ``text_stream``."""
        # ``messages.stream()`` is itself the streaming entry point; it is not
        # given a ``stream`` keyword.
        kwargs = {
            "model": self.model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature
        }
        if system_prompt:
            kwargs["system"] = system_prompt

        with self.client.messages.stream(**kwargs) as stream:
            for text in stream.text_stream:
                yield text

    def _chat_streaming_ollama(self, messages, system_prompt, max_tokens, temperature):
        """Chat with Ollama (streaming). Yields each non-empty content chunk."""
        stream = self.client.chat(
            model=self.model,
            messages=self._with_system(messages, system_prompt),
            options=self._ollama_options(max_tokens, temperature),
            stream=True
        )

        for chunk in stream:
            if chunk['message']['content']:
                yield chunk['message']['content']

452 

453 

# Process-wide LLM client, built lazily on the first get_llm_client() call.
_llm_client = None


def get_llm_client(model: Optional[str] = None) -> "LLMClient":
    """
    Return the shared LLM client, constructing it on first use.

    Args:
        model: Optional model name. When truthy, and the shared client already
            has a ``model`` attribute, that attribute is overwritten. The
            change is made on the shared instance, so later callers see it too.

    Returns:
        The module-wide LLMClient instance
    """
    global _llm_client

    shared = _llm_client
    if shared is None:
        shared = LLMClient()
        _llm_client = shared

    # Only override when the attribute exists on the client.
    if model and hasattr(shared, "model"):
        shared.model = model

    return shared