Coverage for src/alprina_cli/tools/file/grep.py: 26%

90 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2025-11-14 11:27 +0100

1""" 

2Grep Tool - Content Search 

3 

4Context Engineering: 

5- Search file contents for patterns 

6- Limits results to prevent context bloat 

7- Returns relevant context around matches 

8- Supports regex patterns 

9 

10Based on: Kimi-CLI Grep tool (simplified, no ripgrep dependency) 

11""" 

12 

13import re 

14from pathlib import Path 

15from typing import List, Dict, Any, Literal 

16from pydantic import BaseModel, Field 

17from loguru import logger 

18 

19from alprina_cli.tools.base import AlprinaToolBase, ToolOk, ToolError 

20 

21 

# Context-efficient limit: default upper bound on the matches one search returns.
MAX_MATCHES = 100

23 

24 

class GrepParams(BaseModel):
    """
    Parameters for grep content search.

    Context: Focused schema for pattern searching.
    """

    # Regex source text; the tool compiles it with `re` before searching.
    pattern: str = Field(description="Regular expression pattern to search for")

    # Search root: either a single file or a directory.
    path: str = Field(default=".", description="File or directory to search in")

    # Glob used to select files; only consulted when `path` is a directory.
    file_pattern: str = Field(
        default="*",
        description="Glob pattern to filter files (e.g., '*.py', '*.{js,ts}')",
    )

    # When true the regex is compiled with re.IGNORECASE.
    ignore_case: bool = Field(default=False, description="Case-insensitive search")

    # Number of surrounding lines captured on each side of a match.
    context_lines: int = Field(
        default=0,
        description="Number of lines to show before/after match (0 = match only)",
    )

    # Cap on the number of matches collected by a search.
    max_matches: int = Field(
        default=MAX_MATCHES,
        description=f"Maximum matches to return (default: {MAX_MATCHES})",
    )

    # Selects how the collected matches are rendered in the result.
    output_mode: Literal["content", "files_only", "count"] = Field(
        default="files_only",
        description="Output mode: content (show matches), files_only (just paths), count (count matches)",
    )

58 

59 

class GrepTool(AlprinaToolBase[GrepParams]):
    """
    Search file contents for patterns.

    Context Engineering Benefits:
    - Just-in-time content discovery
    - Configurable max_matches to control context
    - Context lines for relevant code
    - Multiple output modes for different needs

    Output Modes:
    - files_only: Just file paths (minimal context)
    - content: Matching lines with context
    - count: Number of matches (most compact)

    Notes:
    - max_matches caps the collected matches for single-file and directory
      searches alike, so every output mode (including count) describes at
      most max_matches matches. A non-positive max_matches yields no matches.
    - file_pattern accepts comma-separated brace alternatives such as
      '*.{js,ts}' on top of the glob syntax understood by pathlib.

    Usage:
    ```python
    tool = GrepTool()
    result = await tool.execute(GrepParams(
        pattern="def.*scan",
        path="./src",
        file_pattern="*.py",
        output_mode="files_only"
    ))
    ```
    """

    name: str = "Grep"
    description: str = """Search file contents for regex patterns.

Capabilities:
- Regex pattern matching
- File filtering by glob pattern
- Case-sensitive/insensitive search
- Context lines around matches
- Multiple output modes

Returns: Matches based on output_mode (files_only, content, or count)"""
    params: type[GrepParams] = GrepParams

    async def execute(self, params: GrepParams) -> ToolOk | ToolError:
        """
        Execute grep search.

        Context: Returns limited results based on output_mode.

        Args:
            params: Search parameters (pattern, path, filters, output mode).

        Returns:
            ToolOk with mode-specific content on success (including the
            "no matches" case), or ToolError when the regex does not
            compile, the path does not exist, or the search raises.
        """
        logger.debug(f"Grep: '{params.pattern}' in {params.path}")

        try:
            # Compile regex pattern
            flags = re.IGNORECASE if params.ignore_case else 0
            try:
                regex = re.compile(params.pattern, flags)
            except re.error as e:
                return ToolError(
                    message=f"Invalid regex pattern: {str(e)}",
                    brief="Invalid pattern"
                )

            # Resolve path: relative paths are anchored at the current working directory
            search_path = Path(params.path).expanduser()
            if not search_path.is_absolute():
                search_path = Path.cwd() / search_path

            if not search_path.exists():
                return ToolError(
                    message=f"Path not found: {params.path}",
                    brief="Path not found"
                )

            # Perform search. The cap is applied on both branches; a
            # single-file search used to return every match unbounded.
            if search_path.is_file():
                results = self._search_file(
                    search_path, regex, params, limit=max(params.max_matches, 0)
                )
            else:
                results = self._search_directory(search_path, regex, params)

            # Format output based on mode
            return self._format_results(results, params)

        except Exception as e:
            # Tool boundary: report any failure as a ToolError instead of raising.
            logger.error(f"Grep search failed: {e}")
            return ToolError(
                message=f"Grep search failed: {str(e)}",
                brief="Grep failed"
            )

    def _search_file(
        self,
        file_path: Path,
        regex: re.Pattern,
        params: GrepParams,
        limit: int | None = None,
    ) -> List[Dict[str, Any]]:
        """
        Search a single file line by line.

        Args:
            file_path: File to read.
            regex: Compiled pattern applied to each line with ``search``.
            params: Supplies ``context_lines``.
            limit: Maximum number of matches to collect; ``None`` means
                unbounded, a value <= 0 returns no matches.

        Returns:
            One dict per matching line with keys ``file``, ``line_number``
            (1-based), ``line``, ``context_before`` and ``context_after``.
            An unreadable file is logged and yields an empty list.
        """
        if limit is not None and limit <= 0:
            return []

        try:
            # Explicit UTF-8 keeps decoding independent of the host locale;
            # undecodable bytes are dropped rather than failing the file.
            content = file_path.read_text(encoding="utf-8", errors="ignore")
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole search.
            logger.warning(f"Could not read {file_path}: {e}")
            return []

        lines = content.splitlines()
        context = max(params.context_lines, 0)
        matches: List[Dict[str, Any]] = []

        for index, line in enumerate(lines):
            if not regex.search(line):
                continue

            matches.append({
                "file": str(file_path),
                "line_number": index + 1,
                "line": line,
                # Both slices are empty when context == 0.
                "context_before": lines[max(0, index - context):index],
                "context_after": lines[index + 1:index + 1 + context],
            })

            # Stop scanning as soon as the cap is reached.
            if limit is not None and len(matches) >= limit:
                break

        return matches

    @staticmethod
    def _expand_braces(pattern: str) -> List[str]:
        """
        Expand comma-separated brace groups into plain glob patterns.

        ``'*.{js,ts}'`` becomes ``['*.js', '*.ts']``. A pattern without such
        a group is returned unchanged as a one-element list; braces without
        a comma (``'{x}'``) are left literal. Empty and duplicate expansions
        are dropped.
        """
        brace_group = re.compile(r"\{([^{}]*,[^{}]*)\}")
        if brace_group.search(pattern) is None:
            return [pattern]

        expanded: List[str] = []
        pending = [pattern]
        while pending:
            current = pending.pop(0)
            group = brace_group.search(current)
            if group is None:
                if current and current not in expanded:
                    expanded.append(current)
                continue
            head, tail = current[:group.start()], current[group.end():]
            # Innermost groups are matched first, so nesting resolves in later rounds.
            pending[:0] = [head + option + tail for option in group.group(1).split(",")]
        return expanded

    def _search_directory(
        self,
        dir_path: Path,
        regex: re.Pattern,
        params: GrepParams
    ) -> List[Dict[str, Any]]:
        """
        Search all files under ``dir_path`` (recursively) matching file_pattern.

        Args:
            dir_path: Directory to walk with ``Path.rglob``.
            regex: Compiled pattern passed on to ``_search_file``.
            params: Supplies ``file_pattern``, ``max_matches`` and
                ``context_lines``.

        Returns:
            At most ``max_matches`` match dicts, in traversal order; the
            walk stops as soon as the cap is reached.
        """
        all_matches: List[Dict[str, Any]] = []
        remaining = max(params.max_matches, 0)
        if remaining == 0:
            return all_matches

        # A file may satisfy several expanded patterns; search it only once.
        visited = set()

        for pattern in self._expand_braces(params.file_pattern):
            # rglob is lazy: pattern errors surface during iteration and are
            # turned into a ToolError by execute().
            for file_path in dir_path.rglob(pattern):
                if file_path in visited or not file_path.is_file():
                    continue
                visited.add(file_path)

                found = self._search_file(file_path, regex, params, limit=remaining)
                all_matches.extend(found)
                remaining -= len(found)

                # Stop if we hit max_matches
                if remaining <= 0:
                    return all_matches

        return all_matches

    def _format_results(
        self,
        results: List[Dict[str, Any]],
        params: GrepParams
    ) -> ToolOk:
        """
        Format results based on output_mode.

        Args:
            results: Match dicts as produced by ``_search_file``.
            params: Supplies ``output_mode``.

        Returns:
            ToolOk whose ``content`` is ``{"count"}`` for count mode,
            ``{"files", "total_matches"}`` for files_only and
            ``{"matches", "total"}`` for content mode. With no results the
            content keeps ``"matches": []`` and additionally carries the
            zero-valued keys of the requested mode.
        """
        mode = params.output_mode

        if not results:
            # Keep the historical "matches" key and add the mode's own keys
            # so consumers can read them without special-casing "no results".
            empty: Dict[str, Any] = {"matches": []}
            if mode == "count":
                empty["count"] = 0
            elif mode == "files_only":
                empty["files"] = []
                empty["total_matches"] = 0
            else:
                empty["total"] = 0
            return ToolOk(
                content=empty,
                output="No matches found",
                metadata={"message": "No matches found"}
            )

        if mode == "count":
            # Just count matches (bounded by max_matches upstream)
            count = len(results)
            return ToolOk(
                content={"count": count},
                output=f"{count} matches found",
                metadata={"message": f"Found {count} matches"}
            )

        if mode == "files_only":
            # Just unique file paths, sorted for stable output
            files = sorted({r["file"] for r in results})
            return ToolOk(
                content={"files": files, "total_matches": len(results)},
                output="\n".join(files),
                metadata={"message": f"Found matches in {len(files)} files"}
            )

        # content mode: show full matches with context
        output_lines: List[str] = []
        for match in results:
            output_lines.append(f"\n{match['file']}:{match['line_number']}")
            output_lines.append("-" * 40)

            # Context before
            output_lines.extend(f" {line}" for line in match["context_before"])

            # The matching line
            output_lines.append(f"> {match['line']}")

            # Context after
            output_lines.extend(f" {line}" for line in match["context_after"])

        return ToolOk(
            content={
                "matches": results,
                "total": len(results)
            },
            output="\n".join(output_lines),
            metadata={"message": f"Found {len(results)} matches"}
        )