Coverage for src/alprina_cli/tools/file/grep.py: 26%
90 statements
« prev ^ index » next coverage.py v7.11.3, created at 2025-11-14 11:27 +0100
« prev ^ index » next coverage.py v7.11.3, created at 2025-11-14 11:27 +0100
"""
Grep Tool - Content Search

Context Engineering:
- Search file contents for patterns
- Limits results to prevent context bloat
- Returns relevant context around matches
- Supports regex patterns

Based on: Kimi-CLI Grep tool (simplified, no ripgrep dependency)
"""
13import re
14from pathlib import Path
15from typing import List, Dict, Any, Literal
16from pydantic import BaseModel, Field
17from loguru import logger
19from alprina_cli.tools.base import AlprinaToolBase, ToolOk, ToolError
# Default ceiling on the number of matches a single search returns; kept small
# so results stay context-efficient.
MAX_MATCHES: int = 100
class GrepParams(BaseModel):
    """
    Parameters for grep content search.

    Context: Focused schema for pattern searching.
    """

    # The regular expression to look for; the only field declared without a default.
    pattern: str = Field(description="Regular expression pattern to search for")

    # Where to search: a single file or a directory tree.
    path: str = Field(description="File or directory to search in", default=".")

    # Restricts which files are examined when `path` is a directory.
    file_pattern: str = Field(
        description="Glob pattern to filter files (e.g., '*.py', '*.{js,ts}')",
        default="*",
    )

    # When true the pattern is compiled with re.IGNORECASE.
    ignore_case: bool = Field(description="Case-insensitive search", default=False)

    # Number of surrounding lines captured on each side of a matching line.
    context_lines: int = Field(
        description="Number of lines to show before/after match (0 = match only)",
        default=0,
    )

    # Upper bound on returned matches; defaults to the module-level MAX_MATCHES.
    max_matches: int = Field(
        description=f"Maximum matches to return (default: {MAX_MATCHES})",
        default=MAX_MATCHES,
    )

    # Selects how results are rendered by the tool.
    output_mode: Literal["content", "files_only", "count"] = Field(
        description=(
            "Output mode: content (show matches), "
            "files_only (just paths), count (count matches)"
        ),
        default="files_only",
    )
class GrepTool(AlprinaToolBase[GrepParams]):
    """
    Search file contents for patterns.

    Context Engineering Benefits:
    - Just-in-time content discovery
    - Configurable max_matches to control context
    - Context lines for relevant code
    - Multiple output modes for different needs

    Output Modes:
    - files_only: Just file paths (minimal context)
    - content: Matching lines with context
    - count: Number of matches (most compact)

    Notes:
    - ``max_matches`` bounds the result for both single-file and directory
      searches; a value of zero or less yields no matches.
    - ``file_pattern`` accepts shell-style brace alternation such as
      ``*.{js,ts}``, which is expanded into one glob per alternative.

    Usage:
    ```python
    tool = GrepTool()
    result = await tool.execute(GrepParams(
        pattern="def.*scan",
        path="./src",
        file_pattern="*.py",
        output_mode="files_only"
    ))
    ```
    """

    name: str = "Grep"
    description: str = """Search file contents for regex patterns.

Capabilities:
- Regex pattern matching
- File filtering by glob pattern
- Case-sensitive/insensitive search
- Context lines around matches
- Multiple output modes

Returns: Matches based on output_mode (files_only, content, or count)"""
    params: type[GrepParams] = GrepParams

    async def execute(self, params: GrepParams) -> ToolOk | ToolError:
        """
        Execute grep search.

        Context: Returns limited results based on output_mode.

        Args:
            params: Search parameters (pattern, path, filters, output mode).

        Returns:
            ToolOk with the formatted results, or ToolError when the regex is
            invalid, the path does not exist, or the search fails unexpectedly.
        """
        logger.debug(f"Grep: '{params.pattern}' in {params.path}")

        try:
            # Compile regex pattern
            flags = re.IGNORECASE if params.ignore_case else 0
            try:
                regex = re.compile(params.pattern, flags)
            except re.error as e:
                return ToolError(
                    message=f"Invalid regex pattern: {str(e)}",
                    brief="Invalid pattern"
                )

            # Resolve path: relative paths are anchored at the current
            # working directory.
            search_path = Path(params.path).expanduser()
            if not search_path.is_absolute():
                search_path = Path.cwd() / search_path

            if not search_path.exists():
                return ToolError(
                    message=f"Path not found: {params.path}",
                    brief="Path not found"
                )

            # Perform search
            if search_path.is_file():
                results = self._search_file(search_path, regex, params)
            else:
                results = self._search_directory(search_path, regex, params)

            # Format output based on mode
            return self._format_results(results, params)

        except Exception as e:
            # Tool boundary: report any unexpected failure (including an
            # invalid glob pattern) as a ToolError instead of raising.
            logger.error(f"Grep search failed: {e}")
            return ToolError(
                message=f"Grep search failed: {str(e)}",
                brief="Grep failed"
            )

    def _search_file(
        self,
        file_path: Path,
        regex: re.Pattern,
        params: GrepParams,
        limit: int | None = None,
    ) -> List[Dict[str, Any]]:
        """
        Search a single file.

        Args:
            file_path: File to scan line by line.
            regex: Compiled pattern applied to each line with ``search``.
            params: Search parameters; ``context_lines`` and ``max_matches``
                are read here.
            limit: Maximum number of matches to collect from this file.
                Defaults to ``params.max_matches`` so that a direct single-file
                search is bounded as well.

        Returns:
            One dict per matching line with the keys ``file``, ``line_number``
            (1-based), ``line``, ``context_before`` and ``context_after``.
            An unreadable file yields an empty list.
        """
        if limit is None:
            limit = params.max_matches
        if limit <= 0:
            return []

        try:
            # Decode as UTF-8 regardless of the platform locale; undecodable
            # bytes are dropped so binary-ish files do not abort the search.
            content = file_path.read_text(encoding="utf-8", errors="ignore")
        except Exception as e:
            # Best effort: an unreadable file is skipped, not fatal.
            logger.warning(f"Could not read {file_path}: {e}")
            return []

        lines = content.splitlines()
        # Negative values behave like 0 (match line only).
        context = max(params.context_lines, 0)

        matches: List[Dict[str, Any]] = []
        for index, line in enumerate(lines):
            if not regex.search(line):
                continue

            matches.append({
                "file": str(file_path),
                "line_number": index + 1,
                "line": line,
                # Slices clamp at the file boundaries and are empty when
                # context is 0.
                "context_before": lines[max(0, index - context):index],
                "context_after": lines[index + 1:index + 1 + context],
            })

            # Stop scanning as soon as the budget is used up.
            if len(matches) >= limit:
                break

        return matches

    def _search_directory(
        self,
        dir_path: Path,
        regex: re.Pattern,
        params: GrepParams
    ) -> List[Dict[str, Any]]:
        """
        Search all files in directory matching file_pattern.

        The directory is walked recursively. Brace alternatives in
        ``params.file_pattern`` are expanded into separate globs, and a file
        matched by more than one of them is searched only once.

        Args:
            dir_path: Root directory of the recursive search.
            regex: Compiled pattern applied to each line.
            params: Search parameters; ``file_pattern`` and ``max_matches``
                are read here.

        Returns:
            At most ``params.max_matches`` match dicts, in traversal order.
        """
        limit = max(params.max_matches, 0)
        all_matches: List[Dict[str, Any]] = []
        if limit == 0:
            return all_matches

        patterns = self._expand_braces(params.file_pattern)
        # Only needed when several globs could yield the same file.
        seen: set[Path] = set()

        for pattern in patterns:
            # An invalid glob pattern raises here (possibly only once iteration
            # starts); execute() turns that into a ToolError.
            for file_path in dir_path.rglob(pattern):
                if file_path in seen or not file_path.is_file():
                    continue
                if len(patterns) > 1:
                    seen.add(file_path)

                # Hand the remaining budget down so a file with many matches
                # is not scanned past the point where results are discarded.
                remaining = limit - len(all_matches)
                all_matches.extend(
                    self._search_file(file_path, regex, params, remaining)
                )

                # Stop if we hit max_matches
                if len(all_matches) >= limit:
                    return all_matches

        return all_matches

    @staticmethod
    def _expand_braces(pattern: str) -> List[str]:
        """
        Expand shell-style brace alternation into plain glob patterns.

        ``"*.{js,ts}"`` becomes ``["*.js", "*.ts"]``. Only groups that contain
        a comma are expanded; a pattern without such a group is returned
        unchanged as a single-element list. Duplicates are removed while
        preserving order.
        """
        # Match an innermost group, i.e. one without nested braces.
        group = re.search(r"\{([^{}]*,[^{}]*)\}", pattern)
        if group is None:
            return [pattern]

        head = pattern[:group.start()]
        tail = pattern[group.end():]

        expanded: List[str] = []
        for option in group.group(1).split(","):
            # Recurse to handle further (or enclosing) groups.
            expanded.extend(GrepTool._expand_braces(head + option + tail))

        return list(dict.fromkeys(expanded))

    def _format_results(
        self,
        results: List[Dict[str, Any]],
        params: GrepParams
    ) -> ToolOk:
        """
        Format results based on output_mode.

        Args:
            results: Match dicts as produced by the search helpers.
            params: Search parameters; only ``output_mode`` is read here.

        Returns:
            ToolOk whose ``content``/``output`` depend on the mode:
            ``count`` gives the number of matches, ``files_only`` gives the
            sorted unique file paths, and ``content`` gives every match with
            its context lines. An empty result set gives "No matches found"
            in every mode.
        """
        if not results:
            return ToolOk(
                content={"matches": []},
                output="No matches found",
                metadata={"message": "No matches found"}
            )

        if params.output_mode == "count":
            # Just count matches
            count = len(results)
            return ToolOk(
                content={"count": count},
                output=f"{count} matches found",
                metadata={"message": f"Found {count} matches"}
            )

        if params.output_mode == "files_only":
            # Just unique file paths, sorted for stable output
            files = sorted({r["file"] for r in results})

            return ToolOk(
                content={"files": files, "total_matches": len(results)},
                output="\n".join(files),
                metadata={"message": f"Found matches in {len(files)} files"}
            )

        # content mode: show full matches with context
        output_lines: List[str] = []

        for match in results:
            output_lines.append(f"\n{match['file']}:{match['line_number']}")
            output_lines.append("-" * 40)

            # Context before
            output_lines.extend(f"  {line}" for line in match["context_before"])

            # The matching line
            output_lines.append(f"> {match['line']}")

            # Context after
            output_lines.extend(f"  {line}" for line in match["context_after"])

        return ToolOk(
            content={
                "matches": results,
                "total": len(results)
            },
            output="\n".join(output_lines),
            metadata={"message": f"Found {len(results)} matches"}
        )