Coverage for src/alprina_cli/tools/file/glob.py: 29%

62 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2025-11-14 11:27 +0100

1""" 

2Glob Tool - File Pattern Matching 

3 

4Context Engineering: 

5- Enables just-in-time file discovery 

6- Limits results to prevent context bloat 

7- Safe pattern validation 

8- Returns relative paths (more readable) 

9 

10Based on: Kimi-CLI Glob tool (simplified for Alprina) 

11""" 

12 

13from pathlib import Path 

14from typing import List 

15from pydantic import BaseModel, Field 

16from loguru import logger 

17 

18from alprina_cli.tools.base import AlprinaToolBase, ToolOk, ToolError 

19 

20 

# Default cap on how many paths a single glob call reports; keeps the
# tool output small enough to stay context-efficient.
MAX_MATCHES = 500

22 

23 

class GlobParams(BaseModel):
    """
    Input schema for a glob file search.

    Context: Kept deliberately small - what to match, where to look,
    whether directories count as hits, and how many hits to report.
    """

    # Glob pattern to match against (required, no default).
    pattern: str = Field(description="Glob pattern to match files (e.g., '*.py', 'src/**/*.js')")

    # Directory the search is rooted at.
    directory: str = Field(description="Directory to search in (default: current directory)", default=".")

    # When False, only files are reported.
    include_dirs: bool = Field(description="Include directories in results (default: files only)", default=False)

    # Cap on the number of reported matches; defaults to MAX_MATCHES.
    max_results: int = Field(description=f"Maximum results to return (default: {MAX_MATCHES})", default=MAX_MATCHES)

45 

46 

class GlobTool(AlprinaToolBase[GlobParams]):
    """
    File pattern matching tool using glob syntax.

    Context Engineering Benefits:
    - Just-in-time file discovery (not pre-loading entire codebase)
    - Configurable max_results to prevent context bloat
    - Returns relative paths (more readable, less tokens)
    - Safe pattern validation (rejects patterns that start with `**`)

    Glob Patterns (pathlib syntax: `*`, `**`, `?`, `[seq]`; `{a,b}` brace
    alternation is NOT supported):
    - `*.py` - Python files directly inside the directory
    - `src/**/*.py` - Python files anywhere below src/
    - `src/**/*.[jt]s` - JS/TS files anywhere below src/
    - `test_*.py` - Files starting with test_

    Rejected patterns: anything beginning with `**` (e.g. `**/*.py`) and
    the bare `*` pattern, because they are considered too broad.

    Usage:
    ```python
    tool = GlobTool()
    result = await tool.execute(GlobParams(
        pattern="alprina_cli/**/*.py",
        directory="./src"
    ))
    # On success the ToolOk content dict lists the hits under "matches"
    ```
    """

    name: str = "Glob"
    description: str = """Find files matching glob patterns.

Capabilities:
- Glob pattern matching (*, **, ?, [])
- Recursive directory search
- File/directory filtering
- Result limiting (context control)

Returns: List of matching file paths (relative to search directory)"""
    params: type[GlobParams] = GlobParams

    async def execute(self, params: GlobParams) -> ToolOk | ToolError:
        """
        Execute glob file search.

        Context: Returns limited, relative paths for efficiency.

        Args:
            params: Pattern, search directory, directory filter and result cap.

        Returns:
            ToolOk whose content dict carries ``matches`` (paths relative to
            the search directory where possible), ``total_found`` (count
            before truncation), ``truncated``, ``directory`` and ``pattern``.
            ToolError when the pattern is rejected, ``max_results`` is
            negative, the directory is missing or not a directory, or the
            search itself raises.
        """
        logger.debug(f"Glob: {params.pattern} in {params.directory}")

        try:
            # Validate pattern
            error = self._validate_pattern(params.pattern)
            if error is not None:
                return error

            # A negative cap would make the slice below drop items from the
            # END of the list instead of limiting it, so reject it up front.
            if params.max_results < 0:
                return ToolError(
                    message=f"max_results must not be negative (got {params.max_results})",
                    brief="Invalid max_results"
                )

            # Resolve directory: relative inputs are anchored at the CWD.
            dir_path = Path(params.directory).expanduser()
            if not dir_path.is_absolute():
                dir_path = Path.cwd() / dir_path

            # Validate directory
            if not dir_path.exists():
                return ToolError(
                    message=f"Directory not found: {params.directory}",
                    brief="Directory not found"
                )

            if not dir_path.is_dir():
                return ToolError(
                    message=f"Not a directory: {params.directory}",
                    brief="Invalid directory"
                )

            # Perform glob search
            matches = self._glob_search(dir_path, params.pattern, params.include_dirs)

            # Limit results for context efficiency; total_found keeps the
            # pre-truncation count so the caller knows how much was cut.
            total_found = len(matches)
            truncated = total_found > params.max_results
            if truncated:
                matches = matches[:params.max_results]

            # Convert to relative paths (more readable)
            relative_paths = []
            for match in matches:
                try:
                    relative_paths.append(str(match.relative_to(dir_path)))
                except ValueError:
                    # If can't make relative, use absolute
                    relative_paths.append(str(match))

            # Build result message
            if total_found == 0:
                message = f"No matches found for pattern '{params.pattern}'"
            elif truncated:
                message = (
                    f"Found {total_found} matches for '{params.pattern}'. "
                    f"Showing first {params.max_results}. "
                    "Use a more specific pattern or increase max_results."
                )
            else:
                message = f"Found {total_found} matches for '{params.pattern}'"

            return ToolOk(
                content={
                    "matches": relative_paths,
                    "total_found": total_found,
                    "truncated": truncated,
                    "directory": str(dir_path),
                    "pattern": params.pattern
                },
                output="\n".join(relative_paths) if relative_paths else "(no matches)",
                metadata={"message": message}
            )

        except Exception as e:
            # Tool boundary: any failure (e.g. an invalid pattern rejected by
            # pathlib) is reported as a ToolError rather than raised.
            logger.error(f"Glob search failed: {e}")
            return ToolError(
                message=f"Glob search failed: {str(e)}",
                brief="Glob failed"
            )

    def _validate_pattern(self, pattern: str) -> ToolError | None:
        """
        Validate glob pattern safety.

        Context: Prevent patterns that would search too broadly.

        Args:
            pattern: The raw glob pattern supplied by the caller.

        Returns:
            A ToolError describing why the pattern is rejected, or None when
            the pattern is acceptable.
        """
        # Prevent starting with ** (would search everything). This also
        # covers the catch-all '**/*', so no separate check is needed for it.
        if pattern.startswith("**"):
            return ToolError(
                message=(
                    f"Pattern '{pattern}' starts with '**' which is not allowed. "
                    "This would recursively search all directories and may be too broad. "
                    "Use a more specific pattern like 'src/**/*.py' instead."
                ),
                brief="Unsafe pattern"
            )

        # Reject the bare wildcard: it matches every entry in the directory.
        if pattern == "*":
            return ToolError(
                message=(
                    f"Pattern '{pattern}' is too broad and would match everything. "
                    "Use a more specific pattern with file extensions or directory names."
                ),
                brief="Pattern too broad"
            )

        return None

    def _glob_search(
        self,
        directory: Path,
        pattern: str,
        include_dirs: bool
    ) -> list[Path]:
        """
        Perform glob search.

        Context: Returns sorted list for consistent output.

        Args:
            directory: Directory the pattern is evaluated against.
            pattern: Glob pattern passed to ``Path.glob``.
            include_dirs: When False, only entries for which ``is_file()``
                is true are kept.

        Returns:
            Sorted list of matching paths.
        """
        return sorted(
            entry
            for entry in directory.glob(pattern)
            if include_dirs or entry.is_file()
        )