Coverage for src/alprina_cli/guardrails/output_guardrails.py: 26%
186 statements
« prev ^ index » next coverage.py v7.11.3, created at 2025-11-14 11:27 +0100
« prev ^ index » next coverage.py v7.11.3, created at 2025-11-14 11:27 +0100
1"""
Output Guardrails

Sanitize sensitive information from tool outputs.
Prevent leaking: PII, credentials, internal IPs, file paths, etc.
6"""
8from abc import ABC, abstractmethod
9from typing import Any, Dict, Optional, List
10from pydantic import BaseModel
11from loguru import logger
12import re
class SanitizationResult(BaseModel):
    """Outcome of passing a value through one or more output guardrails."""

    # The value after sanitization (may be any type, not only str).
    sanitized_value: Any
    # How many individual matches were replaced with a placeholder.
    redactions_made: int = 0
    # Labels of the kinds of data that were redacted (e.g. "email").
    redaction_types: List[str] = []
class OutputGuardrail(ABC):
    """
    Abstract interface implemented by every output guardrail.

    Design goals for implementations:
    - Fast sanitization (< 10ms per check)
    - Preserve data utility while removing sensitive info
    - Track what was redacted for audit logs
    """

    # Human-readable identifier of the guardrail.
    name: str = "OutputGuardrail"

    @abstractmethod
    def sanitize(self, value: Any) -> SanitizationResult:
        """
        Produce a sanitized version of ``value``.

        Args:
            value: Output value to sanitize

        Returns:
            SanitizationResult carrying the sanitized value together with
            the number and kinds of redactions performed
        """
        raise NotImplementedError
class PIIScrubber(OutputGuardrail):
    """
    Scrub Personally Identifiable Information from outputs.

    Patterns detected:
    - Email addresses
    - Phone numbers (US-style 3-3-4 digits, optional "+1"/"1" prefix)
    - Social Security Numbers (NNN-NN-NNNN)
    - Credit card numbers (16 digits in groups of four)

    Each category can be switched off individually via the constructor.
    """

    name: str = "PIIScrubber"

    # PII patterns
    # The TLD class is [A-Za-z]; a "|" inside a character class is a literal
    # pipe, not alternation, so it must not appear here.
    EMAIL_PATTERN = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    # Phone pattern to match various formats: 555-123-4567, (555) 123-4567, 5551234567, +1-555-123-4567
    # A lookbehind is used instead of a leading \b so the match may begin at
    # "(" or "+"; with \b those characters were left behind in the output.
    PHONE_PATTERN = r'(?<!\w)(?:\+?1[-.\s]?)?(?:\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4})\b'
    SSN_PATTERN = r'\b\d{3}-\d{2}-\d{4}\b'
    CREDIT_CARD_PATTERN = r'\b(?:\d{4}[-\s]?){3}\d{4}\b'

    def __init__(self, scrub_emails: bool = True, scrub_phones: bool = True,
                 scrub_ssn: bool = True, scrub_credit_cards: bool = True):
        """
        Args:
            scrub_emails: Redact email addresses when True
            scrub_phones: Redact phone numbers when True
            scrub_ssn: Redact Social Security Numbers when True
            scrub_credit_cards: Redact credit card numbers when True
        """
        self.scrub_emails = scrub_emails
        self.scrub_phones = scrub_phones
        self.scrub_ssn = scrub_ssn
        self.scrub_credit_cards = scrub_credit_cards

    def sanitize(self, value: Any) -> SanitizationResult:
        """
        Scrub PII from value.

        Args:
            value: Output value to sanitize; non-string values are returned
                unchanged with zero redactions

        Returns:
            SanitizationResult with placeholders substituted for every
            match of each enabled category
        """
        if not isinstance(value, str):
            return SanitizationResult(sanitized_value=value, redactions_made=0)

        # (enabled, pattern, placeholder, audit label, noun for the log line).
        # The order is fixed: emails, phones, SSNs, credit cards.
        passes = (
            (self.scrub_emails, self.EMAIL_PATTERN,
             '[EMAIL_REDACTED]', "email", "email(s)"),
            (self.scrub_phones, self.PHONE_PATTERN,
             '[PHONE_REDACTED]', "phone", "phone number(s)"),
            (self.scrub_ssn, self.SSN_PATTERN,
             '[SSN_REDACTED]', "ssn", "SSN(s)"),
            (self.scrub_credit_cards, self.CREDIT_CARD_PATTERN,
             '[CREDIT_CARD_REDACTED]', "credit_card", "credit card(s)"),
        )

        sanitized = value
        redactions = 0
        redaction_types = []

        for enabled, pattern, placeholder, label, noun in passes:
            if not enabled:
                continue
            # subn replaces and counts in a single scan of the text.
            sanitized, count = re.subn(pattern, placeholder, sanitized)
            if count:
                redactions += count
                redaction_types.append(label)
                logger.debug(f"Redacted {count} {noun}")

        return SanitizationResult(
            sanitized_value=sanitized,
            redactions_made=redactions,
            redaction_types=redaction_types
        )
class CredentialFilter(OutputGuardrail):
    """
    Filter credentials and secrets from outputs.

    Patterns detected:
    - API keys (common formats)
    - AWS credentials
    - JWT tokens
    - Password patterns
    - Private keys (the whole PEM block, not only its header line)
    - OAuth tokens

    All patterns are applied case-insensitively, in list order. Each match
    (including the ``key=`` prefix where the pattern has one) is replaced by
    ``[<TYPE>_REDACTED]``.
    """

    name: str = "CredentialFilter"

    # Credential patterns
    # Order matters: a more specific pattern must precede any generic one
    # that could consume the same text first.
    PATTERNS = [
        (r'api[_-]?key[_-]?[=:]\s*["\']?([a-zA-Z0-9_\-]{20,})["\']?', 'api_key'),
        (r'AKIA[0-9A-Z]{16}', 'aws_access_key'),
        # Also accepts the standard "aws_secret_access_key = ..." spelling.
        (r'aws[_-]?secret(?:[_-]?access[_-]?key)?[_-]?\s*[=:]\s*["\']?([a-zA-Z0-9/+=]{40})["\']?', 'aws_secret'),
        (r'eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*', 'jwt_token'),
        (r'password[_-]?[=:]\s*["\']?([^\s"\']{8,})["\']?', 'password'),
        (r'passwd[_-]?[=:]\s*["\']?([^\s"\']{8,})["\']?', 'password'),
        # Must come before the generic "token" pattern, which would otherwise
        # match the "token=..." tail of "oauth_token=...".
        (r'oauth[_-]?token[_-]?[=:]\s*["\']?([a-zA-Z0-9_\-]{20,})["\']?', 'oauth_token'),
        (r'token[_-]?[=:]\s*["\']?([a-zA-Z0-9_\-]{20,})["\']?', 'token'),
        # PEM blocks: (?s) lets "." span newlines so the key body up to the
        # END line is removed. If no END line exists, the optional group
        # matches nothing and only the header is redacted.
        # The OpenSSH form precedes the generic one so it keeps its own label.
        (r'(?s)-----BEGIN OPENSSH PRIVATE KEY-----(?:.*?-----END OPENSSH PRIVATE KEY-----)?', 'ssh_key'),
        (r'(?s)-----BEGIN (?:[A-Z]+ )?PRIVATE KEY-----(?:.*?-----END (?:[A-Z]+ )?PRIVATE KEY-----)?', 'private_key'),
        (r'gh[pousr]_[A-Za-z0-9_]{36,}', 'github_token'),
        (r'sk_live_[a-zA-Z0-9]{24,}', 'stripe_key'),
        (r'AIza[0-9A-Za-z_\-]{35}', 'google_api_key'),
        (r'SK[a-zA-Z0-9]{32}', 'twilio_key'),
    ]

    def sanitize(self, value: Any) -> SanitizationResult:
        """
        Filter credentials from value.

        Args:
            value: Output value to sanitize; non-string values are returned
                unchanged with zero redactions

        Returns:
            SanitizationResult with every credential match replaced by a
            ``[<TYPE>_REDACTED]`` placeholder
        """
        if not isinstance(value, str):
            return SanitizationResult(sanitized_value=value, redactions_made=0)

        sanitized = value
        redactions = 0
        redaction_types = []

        # Check each credential pattern
        for pattern, cred_type in self.PATTERNS:
            # subn replaces and counts matches in one scan.
            sanitized, count = re.subn(
                pattern,
                f'[{cred_type.upper()}_REDACTED]',
                sanitized,
                flags=re.IGNORECASE,
            )
            if not count:
                continue
            redactions += count
            # Two patterns share the "password" label; report it once.
            if cred_type not in redaction_types:
                redaction_types.append(cred_type)
            logger.warning(f"Redacted {cred_type} from output")

        return SanitizationResult(
            sanitized_value=sanitized,
            redactions_made=redactions,
            redaction_types=redaction_types
        )
class IPRedactor(OutputGuardrail):
    """
    Redact internal network addresses.

    Patterns redacted:
    - Private IPv4 ranges (10.x, 172.16-31.x, 192.168.x) and loopback (127.x)
    - IPv6 link-local addresses (fe80:...)
    - MAC addresses (optional, off by default)
    """

    name: str = "IPRedactor"

    # IP patterns
    PRIVATE_IP_PATTERNS = [
        r'\b10\.\d{1,3}\.\d{1,3}\.\d{1,3}\b',  # 10.x.x.x
        r'\b172\.(1[6-9]|2[0-9]|3[0-1])\.\d{1,3}\.\d{1,3}\b',  # 172.16-31.x.x
        r'\b192\.168\.\d{1,3}\.\d{1,3}\b',  # 192.168.x.x
        r'\b127\.\d{1,3}\.\d{1,3}\.\d{1,3}\b',  # 127.x.x.x (loopback)
    ]

    # IPv6 link-local. The prefix accepts either letter case, matching the
    # case-insensitive hex class used for the rest of the address.
    IPV6_PRIVATE_PATTERN = r'\b[fF][eE]80:[0-9a-fA-F:]+\b'
    MAC_ADDRESS_PATTERN = r'\b([0-9A-Fa-f]{2}[:-]){5}([0-9A-Fa-f]{2})\b'

    def __init__(self, redact_private_ips: bool = True, redact_ipv6: bool = True,
                 redact_mac: bool = False):
        """
        Args:
            redact_private_ips: Redact private/loopback IPv4 addresses when True
            redact_ipv6: Redact IPv6 link-local addresses when True
            redact_mac: Redact MAC addresses when True
        """
        self.redact_private_ips = redact_private_ips
        self.redact_ipv6 = redact_ipv6
        self.redact_mac = redact_mac

    def sanitize(self, value: Any) -> SanitizationResult:
        """
        Redact IPs from value.

        Args:
            value: Output value to sanitize; non-string values are returned
                unchanged with zero redactions

        Returns:
            SanitizationResult with matched addresses replaced by
            ``[IP_REDACTED]``, ``[IPV6_REDACTED]`` or ``[MAC_REDACTED]``
        """
        if not isinstance(value, str):
            return SanitizationResult(sanitized_value=value, redactions_made=0)

        sanitized = value
        redactions = 0
        redaction_types = []

        # Redact private IPs
        if self.redact_private_ips:
            for pattern in self.PRIVATE_IP_PATTERNS:
                # subn replaces and counts in one scan.
                sanitized, count = re.subn(pattern, '[IP_REDACTED]', sanitized)
                if count:
                    redactions += count
                    # All IPv4 ranges share one audit label.
                    if "private_ip" not in redaction_types:
                        redaction_types.append("private_ip")

        # Redact IPv6
        if self.redact_ipv6:
            sanitized, count = re.subn(
                self.IPV6_PRIVATE_PATTERN, '[IPV6_REDACTED]', sanitized
            )
            if count:
                redactions += count
                redaction_types.append("ipv6")

        # Redact MAC addresses
        if self.redact_mac:
            sanitized, count = re.subn(
                self.MAC_ADDRESS_PATTERN, '[MAC_REDACTED]', sanitized
            )
            if count:
                redactions += count
                redaction_types.append("mac_address")

        if redactions > 0:
            logger.debug(f"Redacted {redactions} IP/MAC address(es)")

        return SanitizationResult(
            sanitized_value=sanitized,
            redactions_made=redactions,
            redaction_types=redaction_types
        )
class PathSanitizer(OutputGuardrail):
    """
    Sanitize sensitive file paths from outputs.

    Patterns sanitized:
    - User home directories (/home/<name>, /Users/<name>)
    - Windows paths with usernames (C:\\Users\\<name>)
    - First path component under /tmp and /var/tmp

    The component that follows each prefix is replaced by ``[USER]``; the
    rest of the path is left intact.
    """

    name: str = "PathSanitizer"

    # Path patterns
    # Every pattern carries a (?!\[USER\]) guard so an already-inserted
    # placeholder is never matched again. Without it "/var/tmp/x" would be
    # counted by both the /var/tmp and /tmp patterns, and re-sanitizing
    # clean text would report phantom redactions.
    PATTERNS = [
        (r'/home/(?!\[USER\])([^/\s]+)', '/home/[USER]'),
        (r'/Users/(?!\[USER\])([^/\s]+)', '/Users/[USER]'),
        # Windows user names may contain spaces, so whitespace is allowed,
        # but the match stops at a backslash, a double quote or a line break
        # so it cannot run on into later lines.
        (r'C:\\Users\\(?!\[USER\])([^\\"\r\n]+)', r'C:\\Users\\[USER]'),
        # The more specific /var/tmp prefix is handled before /tmp.
        (r'/var/tmp/(?!\[USER\])([^/\s]+)', '/var/tmp/[USER]'),
        (r'/tmp/(?!\[USER\])([^/\s]+)', '/tmp/[USER]'),
    ]

    def __init__(self, sanitize_user_paths: bool = True):
        """
        Args:
            sanitize_user_paths: When False, sanitize() returns its input
                unchanged
        """
        self.sanitize_user_paths = sanitize_user_paths

    def sanitize(self, value: Any) -> SanitizationResult:
        """
        Sanitize paths from value.

        Args:
            value: Output value to sanitize; non-string values are returned
                unchanged with zero redactions

        Returns:
            SanitizationResult with user-identifying path components
            replaced by ``[USER]``
        """
        # Nothing to do for non-strings or when the guardrail is disabled.
        if not isinstance(value, str) or not self.sanitize_user_paths:
            return SanitizationResult(sanitized_value=value, redactions_made=0)

        sanitized = value
        redactions = 0
        redaction_types = []

        # Sanitize each path pattern
        for pattern, replacement in self.PATTERNS:
            # subn replaces and counts in one scan.
            sanitized, count = re.subn(pattern, replacement, sanitized)
            if count:
                redactions += count
                if "user_path" not in redaction_types:
                    redaction_types.append("user_path")

        if redactions > 0:
            logger.debug(f"Sanitized {redactions} user path(s)")

        return SanitizationResult(
            sanitized_value=sanitized,
            redactions_made=redactions,
            redaction_types=redaction_types
        )
# Guardrail chain used by sanitize_output() when the caller supplies none.
# Guardrails run in list order, each receiving the previous one's output.
DEFAULT_OUTPUT_GUARDRAILS = [
    PIIScrubber(),
    CredentialFilter(),
    # IPv6 and MAC redaction are switched off in the default chain.
    IPRedactor(redact_private_ips=True, redact_ipv6=False, redact_mac=False),
    PathSanitizer(),
]
def sanitize_output(
    value: Any,
    guardrails: Optional[List[OutputGuardrail]] = None
) -> SanitizationResult:
    """
    Sanitize output through guardrail chain.

    Each guardrail receives the value produced by the previous one, so the
    order of ``guardrails`` is significant.

    Args:
        value: Output value to sanitize
        guardrails: List of guardrails to apply (defaults to DEFAULT_OUTPUT_GUARDRAILS)

    Returns:
        SanitizationResult with sanitized value and redaction summary.
        ``redaction_types`` lists each type once, in the order it was first
        reported by the chain.
    """
    if guardrails is None:
        guardrails = DEFAULT_OUTPUT_GUARDRAILS

    sanitized = value
    total_redactions = 0
    # A dict serves as an insertion-ordered set: de-duplicating through
    # set() would return the types in a hash-dependent, unstable order.
    seen_types: Dict[str, None] = {}

    # Apply each guardrail in sequence
    for guardrail in guardrails:
        result = guardrail.sanitize(sanitized)
        sanitized = result.sanitized_value
        total_redactions += result.redactions_made
        seen_types.update(dict.fromkeys(result.redaction_types))

    return SanitizationResult(
        sanitized_value=sanitized,
        redactions_made=total_redactions,
        redaction_types=list(seen_types)
    )
def sanitize_dict(
    data: Dict[str, Any],
    guardrails: Optional[List[OutputGuardrail]] = None
) -> tuple[Dict[str, Any], int]:
    """
    Build a sanitized copy of a dictionary, descending into nested containers.

    String values go through sanitize_output(); nested dicts and lists are
    processed recursively; values of any other type are copied over as-is.
    Keys are never modified.

    Args:
        data: Dictionary to sanitize
        guardrails: List of guardrails to apply

    Returns:
        Tuple of (sanitized_dict, total_redactions)
    """
    cleaned: Dict[str, Any] = {}
    redaction_count = 0

    for key, value in data.items():
        if isinstance(value, str):
            outcome = sanitize_output(value, guardrails)
            new_value, made = outcome.sanitized_value, outcome.redactions_made
        elif isinstance(value, dict):
            new_value, made = sanitize_dict(value, guardrails)
        elif isinstance(value, list):
            new_value, made = sanitize_list(value, guardrails)
        else:
            # Non-container, non-string values pass through untouched.
            new_value, made = value, 0

        cleaned[key] = new_value
        redaction_count += made

    return cleaned, redaction_count
def sanitize_list(
    data: List[Any],
    guardrails: Optional[List[OutputGuardrail]] = None
) -> tuple[List[Any], int]:
    """
    Build a sanitized copy of a list, descending into nested containers.

    String items go through sanitize_output(); nested dicts and lists are
    processed recursively; items of any other type are copied over as-is.

    Args:
        data: List to sanitize
        guardrails: List of guardrails to apply

    Returns:
        Tuple of (sanitized_list, total_redactions)
    """
    cleaned: List[Any] = []
    redaction_count = 0

    for item in data:
        if isinstance(item, str):
            outcome = sanitize_output(item, guardrails)
            new_item, made = outcome.sanitized_value, outcome.redactions_made
        elif isinstance(item, dict):
            new_item, made = sanitize_dict(item, guardrails)
        elif isinstance(item, list):
            new_item, made = sanitize_list(item, guardrails)
        else:
            # Non-container, non-string items pass through untouched.
            new_item, made = item, 0

        cleaned.append(new_item)
        redaction_count += made

    return cleaned, redaction_count