"""INTEGRIUM Sanitizer. Author: Juste Elysée MALANDILA"""
import re
import html


# Patterns deleted by the "xss" mode before HTML-escaping: <script> blocks,
# quoted inline event-handler attributes, and the javascript: pseudo-protocol.
_XSS_PATTERNS = (
    re.compile(r'<script[^>]*>.*?</script>', re.DOTALL | re.IGNORECASE),
    re.compile(r'on\w+\s*=\s*["\'][^"\']*["\']', re.IGNORECASE),
    re.compile(r'javascript:', re.IGNORECASE),
)

# "; KEYWORD " sequences deleted by the "sql" mode.
_SQL_PATTERNS = tuple(
    re.compile(rf';\s*{keyword}\s+', re.IGNORECASE)
    for keyword in ('DROP', 'DELETE', 'TRUNCATE', 'ALTER', 'EXEC')
)

# Schemes the "url" mode accepts as-is (compared case-insensitively).
_URL_SCHEMES = ('http://', 'https://', 'ftp://')


def _strip_until_stable(text: str, patterns) -> str:
    """Delete every match of *patterns* repeatedly until nothing changes."""
    # A single pass can be bypassed by nesting ("javajavascript:script:"
    # collapses to "javascript:"). Every match is non-empty, so each changing
    # pass shortens the text and the loop is guaranteed to terminate.
    while True:
        stripped = text
        for pattern in patterns:
            stripped = pattern.sub('', stripped)
        if stripped == text:
            return stripped
        text = stripped


def sanitize(data: str, mode: str = "default") -> str:
    """
    Sanitize a string according to the selected mode.

    Non-string input is converted with ``str()`` before processing.

    Args:
        data: Value to sanitize.
        mode: Sanitization mode, one of:
            - "default": strip surrounding whitespace and lowercase.
            - "email": lowercase, drop parenthesized comments and all
              whitespace.
            - "url": drop all whitespace and prepend "https://" unless the
              value already starts with http://, https:// or ftp://
              (scheme compared case-insensitively).
            - "xss": remove script blocks, quoted event-handler attributes
              and "javascript:" (repeated until stable), then HTML-escape.
            - "sql": double single quotes and remove "; KEYWORD " sequences
              for DROP, DELETE, TRUNCATE, ALTER and EXEC (repeated until
              stable).
            - "phone": keep only digits and "+".
            - "alphanumeric": keep only ASCII letters and digits.
            - "alpha": keep only ASCII letters.
            - "numeric": keep only digit characters.
            Any other value returns the (stringified) input unchanged.

    Returns:
        Sanitized string
    """
    if not isinstance(data, str):
        data = str(data)

    if mode == "default":
        return data.strip().lower()

    if mode == "email":
        email = data.strip().lower()
        # Remove "(comment)" parts from the address.
        email = re.sub(r'\([^)]*\)', '', email)
        # Remove any whitespace left inside the address.
        return re.sub(r'\s+', '', email)

    if mode == "url":
        url = re.sub(r'\s+', '', data.strip())
        # URL schemes are case-insensitive, so "HTTP://host" must not get a
        # second scheme prepended.
        if not url.lower().startswith(_URL_SCHEMES):
            url = 'https://' + url
        return url

    if mode == "xss":
        # Remove dangerous patterns first, then escape whatever remains.
        return html.escape(_strip_until_stable(data, _XSS_PATTERNS))

    if mode == "sql":
        # NOTE: best-effort filtering only; this is not a replacement for
        # parameterized queries.
        escaped = data.replace("'", "''")
        return _strip_until_stable(escaped, _SQL_PATTERNS)

    if mode == "phone":
        return re.sub(r'[^\d+]', '', data)

    if mode == "alphanumeric":
        return re.sub(r'[^a-zA-Z0-9]', '', data)

    if mode == "alpha":
        return re.sub(r'[^a-zA-Z]', '', data)

    if mode == "numeric":
        return re.sub(r'[^\d]', '', data)

    # Unknown mode: return the input untouched.
    return data


def strip_html(data: str) -> str:
    """Return *data* with every ``<...>`` tag-like span deleted.

    A span is a ``<``, one or more characters other than ``>``, and the
    closing ``>``; text outside such spans is left untouched.
    """
    tag_pattern = re.compile(r'<[^>]+>')
    without_tags = tag_pattern.sub('', data)
    return without_tags


def normalize_whitespace(data: str) -> str:
    """Collapse each whitespace run to one space and trim both ends."""
    # Splitting on whitespace runs yields empty pieces only at the ends;
    # dropping them is what trims leading and trailing whitespace.
    pieces = re.split(r'\s+', data)
    return ' '.join(piece for piece in pieces if piece)


def truncate(data: str, max_length: int, suffix: str = "...") -> str:
    """
    Truncate string to max length.

    The result never exceeds ``max_length`` characters. When the string is
    shortened, ``suffix`` is appended and counts towards the limit. If the
    limit is too small to hold the suffix, the string is hard-cut to
    ``max_length`` characters without a suffix.

    Args:
        data: String to truncate
        max_length: Maximum length of the returned string; values <= 0
            yield an empty string when truncation is needed
        suffix: Suffix to add if truncated

    Returns:
        Truncated string
    """
    if len(data) <= max_length:
        return data

    if max_length < len(suffix):
        # Subtracting the suffix length would give a negative slice index,
        # which cuts from the end and returns a string longer than allowed.
        return data[:max(max_length, 0)]

    return data[:max_length - len(suffix)] + suffix
