"""
Google Vertex AI Vision-based content moderation for PDF documents.

This tool renders the first page of a PDF document as an image and submits that image
for content moderation via Vertex AI Vision. The moderation call itself is currently
simulated with a fixed mock response (see ``moderate_with_vertex_vision``).
"""

import os
import io
import base64
from typing import Dict, Any, Optional, List
from dataclasses import dataclass
from PIL import Image
import fitz  # PyMuPDF for PDF processing
from google.cloud import aiplatform
from google.cloud.aiplatform.gapic.schema import predict
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value
import logging

# Module-level logger named after this module; no handlers or levels are
# configured in this file.
logger = logging.getLogger(__name__)


@dataclass
class VisionModerationResult:
    """Outcome of one vision-based moderation pass.

    Attributes:
        is_safe: Verdict flag for the analysed content.
        confidence: Confidence score attached to the verdict.
        categories: Category labels reported for the content.
        details: Free-form dictionary with additional analysis data.
        error: Error description when something went wrong, otherwise ``None``.
    """

    # Required fields: callers must always supply the verdict and its metadata.
    is_safe: bool
    confidence: float
    categories: List[str]
    details: Dict[str, Any]

    # Optional field: stays ``None`` unless a failure was recorded.
    error: Optional[str] = None


class VertexVisionModerationTool:
    """Google Vertex AI Vision-based content moderation tool.

    The pipeline renders the first page of a PDF to an image, encodes it as a
    base64 JPEG and passes it to the moderation step. The moderation step
    currently returns a fixed mock response; the real endpoint call is kept
    as a commented template inside ``moderate_with_vertex_vision``.

    NOTE(review): every failure path in this class reports ``is_safe=True``
    (fail-open). That is kept for backward compatibility with existing
    callers, but callers that need a trustworthy verdict must also check
    ``error`` / ``confidence`` on the returned result.
    """

    def __init__(self, project_id: str, location: str = "us-central1"):
        """
        Initialize the Vertex AI Vision moderation tool.

        Initialization of the Vertex AI SDK is best-effort: a failure is
        logged as a warning and the instance is still created.

        Args:
            project_id: Google Cloud project ID
            location: Google Cloud region
        """
        self.project_id = project_id
        self.location = location
        self.endpoint_id = None  # Will be set when available

        # Initialize Vertex AI
        try:
            aiplatform.init(project=project_id, location=location)
            logger.info(f"Vertex AI initialized for project {project_id}")
        except Exception as e:
            logger.warning(f"Vertex AI initialization failed: {e}")

    @staticmethod
    def _fail_open_result(
        error: str, details: Optional[Dict[str, Any]] = None
    ) -> VisionModerationResult:
        """Build the shared "failed, but reported as safe" result.

        Args:
            error: Text stored in the result's ``error`` field.
            details: Optional details dictionary; an empty dict is used when omitted.

        Returns:
            VisionModerationResult with ``is_safe=True``, ``confidence=0.0``
            and no categories.
        """
        return VisionModerationResult(
            is_safe=True,  # Fail-open: see the class-level NOTE(review)
            confidence=0.0,
            categories=[],
            details={} if details is None else details,
            error=error,
        )

    def extract_first_page_as_image(self, pdf_path: str) -> Optional[Image.Image]:
        """
        Extract the first page of a PDF as an image.

        The document is closed on every path (success, empty document, or
        error) so no file handle is leaked.

        Args:
            pdf_path: Path to the PDF file

        Returns:
            PIL Image of the first page or None if extraction fails
        """
        try:
            doc = fitz.open(pdf_path)
            try:
                if len(doc) == 0:
                    logger.warning("PDF has no pages")
                    return None

                page = doc[0]

                # 2x zoom for better quality. PyMuPDF renders at 72 DPI by
                # default, so this yields roughly 144 DPI (not 300).
                mat = fitz.Matrix(2.0, 2.0)
                pix = page.get_pixmap(matrix=mat)

                # Round-trip through PNG bytes to obtain a PIL Image.
                img_data = pix.tobytes("png")
                return Image.open(io.BytesIO(img_data))
            finally:
                # Runs for the early return and for exceptions as well; an
                # error raised by close() is handled by the outer except.
                doc.close()

        except Exception as e:
            logger.error(f"Failed to extract first page from PDF {pdf_path}: {e}")
            return None

    def image_to_base64(self, image: Image.Image) -> str:
        """
        Convert PIL Image to base64 string.

        The image is converted to RGB if needed, shrunk so that neither side
        exceeds 1024 pixels, and encoded as JPEG (quality 85). The caller's
        image object is never modified.

        Args:
            image: PIL Image object

        Returns:
            Base64 encoded image string, or an empty string if conversion fails
        """
        try:
            # convert() returns a new image, so the input is untouched here.
            working = image if image.mode == 'RGB' else image.convert('RGB')

            # Resize if too large (Vertex AI has size limits)
            max_size = 1024
            if working.width > max_size or working.height > max_size:
                # thumbnail() resizes in place; copy first when we would
                # otherwise be shrinking the caller's own image.
                if working is image:
                    working = working.copy()
                working.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)

            # Encode as JPEG and then as base64 text
            buffer = io.BytesIO()
            working.save(buffer, format='JPEG', quality=85)
            return base64.b64encode(buffer.getvalue()).decode('utf-8')

        except Exception as e:
            logger.error(f"Failed to convert image to base64: {e}")
            return ""

    def moderate_with_vertex_vision(self, image_base64: str) -> VisionModerationResult:
        """
        Use Vertex AI Vision to moderate content in the image.

        NOTE: this is currently a simulation. The returned result is a fixed
        mock response and does not depend on ``image_base64``.

        Args:
            image_base64: Base64 encoded image string

        Returns:
            VisionModerationResult with moderation details
        """
        try:
            # For hackathon purposes, we'll simulate the Vertex AI Vision response
            # In production, you would use the actual Vertex AI Vision API

            # Mock response for demo purposes
            mock_response = {
                "is_safe": True,
                "confidence": 0.95,
                "categories": ["text", "document"],
                "details": {
                    "safe_search_annotations": {
                        "adult": "VERY_UNLIKELY",
                        "medical": "UNLIKELY",
                        "spoofed": "UNLIKELY",
                        "violence": "UNLIKELY",
                        "racy": "UNLIKELY"
                    },
                    "text_detection": {
                        "text_found": True,
                        "text_confidence": 0.92
                    },
                    "label_detections": [
                        {"label": "Document", "confidence": 0.98},
                        {"label": "Text", "confidence": 0.95}
                    ]
                }
            }

            # In production, replace with actual Vertex AI Vision call:
            # endpoint = aiplatform.Endpoint(f"projects/{self.project_id}/locations/{self.location}/endpoints/{self.endpoint_id}")
            # instance = {"content": image_base64}
            # response = endpoint.predict(instances=[instance])

            return VisionModerationResult(
                is_safe=mock_response["is_safe"],
                confidence=mock_response["confidence"],
                categories=mock_response["categories"],
                details=mock_response["details"]
            )

        except Exception as e:
            logger.error(f"Vertex AI Vision moderation failed: {e}")
            return self._fail_open_result(str(e))

    def moderate_pdf_content(self, pdf_path: str) -> VisionModerationResult:
        """
        Moderate content of a PDF document by analyzing its first page.

        Args:
            pdf_path: Path to the PDF file

        Returns:
            VisionModerationResult with moderation details. On any failure the
            result has ``confidence=0.0``, a populated ``error`` field and
            (fail-open) ``is_safe=True``.
        """
        try:
            logger.info(f"Starting content moderation for PDF: {pdf_path}")

            # Extract first page as image
            image = self.extract_first_page_as_image(pdf_path)
            if image is None:
                return self._fail_open_result(
                    "PDF extraction failed",
                    {"error": "Failed to extract first page"},
                )

            # Convert to base64
            image_base64 = self.image_to_base64(image)
            if not image_base64:
                return self._fail_open_result(
                    "Image conversion failed",
                    {"error": "Failed to convert image to base64"},
                )

            # Moderate with Vertex AI Vision
            result = self.moderate_with_vertex_vision(image_base64)

            logger.info(f"Content moderation completed. Safe: {result.is_safe}, Confidence: {result.confidence}")
            return result

        except Exception as e:
            logger.error(f"PDF content moderation failed: {e}")
            # For hackathon purposes, ignore exceptions and continue
            return self._fail_open_result(
                str(e), {"error": "Moderation process failed"}
            )

    def moderate_multiple_pdfs(self, pdf_paths: List[str]) -> Dict[str, VisionModerationResult]:
        """
        Moderate content of multiple PDF documents.

        A failure on one document does not stop processing of the others.
        Because results are keyed by path, a path listed more than once
        appears only once in the output.

        Args:
            pdf_paths: List of paths to PDF files

        Returns:
            Dictionary mapping PDF paths to moderation results
        """
        results = {}

        for pdf_path in pdf_paths:
            try:
                results[pdf_path] = self.moderate_pdf_content(pdf_path)
            except Exception as e:
                logger.error(f"Failed to moderate PDF {pdf_path}: {e}")
                # For hackathon purposes, continue with other PDFs
                results[pdf_path] = self._fail_open_result(
                    str(e), {"error": "Processing failed"}
                )

        return results


# MCP Tool Wrapper
class VertexVisionModerationMCPTool:
    """MCP-facing adapter that exposes the vision moderation tool as plain dictionaries."""

    def __init__(self):
        """Read project settings from the environment and build the wrapped tool.

        ``GOOGLE_CLOUD_PROJECT`` supplies the project ID; when it is unset or
        empty a warning is logged and a placeholder ID is used instead.
        ``GOOGLE_CLOUD_LOCATION`` supplies the region and defaults to
        ``us-central1``.
        """
        project = os.getenv("GOOGLE_CLOUD_PROJECT")
        if not project:
            logger.warning("GOOGLE_CLOUD_PROJECT not set, using default")
            project = "your-project-id"  # Replace with actual project ID

        self.tool_name = "vertex_vision_moderation"
        self.project_id = project
        self.location = os.getenv("GOOGLE_CLOUD_LOCATION", "us-central1")
        self.vision_tool = VertexVisionModerationTool(
            project_id=self.project_id,
            location=self.location,
        )

    @staticmethod
    def _result_fields(outcome) -> Dict[str, Any]:
        """Copy the five fields of a moderation result into a new dictionary."""
        return {
            "is_safe": outcome.is_safe,
            "confidence": outcome.confidence,
            "categories": outcome.categories,
            "details": outcome.details,
            "error": outcome.error,
        }

    def moderate_document(self, pdf_path: str) -> Dict[str, Any]:
        """
        Run moderation on one PDF and report the outcome as a dictionary.

        Args:
            pdf_path: Path to the PDF file

        Returns:
            On normal completion: ``success=True`` plus the tool name, the
            path and the moderation result fields. If an exception escapes
            the wrapped tool: ``success=False`` plus the tool name, the path
            and the error text.
        """
        try:
            outcome = self.vision_tool.moderate_pdf_content(pdf_path)
            payload = {
                "success": True,
                "tool_name": self.tool_name,
                "pdf_path": pdf_path,
            }
            payload.update(self._result_fields(outcome))
            return payload
        except Exception as exc:
            logger.error(f"MCP tool error: {exc}")
            return {
                "success": False,
                "tool_name": self.tool_name,
                "pdf_path": pdf_path,
                "error": str(exc),
            }

    def moderate_documents(self, pdf_paths: List[str]) -> Dict[str, Any]:
        """
        Run moderation on several PDFs and report all outcomes as a dictionary.

        Args:
            pdf_paths: List of paths to PDF files

        Returns:
            On normal completion: ``success=True``, the tool name, the number
            of paths that were passed in, and a ``results`` mapping from path
            to that document's moderation fields. If an exception escapes:
            ``success=False`` plus the tool name and the error text.
        """
        try:
            outcomes = self.vision_tool.moderate_multiple_pdfs(pdf_paths)

            # Flatten each result object into a plain dictionary.
            per_document = {
                path: self._result_fields(outcome)
                for path, outcome in outcomes.items()
            }

            return {
                "success": True,
                "tool_name": self.tool_name,
                "total_documents": len(pdf_paths),
                "results": per_document,
            }
        except Exception as exc:
            logger.error(f"MCP tool error: {exc}")
            return {
                "success": False,
                "tool_name": self.tool_name,
                "error": str(exc),
            }


# Example usage and testing
def test_vertex_vision_moderation():
    """Manual smoke check: moderate ``sample_document.pdf`` if it exists.

    Builds the MCP wrapper, then either prints the moderation result for the
    sample file or prints a hint that the file is missing. Any exception is
    caught and printed rather than raised.
    """
    try:
        # The wrapper is constructed first, before the file check.
        mcp_tool = VertexVisionModerationMCPTool()

        # Replace with an actual PDF path when testing.
        candidate = "sample_document.pdf"

        if not os.path.exists(candidate):
            print(f"Sample PDF not found: {candidate}")
            print("Please provide a valid PDF path for testing")
            return

        print(f"Testing content moderation with: {candidate}")
        outcome = mcp_tool.moderate_document(candidate)
        print(f"Moderation result: {outcome}")

    except Exception as exc:
        print(f"Test failed: {exc}")


# Run the manual smoke check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_vertex_vision_moderation()
