"""Prompt building utilities for Pydantic AI agent, including file handling and context management."""

import mimetypes
from dataclasses import dataclass
from pathlib import Path, PurePosixPath
from typing import Optional

from markitdown import MarkItDown
from pydantic_ai import BinaryContent

from aixtools.context import SessionIdTuple
from aixtools.logging.logging_config import get_logger
from aixtools.server import container_to_host_path
from aixtools.utils.config import (
    EXTRACTABLE_DOCUMENT_TYPES,
    IMAGE_ATTACHMENT_TYPES,
    MAX_EXTRACTED_TEXT_SIZE,
    MAX_IMAGE_ATTACHMENT_SIZE,
)
from aixtools.utils.files import is_text_content

logger = get_logger(__name__)


@dataclass
class FileExtractionResult:
    """Outcome of reading a file or extracting its text, as produced by file_to_binary_content.

    Attributes:
        content: The file content. A str for text files and for documents converted
            via markitdown, a BinaryContent for images and any other non-text file,
            None when reading or extraction failed
        success: True if the file was successfully read or extracted, False on any failure
        error_message: Description of the failure ("<ExceptionType>: <message>" with a
            short prefix) when success is False, None otherwise
        was_extracted: True only when the content is text obtained through markitdown
            document extraction; stays False for files that were read directly
    """

    # Payload to hand to the model; None signals that nothing could be read.
    content: str | BinaryContent | None
    # Callers should check this flag before using `content`.
    success: bool
    # Populated only on failure.
    error_message: str | None = None
    # Distinguishes extracted document text from text files that were read as-is.
    was_extracted: bool = False


def should_be_included_into_context(
    file_content: BinaryContent | str | None,
    *,
    max_image_size_bytes: int = MAX_IMAGE_ATTACHMENT_SIZE,
    max_extracted_text_size_bytes: int = MAX_EXTRACTED_TEXT_SIZE,
) -> bool:
    """Decide whether a piece of file content is small enough (and of a suitable type) for the model context.

    Args:
        file_content: Text (str), binary content, or None when nothing was read.
        max_image_size_bytes: Exclusive upper bound on the byte size of an image.
        max_extracted_text_size_bytes: Exclusive upper bound on the UTF-8 byte size of text.

    Returns:
        True for text whose UTF-8 encoding is strictly smaller than the text limit, and for
        binary content whose media type is in IMAGE_ATTACHMENT_TYPES and whose data is
        strictly smaller than the image limit. False for everything else, including None.
    """
    # Text is measured in encoded bytes, not characters.
    if isinstance(file_content, str):
        return len(file_content.encode("utf-8")) < max_extracted_text_size_bytes

    # Binary payloads qualify only when they are a supported image type within the size limit.
    if isinstance(file_content, BinaryContent):
        is_supported_image = file_content.media_type in IMAGE_ATTACHMENT_TYPES
        return is_supported_image and len(file_content.data) < max_image_size_bytes

    # None and any unexpected type are never included.
    return False


def file_to_binary_content(file_path: str | Path, mime_type: Optional[str] = None) -> FileExtractionResult:
    """Load a file for the model: extract text from documents via markitdown, otherwise read it directly.

    Args:
        file_path: Path of the file to load.
        mime_type: MIME type of the file. When empty or None it is guessed from the
            file name, falling back to "application/octet-stream".

    Returns:
        A FileExtractionResult. For MIME types in EXTRACTABLE_DOCUMENT_TYPES the content is
        the text produced by markitdown (was_extracted=True). For other files the content is
        a UTF-8 decoded str when is_text_content() reports text, else a BinaryContent with
        the raw bytes. Any exception is caught, logged, and returned as a failed result.
    """
    # guess_type() is only consulted when the caller did not supply a MIME type.
    mime_type = mime_type or mimetypes.guess_type(file_path)[0] or "application/octet-stream"

    # Documents go through markitdown; they are never read as raw bytes here.
    if mime_type in EXTRACTABLE_DOCUMENT_TYPES:
        try:
            converted = MarkItDown().convert(str(file_path))
            outcome = FileExtractionResult(
                content=converted.text_content,
                success=True,
                error_message=None,
                was_extracted=True,
            )
        except Exception as exc:  # pylint: disable=broad-exception-caught
            failure = f"Extraction failed: {type(exc).__name__}: {exc}"
            logger.error("Document extraction failed for %s: %s", file_path, failure)
            outcome = FileExtractionResult(content=None, success=False, error_message=failure)
        return outcome

    # Everything else is read in full and classified as text or binary.
    try:
        with open(file_path, "rb") as handle:
            raw = handle.read()

        payload: str | BinaryContent
        if is_text_content(raw, mime_type):
            payload = raw.decode("utf-8")
        else:
            # Images and any other non-text file keep their bytes and MIME type.
            payload = BinaryContent(data=raw, media_type=mime_type)
        return FileExtractionResult(content=payload, success=True)
    except Exception as exc:  # pylint: disable=broad-exception-caught
        failure = f"Failed to read file: {type(exc).__name__}: {exc}"
        logger.error("File reading failed for %s: %s", file_path, failure)
        return FileExtractionResult(content=None, success=False, error_message=failure)


def truncate_extracted_text(text: str, max_bytes: int = MAX_EXTRACTED_TEXT_SIZE) -> str:
    """Cut text down to at most ``max_bytes`` of UTF-8 and prepend a truncation notice.

    Text that already fits within ``max_bytes`` is returned unchanged, without a notice,
    so the result never claims a truncation that did not happen.

    Args:
        text: Text to truncate.
        max_bytes: Maximum UTF-8 byte size of the kept text. Negative values are
            treated as 0. The notice line is added on top of this limit.

    Returns:
        ``text`` itself when it fits; otherwise a "[TRUNCATED - ...]" notice line, a blank
        line, and the leading part of ``text`` that fits into ``max_bytes``.
    """
    encoded = text.encode("utf-8")

    # A negative limit would make the slice below drop bytes from the END of the
    # text instead of capping its length, so clamp it.
    limit = max(max_bytes, 0)

    if len(encoded) <= limit:
        return text

    # Slicing bytes may cut a multi-byte character in half; errors="ignore" drops
    # the incomplete trailing sequence instead of raising.
    truncated_text = encoded[:limit].decode("utf-8", errors="ignore")

    total_chars = len(text)
    truncated_chars = len(truncated_text)

    return f"[TRUNCATED - showing first {truncated_chars} of {total_chars} characters]\n\n{truncated_text}"


def build_user_input(
    session_tuple: SessionIdTuple,
    user_text: str,
    file_paths: list[Path],
) -> str | list[str | BinaryContent]:
    """Build user input for the Pydantic AI agent, including file attachments if provided.

    Every path in ``file_paths`` is mapped to a host path, loaded with
    ``file_to_binary_content`` and summarised on one line of an "Attachments:" section
    appended to the user text. Per-file problems (invalid path, inaccessible file,
    failed extraction, content too large) are reported on that line instead of
    aborting the whole prompt.

    Args:
        session_tuple: Session identifier passed as ``ctx`` to ``container_to_host_path``.
        user_text: The user's message.
        file_paths: Workspace paths of the attached files.

    Returns:
        ``user_text`` unchanged when ``file_paths`` is empty. Otherwise a list whose first
        element is the prompt text (user text plus the attachment summary), followed by
        the contents provided to the model: str for text, BinaryContent for images.
    """
    if not file_paths:
        return user_text

    attachment_info_lines: list[str] = []
    binary_attachments: list[str | BinaryContent] = []

    for workspace_path in file_paths:
        # Convert Path to PurePosixPath for container_to_host_path
        workspace_posix_path = PurePosixPath(workspace_path)
        host_path = container_to_host_path(workspace_posix_path, ctx=session_tuple)

        # Handle None return from container_to_host_path
        if host_path is None:
            attachment_info_lines.append(
                f"* {workspace_path.name} (path in workspace: {workspace_path}) -- conversion failed: invalid path"
            )
            continue

        # A missing or unreadable file must not abort the whole prompt; report it
        # like every other per-file failure and move on to the next attachment.
        try:
            file_size = host_path.stat().st_size
        except OSError as e:
            logger.error("Cannot access attachment %s: %s: %s", host_path, type(e).__name__, e)
            # Use strerror rather than str(e) so the host path is not put into the prompt.
            reason = e.strerror or type(e).__name__
            attachment_info_lines.append(
                f"* {workspace_path.name} (path in workspace: {workspace_path}) -- file not accessible: {reason}"
            )
            continue

        mime_type, _ = mimetypes.guess_type(host_path)
        mime_type = mime_type or "application/octet-stream"

        attachment_info = f"* {workspace_path.name} (file_size={file_size} bytes) (path in workspace: {workspace_path})"
        extraction_result = file_to_binary_content(host_path, mime_type)

        # Handle extraction failure - exclude from attachments
        if not extraction_result.success:
            attachment_info += f" -- extraction failed: {extraction_result.error_message}"
            attachment_info_lines.append(attachment_info)
            continue

        # Handle successful extraction
        if extraction_result.was_extracted:
            attachment_info += " -- extracted as text"

        content = extraction_result.content

        # NOTE: the reported index is the position among the attachments; in the
        # returned list the prompt text comes first, so the attachment sits one further.
        if content is not None and should_be_included_into_context(content):
            binary_attachments.append(content)
            attachment_info += f" -- provided to model context at index {len(binary_attachments) - 1}"
        elif isinstance(content, str) and content and extraction_result.was_extracted:
            # Truncate large extracted text and include with warning (only for extracted documents)
            truncated_content = truncate_extracted_text(content)
            binary_attachments.append(truncated_content)
            attachment_info += f" -- truncated and provided to model context at index {len(binary_attachments) - 1}"
        elif content is not None:
            # Content exists but excluded from context (e.g., images too large, non-extracted text)
            attachment_info += " -- too large for context"

        attachment_info_lines.append(attachment_info)

    full_prompt = user_text + "\nAttachments:\n" + "\n".join(attachment_info_lines)

    return [full_prompt] + binary_attachments
