"""Summarization entry points using Google Gemini."""
from __future__ import annotations

import logging
import os
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import List, Optional

import google.generativeai as genai
from google.generativeai import GenerativeModel

from .web_content import WebContent, fetch_web_or_pdf


@dataclass
class SummarizationResult:
    """Container for the response generated by Gemini.

    Pairs the model's summary with metadata about the content it was derived
    from. ``summarize_url`` fills the fields as described on each attribute.
    """

    # Model response text with surrounding whitespace stripped (may be empty).
    summary: str
    # ``source_type`` value reported by the fetched content object.
    source_type: str
    # Text extracted from the source, as returned by the fetch step.
    extracted_text: str
    # Number of images that accompanied the text in the request.
    image_count: int


# Prompt used when the caller does not supply a custom one. The ``{date}``
# placeholder is filled in with ``str.format`` by ``_build_prompt``.
DEFAULT_PROMPT = """You are a senior research analyst.
Provide a structured English summary based on both the textual and visual content supplied.

Please cover the following items:
1. Context — what the document or page is about and why it exists.
2. Key Findings — list three to five essential insights or messages.
3. Visual Interpretation — describe any notable charts, tables, or diagrams and explain their meaning.
4. Analyst Takeaways — offer two to three implications or recommendations.
5. Suggested Headline — propose a concise title for the material.

Write clearly and professionally as if preparing content for a briefing memo.
Date: {date}
"""


def _resolve_logger(logger: Optional[logging.Logger]) -> logging.Logger:
    """Return the logger to use for a summarization run.

    Args:
        logger: Caller-supplied logger, or ``None`` to use the package default.

    Returns:
        ``logger`` itself when one is given; otherwise the ``"aiknowledge"``
        logger, which gets a ``NullHandler`` attached if it has no handlers.
    """
    if logger is None:
        fallback = logging.getLogger("aiknowledge")
        # Attach the NullHandler only once: later calls see it and skip this.
        if not fallback.handlers:
            fallback.addHandler(logging.NullHandler())
        return fallback
    return logger


def _build_prompt(date: datetime, custom_prompt: Optional[str]) -> str:
    """Choose the prompt text to send to the model.

    Args:
        date: Date rendered as ``YYYY-MM-DD`` into the default prompt.
        custom_prompt: Caller-supplied prompt. When non-empty it is returned
            verbatim; it is not formatted, so ``date`` is not applied to it.

    Returns:
        The custom prompt if one was given, otherwise ``DEFAULT_PROMPT`` with
        its ``{date}`` placeholder filled in.
    """
    if not custom_prompt:
        stamp = date.strftime("%Y-%m-%d")
        return DEFAULT_PROMPT.format(date=stamp)
    return custom_prompt


def _build_parts(prompt: str, content: WebContent) -> List[object]:
    """Assemble the ordered list of parts for one model request.

    Args:
        prompt: Prompt text; always the first part.
        content: Fetched content providing ``images``, ``text`` and
            ``source_type``.

    Returns:
        A list holding the prompt, then one ``{"mime_type", "data"}`` dict per
        image (each labelled ``image/png``), then a single text block with the
        extracted text. A fixed placeholder sentence is used when the
        extracted text is empty or whitespace-only.
    """
    image_parts = [{"mime_type": "image/png", "data": raw} for raw in content.images]

    body = content.text
    if not body.strip():
        body = "No text could be extracted from the source."

    # The reported length describes the text actually sent, placeholder included.
    text_part = f"Extracted text from {content.source_type} source (length={len(body)} characters):\n{body}"
    return [prompt, *image_parts, text_part]


def summarize_url(
    url: str,
    *,
    google_api_key: Optional[str] = None,
    model_name: str = "gemini-2.5-flash",
    prompt: Optional[str] = None,
    logger: Optional[logging.Logger] = None,
    max_chars: int = 6000,
    max_image_mb: float = 4.0,
    max_pdf_pages: int = 5,
    request_timeout: int = 20,
) -> SummarizationResult:
    """Summarize the content of a URL using Gemini.

    Args:
        url: Public URL to a web page or PDF.
        google_api_key: API key for Google Gemini. Falls back to ``GOOGLE_API_KEY`` env var.
        model_name: Name of the Gemini model to use.
        prompt: Optional custom prompt. When non-empty it is sent verbatim in
            place of the default prompt (no placeholders are substituted).
        logger: Optional :class:`logging.Logger`. A default logger is used when omitted.
        max_chars: Maximum number of characters to keep from extracted text.
        max_image_mb: Maximum size of each screenshot in megabytes.
        max_pdf_pages: Maximum number of PDF pages to process.
        request_timeout: Timeout (seconds) for HTTP requests.

    Returns:
        :class:`SummarizationResult` with the Gemini output and extracted metadata.

    Raises:
        ValueError: If no API key is passed and ``GOOGLE_API_KEY`` is unset or
            empty. This is checked before the URL is fetched.
    """

    # Validate credentials first so a misconfigured call fails fast instead of
    # fetching content that can never be summarized.
    api_key = google_api_key or os.getenv("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("A Google API key must be provided via argument or the GOOGLE_API_KEY environment variable.")

    resolved_logger = _resolve_logger(logger)
    resolved_logger.debug("Starting summarization for %s", url)

    # The size/time limits are forwarded unchanged to the fetch step.
    content = fetch_web_or_pdf(
        url,
        logger=resolved_logger,
        max_chars=max_chars,
        max_image_mb=max_image_mb,
        max_pdf_pages=max_pdf_pages,
        request_timeout=request_timeout,
    )

    genai.configure(api_key=api_key)
    model = GenerativeModel(model_name)

    # Timezone-aware replacement for the deprecated ``datetime.utcnow()``; the
    # formatted UTC calendar date is the same.
    prompt_text = _build_prompt(datetime.now(timezone.utc), prompt)
    parts = _build_parts(prompt_text, content)

    resolved_logger.debug("Sending %d parts to Gemini", len(parts))
    response = model.generate_content(parts)
    # NOTE(review): ``response.text`` may raise instead of returning an empty
    # value when the response carries no text parts (e.g. blocked output) —
    # confirm against the google-generativeai documentation.
    summary_text = response.text or ""

    resolved_logger.info("Finished summarization for %s", url)
    return SummarizationResult(
        summary=summary_text.strip(),
        source_type=content.source_type,
        extracted_text=content.text,
        image_count=len(content.images),
    )
