"""FastAPI application for processing Maybank credit card statement PDF files.

This module provides a REST API for uploading PDF statement files and
receiving processed transaction data in CSV format.
"""

import csv
import io
import os
import tempfile
from datetime import datetime
from typing import Any

from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.responses import StreamingResponse

from . import __version__
from .common.pdf_convert_txt import pdf_to_text
from .common.txt_convert_csv import txt_to_csv
from .common.utils import configure_logging, get_logger

# Configure logging on module import.
# This is an import-time side effect and runs before the module logger is
# created on the next line.
configure_logging()
# Module-level logger; calls in this file pass an event name plus keyword fields.
log = get_logger(__name__)

# Application object; the route handlers below register on it via decorators.
# The reported API version is the package's __version__.
app = FastAPI(
    title="Maybank For Me API",
    description="API for processing Maybank credit card statement PDF files to CSV",
    version=__version__,
)


def process_single_pdf_to_csv(
    pdf_file_path: str, password: str = ""
) -> tuple[str, list[list[str]]]:
    """Convert one statement PDF into its report date and transaction rows.

    The report date is taken verbatim from the filename: it is the text after
    the last underscore of the stem (expected ``Statement_YYYYMMDD.pdf``). It
    is not checked to be a valid date here.

    The PDF is converted to text and then to CSV inside a throwaway temporary
    directory; the CSV is read back with its first (header) row dropped.

    Args:
        pdf_file_path: Path to the PDF file
        password: Password to decrypt PDF (if needed)

    Returns:
        Tuple of (report_date, transaction_data), where transaction_data holds
        every CSV row after the header.

    Raises:
        ValueError: If the filename stem contains no underscore
        FileNotFoundError: If the generated CSV cannot be found when read back
        OSError: If file operations fail

        Exceptions raised by the PDF/text conversion helpers propagate
        unchanged.
    """
    log.debug("processing_pdf_start", pdf_path=pdf_file_path)

    filename = os.path.basename(pdf_file_path)
    base_name = os.path.splitext(filename)[0]

    # Validate the name before allocating any temporary resources.
    parts = base_name.split("_")
    if len(parts) < 2:
        log.error(
            "invalid_filename_format",
            filename=filename,
            expected_format="Statement_YYYYMMDD.pdf",
        )
        raise ValueError(
            f"Invalid filename format: {filename}. "
            f"Expected format: Statement_YYYYMMDD.pdf"
        )

    report_date = parts[-1]
    log.debug("extracted_report_date", report_date=report_date, filename=filename)

    with tempfile.TemporaryDirectory() as temp_dir:
        # Intermediate artifacts live only for the duration of this call.
        txt_path = os.path.join(temp_dir, f"{base_name}.txt")
        csv_path = os.path.join(temp_dir, f"{report_date}-{parts[0]}.csv")

        log.debug("converting_pdf_to_text", pdf_path=pdf_file_path, txt_path=txt_path)
        pdf_to_text(pdf_file_path, txt_path, password)

        log.debug("converting_text_to_csv", txt_path=txt_path, csv_path=csv_path)
        txt_to_csv(txt_path, csv_path)

        # newline="" lets the csv module do its own line-ending handling, so
        # quoted fields that contain line breaks are parsed correctly.
        with open(csv_path, encoding="utf-8", newline="") as f:
            csv_reader = csv.reader(f)
            next(csv_reader, None)  # Skip header (no-op for an empty file)
            content: list[list[str]] = list(csv_reader)

        log.info(
            "pdf_processed_successfully",
            filename=filename,
            report_date=report_date,
            transaction_count=len(content),
        )

        return (report_date, content)


def process_report_data(report_date: str, data: list[list[str]]) -> list[list[str]]:
    """Append a year to the posting and transaction dates of each row.

    Rows carry dates without a year; the year is taken from the statement's
    report date. On a January statement, a date whose month is December is
    assigned the previous year. Rows that cannot be interpreted (missing
    columns, dates without a ``/`` separator, non-numeric month) are logged
    and left out of the result.

    Args:
        report_date: Report date in YYYYMMDD format
        data: List of transaction rows
            [posting_date, transaction_date, description, amount]

    Returns:
        New list of rows whose first two columns have ``/<year>`` appended

    Raises:
        ValueError: If report_date does not parse as YYYYMMDD
    """
    log.debug("processing_report_data", report_date=report_date, row_count=len(data))

    try:
        statement_date = datetime.strptime(report_date, "%Y%m%d")
    except ValueError as e:
        log.error("invalid_report_date_format", report_date=report_date, error=str(e))
        raise ValueError(f"Invalid report date format: {report_date}") from e

    statement_year = statement_date.year
    statement_month = statement_date.month
    is_january_statement = statement_month == 1

    dated_rows: list[list[str]] = []
    skipped = 0

    for row in data:
        try:
            # Month is the second "/"-separated field of each date column.
            posting_fields = row[0].split("/")
            if len(posting_fields) < 2:
                log.warning("invalid_posting_date_format", posting_date=row[0], row=row)
                skipped += 1
                continue
            posting_month = int(posting_fields[1])

            trans_fields = row[1].split("/")
            if len(trans_fields) < 2:
                log.warning(
                    "invalid_transaction_date_format", transaction_date=row[1], row=row
                )
                skipped += 1
                continue
            trans_month = int(trans_fields[1])

            # Start from the statement year and step back across the boundary.
            post_year = statement_year
            if is_january_statement and posting_month == 12:
                post_year = statement_year - 1
                log.debug(
                    "year_boundary_detected_posting",
                    report_month=statement_month,
                    posting_month=posting_month,
                    adjusted_year=post_year,
                )

            trans_year = statement_year
            if is_january_statement and trans_month == 12:
                trans_year = statement_year - 1
                log.debug(
                    "year_boundary_detected_transaction",
                    report_month=statement_month,
                    trans_month=trans_month,
                    adjusted_year=trans_year,
                )

            posting_date = f"{row[0]}/{post_year}"
            transaction_date = f"{row[1]}/{trans_year}"
            description = row[2]
            amount = row[3]
            dated_rows.append([posting_date, transaction_date, description, amount])
        except (ValueError, IndexError) as e:
            log.warning(
                "error_processing_row",
                row=row,
                error=str(e),
                error_type=type(e).__name__,
            )
            skipped += 1

    if skipped > 0:
        log.warning(
            "rows_skipped_during_processing",
            skipped_count=skipped,
            processed_count=len(dated_rows),
            total_input=len(data),
        )

    log.debug(
        "report_data_processed",
        input_rows=len(data),
        output_rows=len(dated_rows),
        skipped_rows=skipped,
    )

    return dated_rows


@app.get("/")
async def root() -> dict[str, Any]:
    """Describe the API.

    Returns:
        Dictionary with the API name, package version, a short description,
        and a mapping of endpoint path to a one-line summary
    """
    endpoint_summaries = {
        "/process": "POST - Upload PDF files and get processed CSV",
        "/health": "GET - Health check endpoint",
    }
    api_info: dict[str, Any] = dict(
        name="Maybank For Me API",
        version=__version__,
        description="API for processing Maybank credit card statement PDF files",
        endpoints=endpoint_summaries,
    )
    return api_info


@app.get("/health")
async def health() -> dict[str, str]:
    """Report that the service is up.

    Returns:
        Dictionary whose single ``status`` key is always ``healthy``
    """
    return dict(status="healthy")


@app.post("/process")
async def process_statements(
    files: list[UploadFile] = File(..., description="PDF statement files to process"),
    password: str = Form("", description="Password to decrypt PDF files (if needed)"),
) -> StreamingResponse:
    """Process Maybank credit card statement PDF files and return CSV data.

    Each upload is written into a per-request temporary directory, converted to
    transaction rows, given full dates based on its report date, and merged
    into one CSV sorted by transaction date. If any merged date fails to parse,
    the rows are returned unsorted instead of failing the request.

    Transactions are grouped by report date; when two uploads yield the same
    report date, the later upload's rows replace the earlier one's.

    Args:
        files: List of PDF files to process
        password: Optional password to decrypt PDF files

    Returns:
        CSV file with all processed transactions

    Raises:
        HTTPException: If validation fails or processing errors occur
            - 400: Invalid input (no files, invalid file type, file too large)
            - 500: Processing errors
    """
    log.info("processing_request_received", file_count=len(files))

    if not files:
        log.warning("no_files_provided_in_request")
        raise HTTPException(status_code=400, detail="No files provided")

    # Validate file names and types up front, before any file is read.
    max_file_size = 10 * 1024 * 1024  # 10MB per file
    max_file_size_mb = max_file_size / 1024 / 1024
    file_names = []
    for file in files:
        if not file.filename:
            log.warning("file_without_filename_rejected")
            raise HTTPException(status_code=400, detail="File must have a filename")
        file_names.append(file.filename)
        if not file.filename.lower().endswith(".pdf"):
            log.warning("invalid_file_type_rejected", filename=file.filename)
            raise HTTPException(
                status_code=400,
                detail=f"Invalid file type: {file.filename}. "
                f"Only PDF files are allowed.",
            )

    log.info(
        "files_validated",
        files=file_names,
        max_file_size_mb=max_file_size_mb,
    )
    dataset: dict[str, list[list[str]]] = {}

    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            log.debug("temporary_directory_created", temp_dir=temp_dir)

            # Process each PDF file
            for idx, file in enumerate(files, 1):
                # Already checked above; repeated so the name is known non-empty here.
                if not file.filename:
                    log.error("file_missing_filename", file_index=idx)
                    raise HTTPException(
                        status_code=400, detail="File must have a filename"
                    )

                log.debug(
                    "processing_file",
                    file_index=idx,
                    filename=file.filename,
                    total_files=len(files),
                )

                content = await file.read()
                file_size_bytes = len(content)
                file_size_mb = file_size_bytes / 1024 / 1024

                # Reject oversized uploads before anything is written to disk.
                if file_size_bytes > max_file_size:
                    log.warning(
                        "file_size_exceeded",
                        filename=file.filename,
                        file_size_mb=file_size_mb,
                        max_size_mb=max_file_size_mb,
                    )
                    raise HTTPException(
                        status_code=400,
                        detail=f"File {file.filename} exceeds maximum "
                        f"size of {max_file_size_mb}MB",
                    )

                # The filename is client-controlled. Keep only its final path
                # component (treating "\" as a separator too) so values such
                # as "../../x.pdf" or "/etc/x.pdf" cannot escape temp_dir.
                # The ".pdf" suffix check above guarantees a non-empty result.
                safe_name = os.path.basename(file.filename.replace("\\", "/"))
                pdf_path = os.path.join(temp_dir, safe_name)

                with open(pdf_path, "wb") as f:
                    f.write(content)
                log.debug(
                    "file_saved",
                    filename=file.filename,
                    size_mb=file_size_mb,
                    path=pdf_path,
                )

                # Process the PDF
                try:
                    report_date, transactions = process_single_pdf_to_csv(
                        pdf_path, password
                    )
                    # Keyed by report date: a repeated date overwrites earlier rows.
                    dataset[report_date] = transactions
                    log.info(
                        "file_processed",
                        filename=file.filename,
                        report_date=report_date,
                        transaction_count=len(transactions),
                        file_index=idx,
                    )
                except (ValueError, FileNotFoundError, OSError) as e:
                    log.error(
                        "file_processing_failed",
                        filename=file.filename,
                        error=str(e),
                        error_type=type(e).__name__,
                        file_index=idx,
                        exc_info=True,
                    )
                    raise HTTPException(
                        status_code=500,
                        detail=f"Error processing {file.filename}: {e!s}",
                    ) from e

            log.info(
                "all_pdfs_processed", pdf_count=len(files), report_count=len(dataset)
            )

            # Add year information to every report's rows and merge them.
            processed: list[list[str]] = []
            for report_date, data in dataset.items():
                log.debug(
                    "processing_report_dataset",
                    report_date=report_date,
                    transaction_count=len(data),
                )
                processed.extend(process_report_data(report_date, data))

            log.info(
                "all_transactions_processed", total_transaction_count=len(processed)
            )

            # Sort by transaction date; best effort only.
            log.debug("sorting_transactions_by_date")
            try:
                processed = sorted(
                    processed,
                    key=lambda x: datetime.strptime(x[1], "%d/%m/%Y"),
                )
                log.debug("transactions_sorted_successfully")
            except ValueError as e:
                log.warning("error_sorting_transactions", error=str(e), exc_info=True)
                # Continue without sorting if date parsing fails

            # Add header
            processed.insert(
                0, ["Posting Date", "Transaction Date", "Description", "Amount"]
            )

            # Create CSV in memory
            log.debug("creating_csv_output", row_count=len(processed))
            output = io.StringIO()
            csv.writer(output).writerows(processed)
            csv_text = output.getvalue()

            csv_size = len(csv_text)
            log.info(
                "csv_generated_successfully",
                row_count=len(processed),
                size_bytes=csv_size,
                size_kb=csv_size / 1024,
            )

            # The full CSV is already in memory; it is sent as a single chunk.
            return StreamingResponse(
                iter([csv_text]),
                media_type="text/csv",
                headers={
                    "Content-Disposition": "attachment; filename=transactions.csv"
                },
            )

    except HTTPException:
        # Deliberate HTTP errors raised above pass through untouched.
        raise
    except Exception as e:
        log.error(
            "unexpected_error_processing_statements",
            error=str(e),
            error_type=type(e).__name__,
            exc_info=True,
        )
        raise HTTPException(
            status_code=500, detail=f"Internal server error: {e!s}"
        ) from e


# Script entry point: only runs when this module is executed as the main program.
if __name__ == "__main__":
    # Imported here so that importing this module does not itself import uvicorn.
    import uvicorn

    # host "0.0.0.0" listens on all network interfaces; the server uses port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)
