"""Convert PDF files to text files.

This module provides functionality to convert PDF files, including
password-protected PDFs, to plain text files.
"""

import argparse
import os
from pathlib import Path

from pypdf import PdfReader

from .utils import configure_logging, get_logger

# Public API of this module: the names exported by a star-import.
__all__ = ["convert_txt_folder", "pdf_to_text"]

# Configure logging once at import time, then bind a module-level logger named
# after this module. Call sites in this file pass an event name followed by
# keyword fields.
configure_logging()
log = get_logger(__name__)


def pdf_to_text(pdf_path: str, txt_path: str, password: str = "") -> None:
    """Convert a PDF file to a text file.

    Extracts the text of every page, concatenates it in page order without
    any separator, and writes the result to ``txt_path`` encoded as UTF-8.
    The output file is only opened once extraction has finished, so a PDF
    that fails to parse does not leave a truncated text file behind.

    Args:
        pdf_path: Path to the input PDF file
        txt_path: Path where the output text file will be written
        password: Password for encrypted PDFs (default: empty string). An
            empty value means no password is handed to the reader.

    Raises:
        FileNotFoundError: If ``pdf_path`` does not exist when checked
        ValueError: If anything fails while opening, decrypting or parsing
            the PDF, or while writing the text file. The underlying
            exception (including any ``OSError``) is chained as
            ``__cause__``.

    Examples:
        >>> pdf_to_text("statement.pdf", "output.txt")
        >>> pdf_to_text("encrypted.pdf", "output.txt", "secret123")
    """
    log.debug("converting_pdf_to_text", pdf_path=pdf_path, txt_path=txt_path)

    if not os.path.exists(pdf_path):
        msg = f"PDF file not found: {pdf_path}"
        log.error("pdf_file_not_found", pdf_path=pdf_path)
        raise FileNotFoundError(msg)

    try:
        with open(pdf_path, "rb") as pdf_file:
            # Only forward a password when one was actually supplied.
            reader = (
                PdfReader(pdf_file, password=password)
                if password
                else PdfReader(pdf_file)
            )
            pages = reader.pages
            page_count = len(pages)
            log.debug(
                "pdf_opened",
                pdf_path=pdf_path,
                page_count=page_count,
                # Report the document's own encryption flag rather than
                # whether the caller happened to pass a password.
                is_encrypted=reader.is_encrypted,
            )

            # Extraction must happen while the file handle is still open.
            # join() avoids the quadratic cost of repeated ``+=``.
            text = "".join(page.extract_text() for page in pages)

        text_length = len(text)
        log.debug("text_extracted", pdf_path=pdf_path, text_length=text_length)

        with open(txt_path, "w", encoding="utf-8") as txt_file:
            txt_file.write(text)

        log.info(
            "pdf_converted_to_text",
            pdf_path=pdf_path,
            txt_path=txt_path,
            page_count=page_count,
            text_length=text_length,
        )

    except Exception as e:
        # Every failure is normalised to ValueError so callers have a single
        # exception type to handle; the original error stays attached.
        log.error(
            "pdf_conversion_failed",
            pdf_path=pdf_path,
            error=str(e),
            error_type=type(e).__name__,
            exc_info=True,
        )
        raise ValueError(f"Failed to convert PDF {pdf_path}: {e}") from e


def convert_txt_folder(
    input_folder: str, output_folder: str, password: str = ""
) -> None:
    """Convert all PDF files in a folder to text files.

    Looks at the direct entries of ``input_folder`` (no recursion), selects
    regular files whose name ends in ``.pdf`` (case-insensitive) and converts
    each one with :func:`pdf_to_text`. Files are processed in sorted name
    order so runs are reproducible. The output folder is created if it
    doesn't exist, and each text file keeps the PDF's base name with a
    ``.txt`` extension.

    A failure on one file is logged and counted but does not stop the
    batch; the totals are reported in the final log event.

    Args:
        input_folder: Folder containing PDF files to convert
        output_folder: Folder where text files will be saved
        password: Password for encrypted PDFs (default: empty string)

    Raises:
        FileNotFoundError: If input folder does not exist
        OSError: If the output folder cannot be created or the input folder
            cannot be listed

    Examples:
        >>> convert_txt_folder("/data/pdfs", "/data/txt")
        >>> convert_txt_folder("/data/pdfs", "/data/txt", "secret123")
    """
    log.info(
        "converting_folder_start",
        input_folder=input_folder,
        output_folder=output_folder,
    )

    if not os.path.exists(input_folder):
        msg = f"Input folder not found: {input_folder}"
        log.error("input_folder_not_found", input_folder=input_folder)
        raise FileNotFoundError(msg)

    output_path = Path(output_folder)
    output_path.mkdir(parents=True, exist_ok=True)
    log.debug("output_folder_created", output_folder=output_folder)

    # os.listdir returns entries in arbitrary order, so sort for a stable
    # processing order. Directories that merely look like "*.pdf" are skipped
    # instead of being reported as failed conversions.
    pdf_files = sorted(
        name
        for name in os.listdir(input_folder)
        if name.lower().endswith(".pdf")
        and os.path.isfile(os.path.join(input_folder, name))
    )
    log.info("pdf_files_found", file_count=len(pdf_files))

    converted_count = 0
    failed_count = 0

    for filename in pdf_files:
        pdf_path = os.path.join(input_folder, filename)
        txt_filename = os.path.splitext(filename)[0] + ".txt"
        txt_path = os.path.join(output_folder, txt_filename)

        try:
            pdf_to_text(pdf_path, txt_path, password)
        except (ValueError, OSError) as e:
            # FileNotFoundError is an OSError subclass, so it is covered too.
            # Keep going: one bad file must not abort the whole batch.
            log.error(
                "file_conversion_failed",
                filename=filename,
                error=str(e),
                error_type=type(e).__name__,
                exc_info=True,
            )
            failed_count += 1
        else:
            converted_count += 1

    log.info(
        "folder_conversion_complete",
        total_files=len(pdf_files),
        converted=converted_count,
        failed=failed_count,
    )


# Command-line entry point: convert every PDF in a folder to text.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Convert statement PDF files to text files"
    )
    parser.add_argument("input_folder", type=str, help="Folder containing PDF files")
    parser.add_argument("output_folder", type=str, help="Folder to save text files")
    parser.add_argument(
        "--password", type=str, help="Password to open PDF files", default=""
    )
    args = parser.parse_args()
    # An explicit --password takes precedence; the PDF_PASSWORD environment
    # variable is only a fallback when no password was given on the command
    # line. Previously the environment variable silently overrode the flag,
    # even when it was set to an empty string.
    password_val = args.password or os.environ.get("PDF_PASSWORD", "")
    convert_txt_folder(args.input_folder, args.output_folder, password_val)
