"""
Enhanced version of dbcache2.py with structured logging.
"""

from pathlib import Path
import io
import logging
import sys
import urllib.parse

from configargparse import Namespace, YAMLConfigFileParser  # type: ignore
from sqlalchemy import create_engine, text
from sqlalchemy.engine.base import Engine
import configargparse
import dotenv
import pandas as pd
import yaml


# Module-wide logger; it is configured through logging.basicConfig in
# Config._setup_logging.
logger = logging.getLogger(__name__)

# --- Default log format ---
# Used for --logformat when neither the command line, the LOGFORMAT environment
# variable nor the config file supplies one.
LOGFORMAT = (
    "%(asctime)s %(levelname)s "
    "[%(module)s:%(lineno)d] "
    "%(funcName)s | %(message)s"
)


class CacheManager:
    """Manage caching data: retrieve from and/or update database cache table.

    Cached entries are rows of a ``cache`` table with the columns ``name`` (the
    source table name) and ``data`` (the table contents as gzip-compressed
    Parquet bytes). The same engine is used for the cache table and for the
    source tables.
    """

    def __init__(self, engine: Engine):
        """Store the SQLAlchemy engine used for every query issued by this manager."""
        self.engine = engine

    def serialize_df_to_bytes(self, df: pd.DataFrame) -> bytes:
        """Serialize DataFrame to gzip-compressed Parquet bytes.

        The DataFrame index is not written (``index=False``).
        """
        logger.debug(
            "Serializing DataFrame with %(rows)d rows and %(cols)d columns.",
            {"rows": len(df), "cols": len(df.columns)},
        )
        buffer = io.BytesIO()
        df.to_parquet(buffer, compression="gzip", engine="pyarrow", index=False)
        return buffer.getvalue()

    def deserialize_df_from_bytes(self, data: bytes) -> pd.DataFrame:
        """Deserialize gzip-compressed Parquet bytes to a DataFrame."""
        logger.debug(
            "Decompressing cached Parquet data (size=%(size)d bytes).", {"size": len(data)}
        )
        buffer = io.BytesIO(data)
        df = pd.read_parquet(buffer, engine="pyarrow")
        logger.debug(
            "Deserialized DataFrame with %(rows)d rows and %(cols)d columns.",
            {"rows": len(df), "cols": len(df.columns)},
        )
        return df

    def _quote_table_name(self, table_name: str) -> str:
        """Return ``table_name`` quoted as an SQL identifier for the engine's dialect.

        The name is treated strictly as an identifier, optionally qualified with
        dots (``schema.table``); each dot-separated part is quoted on its own by
        the dialect's identifier preparer. Arbitrary SQL fragments are therefore
        not accepted as a table name.

        Raises:
            ValueError: If the name, or any dot-separated part of it, is empty.
        """
        parts = table_name.split(".")
        if not all(parts):
            raise ValueError(f"Invalid table name: {table_name!r}")
        quote_identifier = self.engine.dialect.identifier_preparer.quote
        return ".".join(quote_identifier(part) for part in parts)

    def get_dataframe(self, table_name: str) -> pd.DataFrame:
        """Return DataFrame for given table. Use cache if available, else load and cache.

        A failing cache lookup (including a cached entry that cannot be decoded)
        and a failing cache write are both logged and otherwise ignored, so the
        cache is strictly best-effort. Errors while reading the source table
        propagate to the caller.

        Args:
            table_name: Name of the source table, optionally schema-qualified.
                It doubles as the key in the ``cache`` table.

        Returns:
            The cached DataFrame on a cache hit, otherwise the freshly loaded one.

        Raises:
            ValueError: If the source table must be read and ``table_name`` is
                not a usable identifier.
        """
        logger.info("Fetching DataFrame for table '%(table_name)s'.", {"table_name": table_name})
        select_sql = text("SELECT data FROM cache WHERE name = :name")

        try:
            with self.engine.connect() as conn:
                result = conn.execute(select_sql, {"name": table_name}).fetchone()
                if result is not None:
                    logger.info(
                        "Cache hit for table '%(table_name)s'. Restoring DataFrame.",
                        {"table_name": table_name},
                    )
                    compressed_data = result[0]
                    return self.deserialize_df_from_bytes(compressed_data)
        except Exception as e:
            # Best-effort cache: fall through and read the source table instead.
            logger.exception(
                "Cache lookup failed for '%(table_name)s': %(error)s",
                {"table_name": table_name, "error": str(e)},
            )

        # Cache miss → load from source table
        logger.info(
            "Cache miss for '%(table_name)s'. Loading from source.", {"table_name": table_name}
        )
        # Identifiers cannot be bound as query parameters, so the name is quoted
        # by the dialect before it is placed into the statement.
        source_sql = f"SELECT * FROM {self._quote_table_name(table_name)}"
        df = pd.read_sql(source_sql, self.engine)

        try:
            compressed = self.serialize_df_to_bytes(df)
            insert_sql = text("REPLACE INTO cache (name, data) VALUES (:name, :data)")
            # engine.begin() commits on success and rolls back on error.
            with self.engine.begin() as conn:
                conn.execute(insert_sql, {"name": table_name, "data": compressed})
            logger.info("Cached table '%(table_name)s' successfully.", {"table_name": table_name})
        except Exception as e:
            # The data was loaded successfully; failing to store it must not lose it.
            logger.exception(
                "Failed to cache table '%(table_name)s': %(error)s",
                {"table_name": table_name, "error": str(e)},
            )

        return df


class Config:
    """Resolve runtime settings and initialise logging from them.

    Settings are parsed by ``configargparse`` using the given YAML file as the
    default config file, plus command-line arguments and environment
    variables. The parsed result is exposed as ``self.config``.
    """

    def __init__(self, secrets_file: Path) -> None:
        """Parse the settings, then configure logging with the parsed format and level."""
        settings = self._parse_config(secrets_file)
        self.config: Namespace = settings
        self._setup_logging(fmt=settings.logformat, level=settings.loglevel)

    @staticmethod
    def _parse_config(secrets_file: Path) -> Namespace:
        """Declare every supported option and return the parsed namespace.

        ``--mysql_options`` and ``--table_source`` are required; the value of
        ``--mysql_options`` is passed through ``yaml.safe_load``.
        """
        parser = configargparse.ArgParser(
            config_file_parser_class=YAMLConfigFileParser,
            default_config_files=[secrets_file],
        )

        # Logging options.
        parser.add(
            "--loglevel",
            choices=["INFO", "WARNING", "DEBUG", "ERROR", "CRITICAL"],
            default="INFO",
            env_var="LOGLEVEL",
            help="Log level",
        )
        parser.add("--logformat", default=LOGFORMAT, env_var="LOGFORMAT", help="Log format")

        # Optional single-value settings, each with an upper-cased environment variable.
        for field in ("database", "host", "user", "password"):
            parser.add(f"--mysql_{field}", env_var=f"MYSQL_{field.upper()}")

        # Required settings.
        parser.add(
            "--mysql_options",
            env_var="MYSQL_OPTIONS",
            required=True,
            type=yaml.safe_load,
            help="MySQL options",
        )
        parser.add("--table_source", env_var="TABLE_SOURCE", required=True)

        return parser.parse_args()

    @staticmethod
    def _setup_logging(fmt: str, level: str) -> None:
        """Configure logging to stdout with ``fmt`` at ``level``.

        A level name that ``logging`` does not define falls back to ``INFO``.
        """
        numeric_level = getattr(logging, level.upper(), logging.INFO)
        logging.basicConfig(level=numeric_level, format=fmt, stream=sys.stdout)
        logger.info("Logging initialized at level %(level)s.", {"level": level})


class Main:
    """Build the database engine and cache manager from the settings and run the lookup."""

    def __init__(self, config: Namespace):
        """Store ``config``, create the engine from it and attach a cache manager.

        Args:
            config: Parsed settings. ``mysql_options`` (a dict), the optional
                ``mysql_database`` / ``mysql_host`` / ``mysql_user`` /
                ``mysql_password`` overrides and ``table_source`` are read.
        """
        self.config = config
        self.engine = self._create_engine()
        self.cache_manager = CacheManager(self.engine)

    @staticmethod
    def override_mysql_options(config: Namespace):
        """Override the MySQL options based on the provided configuration.

        For each of database, host, password and user, a truthy
        ``config.mysql_<key>`` replaces ``config.mysql_options[<key>]``.
        ``config.mysql_options`` is modified in place.
        """
        for key in ("database", "host", "password", "user"):
            value = getattr(config, f"mysql_{key}")
            if value:
                config.mysql_options[key] = value

    @staticmethod
    def _build_db_url(options: dict, hide_password: bool = False) -> str:
        """Render the ``mysql+mysqlconnector`` connection URL from ``options``.

        User and password are taken literally and percent-encoded, so characters
        such as ``@``, ``/`` or ``:`` cannot corrupt the URL. Values in
        ``options`` must therefore not be percent-escaped already. A missing
        password is left out of the URL. ``user``, ``host`` and ``database`` are
        not validated here; a missing one renders as the literal text ``None``.

        Args:
            options: Connection options; ``user``, ``password``, ``host`` and
                ``database`` are read.
            hide_password: When true, ``***`` is rendered in place of the
                password so the result is safe to log.

        Returns:
            The connection URL as a string.
        """
        user = urllib.parse.quote(str(options.get("user")), safe="")
        credentials = user
        password = options.get("password")
        if password is not None:
            secret = "***" if hide_password else urllib.parse.quote(str(password), safe="")
            credentials = f"{user}:{secret}"
        host = options.get("host")
        database = options.get("database")
        return f"mysql+mysqlconnector://{credentials}@{host}/{database}"

    def _create_engine(self) -> Engine:
        """Create SQLAlchemy engine from config.

        Applies the individual ``mysql_*`` overrides first, then builds the
        connection URL from the merged options.
        """
        Main.override_mysql_options(self.config)
        options = self.config.mysql_options

        # Only the masked URL is logged; the real one would expose the password.
        logger.debug(
            "Creating SQLAlchemy engine for %(db_url)s",
            {"db_url": Main._build_db_url(options, hide_password=True)},
        )
        return create_engine(Main._build_db_url(options))

    def run(self):
        """Main entrypoint for cache handling.

        Fetches the DataFrame for ``config.table_source`` through the cache
        manager and prints its first rows. Any exception is logged with its
        traceback and not re-raised.
        """
        table_name = self.config.table_source
        logger.info(
            "Starting data retrieval for table '%(table_name)s'.", {"table_name": table_name}
        )
        try:
            df = self.cache_manager.get_dataframe(table_name)
            logger.info(
                "Retrieved DataFrame with %(rows)d rows and %(cols)d columns.",
                {"rows": len(df), "cols": len(df.columns)},
            )
            print(df.head())
        except Exception as e:
            logger.exception(
                "Failed to retrieve DataFrame for '%(table_name)s': %(error)s",
                {"table_name": table_name, "error": str(e)},
            )


if __name__ == "__main__":
    # Load the .env file before any configuration is parsed; override=True is
    # passed so its values take precedence over variables that are already set.
    dotenv.load_dotenv(override=True)

    # The secrets file is looked up under "secrets/" in the third parent of
    # this file's resolved path.
    BASE_DIR = Path(__file__).resolve().parent.parent.parent
    SECRETS_FILE = BASE_DIR / "secrets" / "dbcache_secrets.yml"

    CONFIG = Config(SECRETS_FILE)
    MAIN = Main(CONFIG.config)
    MAIN.run()
    logger.debug("Program complete.")
