import json
import typing as t
from decimal import Decimal
from enum import Enum
from typing import Sequence

if t.TYPE_CHECKING:
    import agate
    import pandas
from pydantic import BaseModel, Field


class DataFrameColumnType(Enum):
    """Logical types that a DataFrame column can be tagged with."""

    NUMBER = "number"
    INTEGER = "integer"
    TEXT = "text"
    BOOLEAN = "boolean"
    DATE = "date"
    DATETIME = "datetime"
    TIMEDELTA = "timedelta"
    UNKNOWN = "unknown"

    @classmethod
    def from_string(cls, type_str: str) -> "DataFrameColumnType":
        """Look up the member whose value matches a type name.

        Matching ignores letter case and surrounding whitespace.

        Args:
            type_str: Name of the type, e.g. "integer" or " Text ".

        Returns:
            The matching member, or UNKNOWN when no member has that value.
        """
        wanted = type_str.lower().strip()
        # Scan the members in definition order; fall back to UNKNOWN on no match.
        return next((member for member in cls if member.value == wanted), cls.UNKNOWN)


# Schema entry describing a single column of a DataFrame payload.
class DataFrameColumn(BaseModel):
    # Identifier of the column. The agate and dict-based DataFrame constructors
    # in this file set it to the same string as `name`.
    key: str
    # Column name as taken from the source table or schema dict.
    name: str
    # Logical type of the values stored in this column.
    type: DataFrameColumnType


class DataFrame(BaseModel):
    """Serializable tabular result: a column schema plus the row values.

    Instances are normally created through one of the ``from_*`` constructors.
    """

    columns: t.List[DataFrameColumn]
    data: t.List[Sequence]
    limit: t.Optional[int] = Field(None, description="Limit the number of rows returned")
    more: t.Optional[bool] = Field(None, description="Whether there are more rows to fetch")

    @staticmethod
    def from_agate(
        table: "agate.Table",
        limit: t.Optional[int] = None,
        more: t.Optional[bool] = None,
    ) -> "DataFrame":
        """Create a DataFrame from an agate table.

        Args:
            table: Source agate table; its column names and column types define the schema.
            limit: Optional limit on the number of rows returned
            more: Optional flag indicating whether there are more rows to fetch

        Returns:
            DataFrame instance. Column types that match none of the known agate
            types are reported as ``DataFrameColumnType.UNKNOWN``.
        """
        from recce.adapter.dbt_adapter import dbt_version

        # The agate helper module lives in a different package depending on the dbt version.
        if dbt_version < "v1.8":
            import dbt.clients.agate_helper as agate_helper
        else:
            import dbt_common.clients.agate_helper as agate_helper

        import agate

        # Ordered lookup: the first entry whose class matches the column type wins.
        type_mapping = [
            (agate.Number, DataFrameColumnType.NUMBER),
            (agate.Text, DataFrameColumnType.TEXT),
            (agate.Boolean, DataFrameColumnType.BOOLEAN),
            (agate.Date, DataFrameColumnType.DATE),
            (agate.DateTime, DataFrameColumnType.DATETIME),
            (agate.TimeDelta, DataFrameColumnType.TIMEDELTA),
        ]
        # The helper module may not define an Integer type, so probe for it once
        # instead of on every column.
        if hasattr(agate_helper, "Integer"):
            type_mapping.append((agate_helper.Integer, DataFrameColumnType.INTEGER))

        columns = []
        for col_name, agate_type in zip(table.column_names, table.column_types):
            col_type = next(
                (mapped for agate_cls, mapped in type_mapping if isinstance(agate_type, agate_cls)),
                DataFrameColumnType.UNKNOWN,
            )
            columns.append(DataFrameColumn(key=col_name, name=col_name, type=col_type))

        def _row_values(row):
            # If the value is Decimal, check if it's finite. If not, convert it to float(xxx) (GitHub issue #476)
            return tuple(float(v) if isinstance(v, Decimal) and not v.is_finite() else v for v in row.values())

        return DataFrame(
            columns=columns,
            data=[_row_values(row) for row in table.rows],
            limit=limit,
            more=more,
        )

    @staticmethod
    def from_pandas(
        pandas_df: "pandas.DataFrame",
        limit: t.Optional[int] = None,
        more: t.Optional[bool] = None,
    ) -> "DataFrame":
        """Create a DataFrame from a pandas DataFrame.

        Args:
            pandas_df: Source pandas DataFrame.
            limit: Optional limit on the number of rows returned
            more: Optional flag indicating whether there are more rows to fetch

        Returns:
            DataFrame instance. Only the dtypes "int64", "float64", "object" and
            "bool" are mapped to a specific column type; every other dtype is
            reported as ``DataFrameColumnType.UNKNOWN``.
        """
        columns = []
        for column, dtype in pandas_df.dtypes.items():
            if dtype == "int64":
                col_type = DataFrameColumnType.INTEGER
            elif dtype == "float64":
                col_type = DataFrameColumnType.NUMBER
            elif dtype == "object":
                col_type = DataFrameColumnType.TEXT
            elif dtype == "bool":
                col_type = DataFrameColumnType.BOOLEAN
            else:
                col_type = DataFrameColumnType.UNKNOWN
            # `key` is a required field of DataFrameColumn; use the column label for
            # both key and name, as the other constructors do. Labels are converted
            # to str because pandas column labels are not necessarily strings.
            label = str(column)
            columns.append(DataFrameColumn(key=label, name=label, type=col_type))

        # Round-trip through pandas' JSON writer so the values become plain JSON types.
        data = json.loads(pandas_df.to_json(orient="values"))

        return DataFrame(
            columns=columns,
            data=data,
            limit=limit,
            more=more,
        )

    @staticmethod
    def from_data(
        columns: t.Dict[str, str],
        data: t.List[Sequence],
        limit: t.Optional[int] = None,
        more: t.Optional[bool] = None,
    ) -> "DataFrame":
        """Create a DataFrame from columns and data directly.

        Args:
            columns: Dict defining the schema where keys are column names and values are type strings.
                     Type strings can be: "number", "integer", "text", "boolean", "date", "datetime", "timedelta".
                     Any other type string results in an "unknown" column type.
            data: List of rows (each row is a list/tuple/sequence of values)
            limit: Optional limit on the number of rows returned
            more: Optional flag indicating whether there are more rows to fetch

        Returns:
            DataFrame instance

        Examples:
            # Using simple dict format
            columns = {"idx": "integer", "name": "text", "impacted": "boolean"}
            data = [[0, "model_a", True], [1, "model_b", False]]
            df = DataFrame.from_data(columns, data)
        """
        # Convert dict columns to DataFrameColumn objects
        processed_columns = [
            DataFrameColumn(key=key, name=key, type=DataFrameColumnType.from_string(type_str))
            for key, type_str in columns.items()
        ]

        return DataFrame(
            columns=processed_columns,
            data=data,
            limit=limit,
            more=more,
        )
