import csv
from collections import defaultdict
from datetime import date
from dateutil.parser import parse as parse_date
from io import StringIO
from fastapi import UploadFile
from pydantic import BaseModel, Field, PrivateAttr, model_validator
from typing import Annotated, Generic, Self, TypeVar, TypedDict
from nexo.enums.identity import Gender
from nexo.schemas.error.enums import ErrorCode
from nexo.types.dict import ListOfStrToStrDict
from nexo.types.integer import OptInt
from nexo.types.string import OptStr, SeqOfStrs
from ..enums.parameter import ParameterGroup


class Document(BaseModel):
    # An uploaded file read fully into memory together with its metadata.
    # `_raw` keeps the originating UploadFile when the instance is built
    # through `from_file`; `content` is declared with exclude=True.
    _raw: UploadFile | None = PrivateAttr(None)
    content: Annotated[bytes, Field(..., description="Content", exclude=True)]
    content_type: Annotated[str, Field(..., description="Content Type")]
    filename: Annotated[str, Field(..., description="Filename")]
    size: Annotated[int, Field(..., description="Size", gt=0)]

    @classmethod
    async def from_file(
        cls,
        file: UploadFile,
        *,
        max_size: OptInt = None,
        valid_content_types: SeqOfStrs | str | None = None,
        valid_extensions: SeqOfStrs | str | None = None,
    ) -> Self:
        """Read ``file`` and build a validated document from it.

        Args:
            file: The upload to read. Its whole content is read into memory.
            max_size: Largest accepted ``file.size``; ``None`` disables the check.
            valid_content_types: A single accepted content type, a sequence of
                accepted content types, or ``None`` to accept any.
            valid_extensions: A single accepted filename suffix, a sequence of
                accepted suffixes, or ``None`` to accept any. The comparison
                is a case-sensitive ``str.endswith``.

        Returns:
            The new document, with spaces in the filename replaced by
            underscores and ``_raw`` set to ``file``.

        Raises:
            ValueError: With ``(ErrorCode.BAD_REQUEST, message)`` as arguments
                when the content is empty, the size is missing, non-positive or
                above ``max_size``, or the content type or filename is missing
                or not among the accepted values.
        """
        content = await file.read()
        if not content:
            raise ValueError(ErrorCode.BAD_REQUEST, "Content can not be empty")

        size = file.size
        if size is None or size <= 0:
            raise ValueError(
                ErrorCode.BAD_REQUEST, "Size can not be None and must be larger than 0"
            )
        if max_size is not None and size > max_size:
            raise ValueError(
                ErrorCode.BAD_REQUEST,
                f"Size of {size} exceeds set maximum of {max_size}",
            )

        content_type = file.content_type
        if content_type is None:
            # Carries the error code like every other validation failure here.
            raise ValueError(ErrorCode.BAD_REQUEST, "Content type can not be None")
        if isinstance(valid_content_types, str):
            if content_type != valid_content_types:
                raise ValueError(
                    ErrorCode.BAD_REQUEST,
                    f"Invalid content type of '{content_type}'. Must be '{valid_content_types}'",
                )
        elif valid_content_types is not None:
            if content_type not in valid_content_types:
                raise ValueError(
                    ErrorCode.BAD_REQUEST,
                    f"Invalid content type of '{content_type}'. Must be one of {valid_content_types}",
                )

        filename = file.filename
        if filename is None:
            # Carries the error code like every other validation failure here.
            raise ValueError(ErrorCode.BAD_REQUEST, "Filename can not be None")
        if isinstance(valid_extensions, str):
            if not filename.endswith(valid_extensions):
                raise ValueError(
                    ErrorCode.BAD_REQUEST,
                    f"Invalid extension. Must be '{valid_extensions}'",
                )
        elif valid_extensions is not None:
            # str.endswith accepts a tuple of candidate suffixes.
            if not filename.endswith(tuple(valid_extensions)):
                raise ValueError(
                    ErrorCode.BAD_REQUEST,
                    f"Invalid extension. Must be one of {valid_extensions}",
                )

        # The extension check above runs against the original filename; only
        # the stored name has its spaces replaced.
        filename = filename.replace(" ", "_")

        document = cls(
            content=content, content_type=content_type, filename=filename, size=size
        )
        document._raw = file

        return document


class CSVDocument(Document):
    # A Document whose content type must be "text/csv", with helpers that
    # parse the content into dictionaries keyed by the CSV header.
    _content_type: str = PrivateAttr("text/csv")

    def _validate_content_type(self):
        """Raise TypeError unless ``content_type`` equals the expected CSV type."""
        if self.content_type != self._content_type:
            raise TypeError(
                ErrorCode.BAD_REQUEST,
                f"CSV Document content type must be {self._content_type}",
            )

    @model_validator(mode="after")
    def validate_content_type(self) -> Self:
        """Run the content type check once the model fields are populated."""
        self._validate_content_type()
        return self

    @classmethod
    def from_document(cls, document: Document) -> Self:
        """Build a CSV document from the fields of an existing ``document``.

        The new instance shares the source document's ``_raw`` upload.
        """
        csv_document = cls(
            content=document.content,
            content_type=document.content_type,
            filename=document.filename,
            size=document.size,
        )
        csv_document._raw = document._raw
        return csv_document

    @classmethod
    async def from_file(
        cls,
        file: UploadFile,
        *,
        max_size: OptInt = None,
        valid_content_types: SeqOfStrs | str | None = "text/csv",
        valid_extensions: SeqOfStrs | str | None = ".csv",
    ) -> Self:
        """Same as ``Document.from_file`` with CSV defaults for the filters."""
        return await super().from_file(
            file,
            max_size=max_size,
            valid_content_types=valid_content_types,
            valid_extensions=valid_extensions,
        )

    @property
    def rows(self) -> ListOfStrToStrDict:
        """Parse the content into one dictionary per CSV data row.

        The content is decoded as UTF-8 with an optional BOM and read with
        ``csv.DictReader`` using ``skipinitialspace=True``. Values are returned
        exactly as the reader produced them.

        Raises:
            TypeError: If the content type is not the expected CSV type.
        """
        self._validate_content_type()
        text = self.content.decode(encoding="utf-8-sig")
        reader = csv.DictReader(StringIO(text), skipinitialspace=True)
        return list(reader)

    @property
    def cleaned_rows(self) -> list[dict[str, OptStr]]:
        """Return the rows with stripped values, rejecting malformed input.

        Every value is stripped of surrounding whitespace; empty and
        whitespace-only values become ``None``.

        Raises:
            ValueError: With ``(ErrorCode.BAD_REQUEST, message)`` as arguments
                when a row has more values than the header has columns, or
                when a row duplicates an earlier row after cleaning.
        """
        cleaned: list[dict[str, OptStr]] = []
        seen_rows: set[tuple[tuple[str, OptStr], ...]] = set()
        for index, row in enumerate(self.rows):
            new_row: dict[str, OptStr] = {}
            for key, value in row.items():
                if key is None:
                    # csv.DictReader collects the surplus fields of an
                    # over-long row in a list stored under the key None.
                    raise ValueError(
                        ErrorCode.BAD_REQUEST,
                        f"Row at index {index} has more values than header columns: {value}",
                    )
                # `or None` makes a whitespace-only cell equal to an empty one.
                new_row[key] = (value.strip() or None) if value else None

            # Convert row dict to tuple of items for hashing
            row_tuple = tuple(sorted(new_row.items()))
            if row_tuple in seen_rows:
                raise ValueError(
                    ErrorCode.BAD_REQUEST,
                    f"Duplicate row found at index {index}: {new_row}",
                )
            seen_rows.add(row_tuple)

            cleaned.append(new_row)
        return cleaned


class BaseGroupPatientData(BaseModel):
    # Identity fields of a patient in a group record; all fields are required.
    # `id` is a string of 1 to 16 digits, enforced by the regex pattern.
    id: Annotated[
        str,
        Field(..., description="Patient's ID", max_length=16, pattern=r"^[0-9]{1,16}$"),
    ]
    name: Annotated[str, Field(..., description="Patient's name", max_length=200)]
    # NOTE(review): "MCU" presumably stands for medical check-up — confirm.
    mcu_date: Annotated[date, Field(..., description="MCU date")]
    date_of_birth: Annotated[date, Field(..., description="Patient's Date Of Birth")]
    gender: Annotated[Gender, Field(..., description="Patient's gender")]


class GroupPatientExamination(BaseModel):
    # One examination entry: a parameter within a parameter group, with its
    # value and unit. `value` and `unit` must be supplied but may be None.
    parameter_group: Annotated[
        ParameterGroup, Field(..., description="Parameter group")
    ]
    parameter: Annotated[str, Field(..., description="Parameter", max_length=50)]
    value: Annotated[OptStr, Field(..., description="Value")]
    unit: Annotated[OptStr, Field(..., description="Unit")]


# Alias for a list of examination entries.
ListOfGroupPatientExaminations = list[GroupPatientExamination]


class FlatGroupPatientData(GroupPatientExamination, BaseGroupPatientData):
    # A single flat record combining the patient identity fields with one
    # examination entry; it adds no fields beyond those of its two bases.
    pass


# Alias for a list of flat patient records.
ListOfFlatGroupPatientData = list[FlatGroupPatientData]


class GroupedGroupPatientData(BaseGroupPatientData):
    # Patient identity fields together with that patient's examination
    # entries; at least one entry is required (min_length=1).
    examinations: Annotated[
        ListOfGroupPatientExaminations,
        Field(..., description="Examination", min_length=1),
    ]

    @classmethod
    def from_base(
        cls, base: BaseGroupPatientData, examinations: ListOfGroupPatientExaminations
    ) -> "GroupedGroupPatientData":
        """Combine the dumped fields of ``base`` with ``examinations``."""
        identity_fields = base.model_dump()
        return cls(examinations=examinations, **identity_fields)


class SeenData(TypedDict):
    # Identity values recorded for a patient ID the first time it is seen,
    # used by GroupCSVDocument to compare later rows with the same ID.
    name: str
    mcu_date: date
    date_of_birth: date
    gender: Gender


class GroupCSVDocument(CSVDocument):
    # CSV upload of group examination results: one examination parameter per
    # row, with the owning patient's identity columns repeated on every row.

    def _require(
        self, row: dict[str, OptStr], column: str, label: str, index: int
    ) -> str:
        """Return the stripped value of ``column``; reject null or blank cells."""
        value = row.get(column)
        text = value.strip() if value is not None else ""
        if not text:
            raise ValueError(
                ErrorCode.BAD_REQUEST, f"Found null {label} in row {index}"
            )
        return text

    def _parse_date(self, raw: str, label: str, index: int) -> date:
        """Parse ``raw`` as a day-first date; report failures as bad requests."""
        try:
            return parse_date(raw, dayfirst=True).date()
        except Exception as error:
            # Kept broad on purpose, as before: any parser failure is reported
            # as a bad request. The original error is chained for debugging.
            raise ValueError(
                ErrorCode.BAD_REQUEST,
                f"Invalid date format of {label} in row {index}: '{raw}'",
            ) from error

    def _parse_row(self, index: int, row: dict[str, OptStr]) -> FlatGroupPatientData:
        """Validate one cleaned CSV row and convert it into a flat record."""
        patient_id = self._require(row, "patient_id", "patient id", index)
        name = self._require(row, "name", "patient name", index)
        mcu_date = self._parse_date(
            self._require(row, "mcu_date", "MCU Date", index), "MCU Date", index
        )
        date_of_birth = self._parse_date(
            self._require(row, "date_of_birth", "Date Of Birth", index),
            "Date Of Birth",
            index,
        )

        raw_gender = self._require(row, "gender", "patient gender", index)
        try:
            gender = Gender(raw_gender.lower())
        except ValueError as error:
            raise ValueError(
                ErrorCode.BAD_REQUEST,
                f"Invalid patient gender in row {index}: '{raw_gender}'",
            ) from error

        raw_group = self._require(row, "parameter_group", "parameter group", index)
        normalized_group = raw_group.replace(" ", "_").lower()
        if normalized_group == "physical_examination":
            # Accepted spelling that maps onto the PHYSICAL member.
            parameter_group = ParameterGroup.PHYSICAL
        else:
            try:
                parameter_group = ParameterGroup(normalized_group)
            except ValueError as error:
                raise ValueError(
                    ErrorCode.BAD_REQUEST,
                    f"Invalid parameter group in row {index}: '{raw_group}'",
                ) from error

        parameter = self._require(row, "parameter", "parameter", index)

        return FlatGroupPatientData(
            id=patient_id,
            name=name,
            mcu_date=mcu_date,
            gender=gender,
            date_of_birth=date_of_birth,
            parameter_group=parameter_group,
            parameter=parameter,
            value=row.get("value"),
            unit=row.get("unit"),
        )

    def _ensure_consistent_patients(self, records: ListOfFlatGroupPatientData) -> None:
        """Raise if rows sharing a patient ID disagree on the patient's identity."""
        seen: dict[str, SeenData] = {}

        for index, record in enumerate(records):
            pid = record.id
            known = seen.get(pid)
            if known is None:
                seen[pid] = {
                    "name": record.name,
                    "mcu_date": record.mcu_date,
                    "date_of_birth": record.date_of_birth,
                    "gender": record.gender,
                }
                continue

            # Date of birth is compared last so that inputs which already
            # failed on another field keep reporting that same field.
            comparisons = (
                ("patient name", record.name, known["name"]),
                ("MCU date", record.mcu_date, known["mcu_date"]),
                ("gender", record.gender, known["gender"]),
                ("date of birth", record.date_of_birth, known["date_of_birth"]),
            )
            for label, current, previous in comparisons:
                if current != previous:
                    raise ValueError(
                        ErrorCode.BAD_REQUEST,
                        f"Conflicting {label} for ID '{pid}' in row {index}: "
                        f"'{current}' != '{previous}'",
                    )

    @property
    def flat_patient_data(self) -> ListOfFlatGroupPatientData:
        """Return one validated flat record per CSV row.

        Row numbers in error messages are zero-based positions among the
        data rows.

        Raises:
            ValueError: With ``(ErrorCode.BAD_REQUEST, message)`` as arguments
                when an expected column is missing, a required cell is null
                or blank, a date, gender or parameter group cannot be parsed,
                or rows with the same patient ID disagree on name, MCU date,
                gender or date of birth. Errors raised while building
                ``FlatGroupPatientData`` or by ``cleaned_rows`` propagate
                unchanged.
        """
        rows = self.cleaned_rows

        expected_columns = (
            "patient_id",
            "name",
            "mcu_date",
            "date_of_birth",
            "gender",
            "parameter_group",
            "parameter",
            "value",
            "unit",
        )
        if rows:
            # Rows come from csv.DictReader, which gives every row the header's
            # keys, so inspecting the first row covers the whole file.
            missing = [column for column in expected_columns if column not in rows[0]]
            if missing:
                raise ValueError(
                    ErrorCode.BAD_REQUEST, f"Missing required column(s): {missing}"
                )

        # All rows are parsed before the cross-row check, so a row-level error
        # anywhere in the file is reported ahead of an identity conflict.
        results = [self._parse_row(index, row) for index, row in enumerate(rows)]
        self._ensure_consistent_patients(results)
        return results

    @property
    def grouped_patient_data(self) -> dict[str, GroupedGroupPatientData]:
        """Return the flat records grouped by patient ID.

        Each value holds the identity fields taken from the first row seen for
        that ID and the examinations of all its rows, in file order.

        Raises:
            ValueError: Whatever ``flat_patient_data`` raises.
        """
        identities: dict[str, BaseGroupPatientData] = {}
        examinations: dict[str, ListOfGroupPatientExaminations] = defaultdict(list)

        for row in self.flat_patient_data:
            if row.id not in identities:
                identities[row.id] = BaseGroupPatientData(
                    id=row.id,
                    name=row.name,
                    mcu_date=row.mcu_date,
                    date_of_birth=row.date_of_birth,
                    gender=row.gender,
                )

            examinations[row.id].append(
                GroupPatientExamination(
                    parameter_group=row.parameter_group,
                    parameter=row.parameter,
                    value=row.value,
                    unit=row.unit,
                )
            )

        return {
            pid: GroupedGroupPatientData.from_base(
                base=base, examinations=examinations[pid]
            )
            for pid, base in identities.items()
        }


class PDFDocument(Document):
    # A Document whose content type must be "application/pdf".
    _content_type: str = PrivateAttr("application/pdf")

    def _validate_content_type(self):
        """Raise TypeError unless ``content_type`` equals the expected PDF type."""
        if self.content_type != self._content_type:
            # Carries the error code, matching CSVDocument's equivalent check.
            raise TypeError(
                ErrorCode.BAD_REQUEST,
                f"PDF Document content type must be {self._content_type}",
            )

    @model_validator(mode="after")
    def validate_content_type(self) -> Self:
        """Run the content type check once the model fields are populated."""
        self._validate_content_type()
        return self

    @classmethod
    def from_document(cls, document: Document) -> Self:
        """Build a PDF document from the fields of an existing ``document``.

        The new instance shares the source document's ``_raw`` upload.
        """
        pdf_document = cls(
            content=document.content,
            content_type=document.content_type,
            filename=document.filename,
            size=document.size,
        )
        pdf_document._raw = document._raw
        return pdf_document

    @classmethod
    async def from_file(
        cls,
        file: UploadFile,
        *,
        max_size: OptInt = None,
        valid_content_types: SeqOfStrs | str | None = "application/pdf",
        valid_extensions: SeqOfStrs | str | None = ".pdf",
    ) -> Self:
        """Same as ``Document.from_file`` with PDF defaults for the filters."""
        return await super().from_file(
            file,
            max_size=max_size,
            valid_content_types=valid_content_types,
            valid_extensions=valid_extensions,
        )


# A PDF document or None.
OptPDFDocument = PDFDocument | None


# Union of every document model defined in this module, followed by optional
# and list variants. Each alias has a TypeVar bound to it for use as a generic
# parameter by the mixins below.
AnyDocument = Document | CSVDocument | GroupCSVDocument | PDFDocument
AnyDocumentT = TypeVar("AnyDocumentT", bound=AnyDocument)
OptAnyDocument = AnyDocument | None
OptAnyDocumentT = TypeVar("OptAnyDocumentT", bound=OptAnyDocument)
ListOfAnyDocuments = list[AnyDocument]
ListOfAnyDocumentsT = TypeVar("ListOfAnyDocumentsT", bound=ListOfAnyDocuments)
OptListOfAnyDocuments = ListOfAnyDocuments | None
OptListOfAnyDocumentsT = TypeVar("OptListOfAnyDocumentsT", bound=OptListOfAnyDocuments)


class DocumentMixin(BaseModel, Generic[OptAnyDocumentT]):
    # Adds a required `document` field; the type parameter selects which
    # document type is accepted and whether None is allowed.
    document: Annotated[OptAnyDocumentT, Field(..., description="Document")]


class HeterogenousDocumentsMixin(BaseModel, Generic[OptListOfAnyDocumentsT]):
    # Adds a required `documents` field typed by the parameter, which is
    # bound to a list of any document types or None; declared with
    # min_length=1.
    documents: Annotated[
        OptListOfAnyDocumentsT, Field(..., description="Documents", min_length=1)
    ]


class HomogenousDocumentsMixin(BaseModel, Generic[AnyDocumentT]):
    # Adds a required `documents` list whose items all share the document
    # type given as the type parameter; declared with min_length=1.
    documents: Annotated[
        list[AnyDocumentT], Field(..., description="Documents", min_length=1)
    ]


class OptHomogenousDocumentsMixin(BaseModel, Generic[AnyDocumentT]):
    # Optional counterpart of HomogenousDocumentsMixin: `documents` defaults
    # to None and, when given, is a list of a single document type declared
    # with min_length=1.
    documents: Annotated[
        list[AnyDocumentT] | None, Field(None, description="Documents", min_length=1)
    ] = None


class DocumentName(BaseModel):
    # Adds an optional `document_name` string that defaults to None.
    document_name: Annotated[OptStr, Field(None, description="Document's name")] = None


class DocumentURL(BaseModel):
    # Adds an optional `document_url` string that defaults to None.
    document_url: Annotated[OptStr, Field(None, description="Document's URL")] = None
