"""Models for SSSOM."""

from __future__ import annotations

import datetime
from typing import Literal, TypeAlias

from curies.vocabulary import unspecified_matching_process
from pydantic import BaseModel, ConfigDict, Field

from .constants import PREDICATE_TYPES

# Names exported by ``from <this module> import *``.
__all__ = ["Cardinality", "Record"]

Cardinality: TypeAlias = Literal["1:1", "1:n", "n:1", "1:0", "0:1", "n:n", "0:0"]


class Record(BaseModel):
    """Represents an SSSOM record (i.e., a row in an SSSOM TSV file).

    An SSSOM record contains both the mapping set information and mapping information.

    Only ``subject_id``, ``predicate_id``, and ``object_id`` are required. The
    ``mapping_justification`` falls back to the CURIE of
    :data:`curies.vocabulary.unspecified_matching_process`, and every other
    field defaults to ``None``.
    """

    # Instances are frozen: pydantic rejects attribute assignment after creation.
    model_config = ConfigDict(frozen=True)

    record_id: str | None = Field(None)

    # --- subject -----------------------------------------------------------
    subject_id: str = Field(...)
    subject_label: str | None = Field(None)
    subject_category: str | None = Field(None)
    subject_match_field: list[str] | None = Field(None)
    subject_preprocessing: list[str] | None = Field(None)
    subject_source: str | None = Field(None)
    subject_source_version: str | None = Field(None)
    # Documented in parallel with ``object_type`` below so both ends of the
    # mapping carry the same schema description.
    subject_type: str | None = Field(
        None,
        description="See https://mapping-commons.github.io/sssom/subject_type/. "
        "Values allowed are from https://mapping-commons.github.io/sssom/EntityTypeEnum/",
    )

    # --- predicate ---------------------------------------------------------
    predicate_id: str = Field(...)
    predicate_label: str | None = Field(None)
    # The only accepted modifier is the literal string "Not".
    predicate_modifier: Literal["Not"] | None = Field(None)
    predicate_type: str | None = Field(
        None,
        # Schema examples are the CURIEs of the entries in PREDICATE_TYPES.
        examples=[x.curie for x in PREDICATE_TYPES],
        description="See https://mapping-commons.github.io/sssom/predicate_type/. "
        "Values allowed are from https://mapping-commons.github.io/sssom/EntityTypeEnum/",
    )

    # --- object ------------------------------------------------------------
    object_id: str = Field(...)
    object_label: str | None = Field(None)
    object_category: str | None = Field(None)
    object_match_field: list[str] | None = Field(None)
    object_preprocessing: list[str] | None = Field(None)
    object_source: str | None = Field(None)
    object_source_version: str | None = Field(None)
    object_type: str | None = Field(
        None,
        description="See https://mapping-commons.github.io/sssom/object_type/. "
        "Values allowed are from https://mapping-commons.github.io/sssom/EntityTypeEnum/",
    )

    # Falls back to the "unspecified matching process" CURIE when not given.
    mapping_justification: str = Field(unspecified_matching_process.curie)

    # --- people ------------------------------------------------------------
    author_id: list[str] | None = Field(None)
    author_label: list[str] | None = Field(None)
    creator_id: list[str] | None = Field(None)
    creator_label: list[str] | None = Field(None)
    reviewer_id: list[str] | None = Field(None)
    reviewer_label: list[str] | None = Field(None)

    # --- dates -------------------------------------------------------------
    publication_date: datetime.date | None = Field(None)
    mapping_date: datetime.date | None = Field(None)

    comment: str | None = Field(None)
    confidence: float | None = Field(
        None,
        description="""\
        An assessment of the confidence of the mapping, reported by the method used to generate it.

        This means that confidence values aren't generally comparable, though they should follow
        the rough standard that closer to 1 is more confident and closer to 0 is less confident.

        Most of the lexical mappings already in Biomappings were generated with Gilda.
        Depending on the script, the score therefore refers to either:

        1. The Gilda match score, inspired by https://aclanthology.org/W15-3801/. Section 5.2 of
           the `supplementary material for the Gilda paper <https://doi.org/10.1093/bioadv/vbac034>`_
           describes this score in detail, where 1.0 is best and 0 is worst.
           https://github.com/biopragmatics/biomappings/blob/master/scripts/generate_agrovoc_mappings.py
           is an example that uses this variant.
        2. A high-level estimation of the precision of the scores generated by the given script.
           For example, the CL-MeSH mappings were estimated to be 90% correct, so all the mappings
           generated by
           https://github.com/biopragmatics/biomappings/blob/master/scripts/generate_cl_mesh_mappings.py
           are marked with 0.9 as its score.

        However, other variants are possible. For example, this confidence could reflect the loss
        function if a knowledge graph embedding model was used to generate a mapping prediction.
        """,
    )
    curation_rule: list[str] | None = Field(None)
    curation_rule_text: list[str] | None = Field(None)
    issue_tracker_item: str | None = Field(None)
    license: str | None = Field(None)

    #: see https://mapping-commons.github.io/sssom/MappingCardinalityEnum/
    mapping_cardinality: Cardinality | None = Field(None)
    cardinality_scope: list[str] | None = Field(None)
    mapping_provider: str | None = Field(None)
    mapping_source: str | None = Field(None)
    mapping_tool: str | None = Field(None)
    mapping_tool_id: str | None = Field(None)
    mapping_tool_version: str | None = Field(None)
    match_string: list[str] | None = Field(None)

    other: str | None = Field(None)
    see_also: list[str] | None = Field(None)
    similarity_measure: str | None = Field(None)
    similarity_score: float | None = Field(None)
