"""Utilities for loading and routing OmicVerse project Agent Skills."""
from __future__ import annotations

import logging
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple

# Optional YAML support for robust frontmatter parsing
try:  # pragma: no cover - optional dependency
    import yaml  # type: ignore
    _YAML_AVAILABLE = True
except Exception:  # pragma: no cover - optional dependency
    yaml = None
    _YAML_AVAILABLE = False

logger = logging.getLogger(__name__)


@dataclass
class SkillDefinition:
    """Represents a single Agent Skill discovered on disk.

    name: display title; slug: lowercase-hyphen identifier for routing.
    """

    name: str
    slug: str
    description: str
    path: Path
    body: str
    metadata: Dict[str, str] = field(default_factory=dict)

    def prompt_instructions(self, max_chars: int = 4000) -> str:
        """Return the main instruction body, trimmed if necessary."""

        text = self.body.strip()
        if len(text) <= max_chars:
            return text
        return text[: max_chars - 3].rstrip() + "..."

    @property
    def summary_text(self) -> str:
        """Combine metadata and first section for lightweight scoring."""

        header = f"{self.name}\n{self.description}\n"
        primary_section = self.body.split("\n\n", 1)[0]
        return header + primary_section


@dataclass
class SkillMatch:
    """Represents a routing decision for a query."""

    skill: SkillDefinition
    score: float

    def as_dict(self) -> Dict[str, str]:
        return {
            "name": self.skill.name,  # display title
            "slug": self.skill.slug,
            "score": f"{self.score:.3f}",
            "description": self.skill.description,
            "path": str(self.skill.path),
        }


class SkillRegistry:
    """Loads skills from the filesystem and stores their metadata."""

    def __init__(self, skill_root: Path):
        self.skill_root = skill_root
        self._skills: Dict[str, SkillDefinition] = {}

    @property
    def skills(self) -> Dict[str, SkillDefinition]:
        return self._skills

    def load(self) -> None:
        """Discover every SKILL.md under the configured skill root."""

        if not self.skill_root.exists():
            logger.warning("Skill root %s does not exist; no skills loaded.", self.skill_root)
            self._skills = {}
            return

        discovered: Dict[str, SkillDefinition] = {}
        for skill_file in sorted(self.skill_root.glob("*/SKILL.md")):
            definition = self._parse_skill_file(skill_file)
            if not definition:
                continue
            key = definition.slug.lower()
            if key in discovered:
                logger.warning("Duplicate skill name '%s' found; keeping first occurrence.", definition.name)
                continue
            discovered[key] = definition
            logger.info("Loaded skill '%s' from %s", definition.name, skill_file)
        self._skills = discovered

    def _parse_skill_file(self, skill_file: Path) -> Optional[SkillDefinition]:
        try:
            content = skill_file.read_text(encoding="utf-8")
        except OSError as exc:
            logger.error("Unable to read skill file %s: %s", skill_file, exc)
            return None

        lines = content.splitlines()
        if not lines or lines[0].strip() != "---":
            logger.warning("Skill file %s is missing YAML frontmatter.", skill_file)
            return None

        try:
            closing_index = lines.index("---", 1)
        except ValueError:
            logger.warning("Skill file %s has unterminated YAML frontmatter.", skill_file)
            return None

        frontmatter_lines = lines[1:closing_index]
        metadata = self._parse_frontmatter(frontmatter_lines)
        raw_name = metadata.get("name")
        description = metadata.get("description")
        # Determine display title and slug with backward compatibility
        title = metadata.get("title") or metadata.get("display_title") or raw_name
        slug_value = metadata.get("slug")
        if not slug_value:
            # If name is slug-like, use it; otherwise slugify the title
            slug_value = raw_name if self._looks_like_slug(raw_name) else self._slugify(title)
        if not (title and description and slug_value):
            logger.warning("Skill file %s is missing required title/description/slug metadata.", skill_file)
            return None

        body = "\n".join(lines[closing_index + 1 :]).strip()
        skill_path = skill_file.parent
        return SkillDefinition(name=str(title), slug=str(slug_value), description=str(description), path=skill_path, body=body, metadata=metadata)

    @staticmethod
    def _parse_frontmatter(lines: Iterable[str]) -> Dict[str, str]:
        """Parse YAML frontmatter into a simple string dictionary.

        Prefers yaml.safe_load if PyYAML is available to support multiline
        values and rich YAML constructs. Falls back to a minimal line-based
        parser if PyYAML is not installed.
        """

        # Try robust YAML parsing first
        if _YAML_AVAILABLE:
            text = "\n".join(list(lines))
            try:
                loaded: Optional[Dict[str, Any]] = yaml.safe_load(text)  # type: ignore[attr-defined]
            except Exception as exc:  # pragma: no cover - defensive logging
                logger.warning("Failed to parse YAML frontmatter with PyYAML: %s", exc)
                loaded = None

            if isinstance(loaded, dict):
                # Coerce values to strings for SkillDefinition.metadata type
                result: Dict[str, str] = {}
                for k, v in loaded.items():
                    try:
                        result[str(k)] = v if isinstance(v, str) else ("\n".join(v) if isinstance(v, list) else str(v))
                    except Exception:
                        result[str(k)] = str(v)
                return result

        # Fallback: simple line-based parser (single-line key: value pairs only)
        metadata: Dict[str, str] = {}
        for raw_line in lines:
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            if ":" not in line:
                continue
            key, value = line.split(":", 1)
            metadata[key.strip()] = value.strip().strip('"')
        if not _YAML_AVAILABLE:
            logger.debug("PyYAML not installed; used fallback frontmatter parser.")
        return metadata

    @staticmethod
    def _looks_like_slug(value: Optional[str]) -> bool:
        if not value or not isinstance(value, str):
            return False
        return re.fullmatch(r"[a-z0-9]+(?:-[a-z0-9]+)*", value) is not None

    @staticmethod
    def _slugify(value: Optional[str], max_len: int = 64) -> str:
        if not value:
            return ""
        slug = re.sub(r"[^a-z0-9]+", "-", str(value).lower()).strip("-")
        slug = re.sub(r"-+", "-", slug)
        if len(slug) > max_len:
            slug = slug[:max_len].strip("-")
        return slug


class SkillRouter:
    """Simple keyword-based router that ranks skills for a query."""

    def __init__(self, registry: SkillRegistry, min_score: float = 0.1):
        self.registry = registry
        self.min_score = min_score
        self._skill_vectors: Dict[str, Dict[str, int]] = {}
        self._build_vectors()

    def _build_vectors(self) -> None:
        self._skill_vectors = {
            key: self._token_frequency(definition.summary_text)
            for key, definition in self.registry.skills.items()
        }

    def refresh(self) -> None:
        self._build_vectors()

    def route(self, query: str, top_k: int = 1) -> List[SkillMatch]:
        if not query or not query.strip():
            return []
        query_vector = self._token_frequency(query)
        if not query_vector:
            return []

        scored: List[Tuple[str, float]] = []
        for key, skill_vector in self._skill_vectors.items():
            score = self._cosine_similarity(query_vector, skill_vector)
            scored.append((key, score))

        scored.sort(key=lambda item: item[1], reverse=True)
        matches: List[SkillMatch] = []
        for key, score in scored[:top_k]:
            if score < self.min_score:
                continue
            skill = self.registry.skills.get(key)
            if not skill:
                continue
            matches.append(SkillMatch(skill=skill, score=score))
        return matches

    @staticmethod
    def _token_frequency(text: str) -> Dict[str, int]:
        tokens = re.findall(r"[a-z0-9]+", text.lower())
        freq: Dict[str, int] = {}
        for token in tokens:
            freq[token] = freq.get(token, 0) + 1
        return freq

    @staticmethod
    def _cosine_similarity(vec_a: Dict[str, int], vec_b: Dict[str, int]) -> float:
        if not vec_a or not vec_b:
            return 0.0
        common = set(vec_a.keys()) & set(vec_b.keys())
        numerator = sum(vec_a[token] * vec_b[token] for token in common)
        if numerator == 0:
            return 0.0
        sum_sq_a = sum(value * value for value in vec_a.values())
        sum_sq_b = sum(value * value for value in vec_b.values())
        denominator = (sum_sq_a ** 0.5) * (sum_sq_b ** 0.5)
        if denominator == 0:
            return 0.0
        return numerator / denominator


def build_skill_registry(project_root: Path) -> Optional[SkillRegistry]:
    """Helper to create and load a registry from the project root."""

    skill_root = project_root / ".claude" / "skills"
    registry = SkillRegistry(skill_root=skill_root)
    registry.load()
    if not registry.skills:
        logger.warning("No skills discovered under %s", skill_root)
    return registry


__all__ = [
    "SkillDefinition",
    "SkillMatch",
    "SkillRegistry",
    "SkillRouter",
    "build_skill_registry",
]
