"""
Core logic for manifest and data integrity verification using a two-phase audit.
"""

from collections import deque
from pathlib import Path

from retrocast.io.provenance import calculate_file_hash
from retrocast.models.provenance import Manifest, VerificationReport

# Names of directories that hold primary artifacts (inputs not generated by the workflow).
# A source path with any component equal to one of these names is treated as primary:
# graph discovery does not queue a parent manifest for it, and the Phase 1 chain check
# reports it as a primary artifact instead of verifying a parent link.
PRIMARY_ARTIFACT_DIRS = {"1-benchmarks", "2-raw"}


def _build_provenance_graph(start_path: Path, root_dir: Path, report: VerificationReport) -> dict[Path, Manifest]:
    """Discover and load every manifest reachable from ``start_path``.

    Walks the dependency graph breadth-first. For each loaded manifest, every
    source file that does not live under a primary-artifact directory is taken
    to be the output of the ``manifest.json`` in the same directory, and that
    manifest is queued for loading in turn.

    Args:
        start_path: Path of the manifest the walk starts from.
        root_dir: Directory that source paths found in manifests are joined onto.
        report: Report that receives one entry per discovery step.

    Returns:
        Mapping from manifest path (relative to ``root_dir``) to the loaded
        manifest. Manifests that are missing, outside ``root_dir``, unreadable
        or invalid are recorded as FAIL in ``report`` and left out of the mapping.
    """
    graph: dict[Path, Manifest] = {}
    # A deque pops from the left in O(1); list.pop(0) shifts every remaining item.
    queue: deque[Path] = deque([start_path])
    visited: set[Path] = set()

    report.add("INFO", start_path, "Graph Discovery", category="header")

    while queue:
        manifest_path = queue.popleft()
        if manifest_path in visited:
            continue
        visited.add(manifest_path)

        try:
            relative_path = manifest_path.relative_to(root_dir)
        except ValueError:
            # Happens when a manifest lists an absolute source path that is not
            # under root_dir; record it instead of aborting the whole audit.
            report.add(
                "FAIL",
                manifest_path,
                "Manifest path in dependency chain is outside the root directory.",
                category="graph",
            )
            continue

        if not manifest_path.exists():
            report.add("FAIL", relative_path, "Manifest file in dependency chain is MISSING.", category="graph")
            continue

        try:
            with open(manifest_path, encoding="utf-8") as f:
                manifest = Manifest.model_validate_json(f.read())
            graph[relative_path] = manifest
            report.add("PASS", relative_path, f"Loaded manifest for action '{manifest.action}'.", category="graph")

            for source_file in manifest.source_files:
                source_path = Path(source_file.path)
                is_primary = any(part in PRIMARY_ARTIFACT_DIRS for part in source_path.parts)
                if not is_primary:
                    # Generated artifacts are expected to sit next to the manifest that produced them.
                    parent_manifest_path = root_dir / source_path.parent / "manifest.json"
                    if parent_manifest_path not in visited:
                        report.add(
                            "INFO",
                            source_path,
                            "Source is a generated artifact, adding its manifest to the queue.",
                            category="graph",
                        )
                        queue.append(parent_manifest_path)
        except Exception as e:
            # Broad on purpose: any read/parse problem becomes a report entry, not a crash.
            report.add("FAIL", relative_path, f"Failed to load or parse manifest: {e}", category="graph")

    return graph


def _verify_logical_chain(graph: dict[Path, Manifest], report: VerificationReport) -> None:
    """Phase 1: compare the hashes recorded in each manifest with those of its parents.

    For every non-primary source file of every manifest in ``graph``, looks up the
    ``manifest.json`` in the source file's directory and checks that it declares the
    file as an output with the same hash. Nothing is read from disk here; only the
    manifests' own records are compared.

    Args:
        graph: Loaded manifests keyed by their path relative to the root directory.
        report: Report that receives one entry per inspected manifest and source file.
    """
    report.add("INFO", report.manifest_path, "Phase 1 - Verifying manifest chain consistency", category="header")

    for manifest_key, manifest in graph.items():
        report.add("INFO", manifest_key, f"Inspecting links for manifest '{manifest.action}'...", category="context")
        if not manifest.source_files:
            continue

        for declared_source in manifest.source_files:
            artifact_path = Path(declared_source.path)

            if any(segment in PRIMARY_ARTIFACT_DIRS for segment in artifact_path.parts):
                # Primary artifacts have no producing manifest, so there is no link to check.
                report.add("PASS", artifact_path, "Source is a primary artifact.", category="phase1")
                continue

            producer_key = artifact_path.parent / "manifest.json"
            if producer_key not in graph:
                report.add(
                    "WARN",
                    artifact_path,
                    f"Parent manifest '{producer_key}' not found; cannot verify link.",
                    category="phase1",
                )
                continue

            producer = graph[producer_key]

            # First output entry of the producer whose path matches the declared source.
            declared_output = None
            for candidate in producer.output_files:
                if candidate.path == declared_source.path:
                    declared_output = candidate
                    break

            if not declared_output:
                status = "FAIL"
                message = f"Provenance broken. Not declared as output in parent manifest ('{producer.action}')."
            elif declared_output.file_hash != declared_source.file_hash:
                status = "FAIL"
                message = "Provenance broken. Hash mismatch between parent and child manifests."
            else:
                status = "PASS"
                message = f"Link to parent manifest ('{producer.action}') is consistent."

            report.add(status, artifact_path, message, category="phase1")


def _verify_physical_integrity(graph: dict[Path, Manifest], root_dir: Path, report: VerificationReport) -> None:
    """Phase 2: verify every file mentioned in the graph against the disk.

    Collects one expected hash per file path from all manifests in ``graph``, then
    hashes the file found at ``root_dir / path`` and compares the two.

    Args:
        graph: Loaded manifests keyed by their path relative to the root directory.
        root_dir: Directory that the manifests' file paths are joined onto.
        report: Report that receives one PASS/FAIL entry per checked file.
    """
    report.add("INFO", report.manifest_path, "Phase 2 - Verifying on-disk file integrity", category="header")

    # 1. Build a canonical map of every file to its expected hash.
    # Outputs are the source of truth; a source entry is used only when no
    # manifest in the graph declares that path as an output.
    expected_hashes: dict[Path, str] = {}
    for manifest in graph.values():
        for f in manifest.output_files:
            expected_hashes[Path(f.path)] = f.file_hash
    for manifest in graph.values():
        for f in manifest.source_files:
            expected_hashes.setdefault(Path(f.path), f.file_hash)

    # 2. Check every file against the disk, in sorted path order.
    for relative_path, expected_hash in sorted(expected_hashes.items()):
        absolute_path = root_dir / relative_path

        if not absolute_path.exists():
            report.add("FAIL", relative_path, "File is MISSING from disk.", category="phase2")
            continue

        try:
            actual_hash = calculate_file_hash(absolute_path)
        except OSError as e:
            # exists() is also true for directories and unreadable files, and the file
            # can vanish between the check and the read; report it and keep auditing.
            report.add("FAIL", relative_path, f"Could not read file to compute its hash: {e}", category="phase2")
            continue

        if actual_hash != expected_hash:
            report.add("FAIL", relative_path, "HASH MISMATCH (Disk vs. Manifest).", category="phase2")
        else:
            report.add("PASS", relative_path, "On-disk file hash matches manifest record.", category="phase2")


def verify_manifest(manifest_path: Path, root_dir: Path, deep: bool = False) -> VerificationReport:
    """Verify the integrity and lineage of an artifact via its manifest.

    Args:
        manifest_path: Path of the manifest to verify; must be located under ``root_dir``.
        root_dir: Directory that manifest file paths are relative to.
        deep: When false, only the files listed in this one manifest are checked
            against the disk. When true, the whole dependency graph is built, its
            manifest-to-manifest hashes are checked, and then every file is checked
            against the disk; each stage runs only if the report is still valid.

    Returns:
        The report holding every entry recorded during verification.
    """
    report = VerificationReport(manifest_path=manifest_path.relative_to(root_dir))
    target = report.manifest_path

    if not deep:
        # Shallow mode: the on-disk check applied to a graph of exactly one manifest.
        try:
            manifest = Manifest.model_validate_json(manifest_path.read_text(encoding="utf-8"))
            _verify_physical_integrity({target: manifest}, root_dir, report)
        except Exception as exc:
            report.add("FAIL", target, f"Failed to load manifest: {exc}", category="phase2")
        return report

    # --- Deep verification ---

    # Step 1: discover and load every manifest in the dependency graph.
    provenance_graph = _build_provenance_graph(manifest_path, root_dir, report)
    if report.is_valid:
        report.add(
            "PASS",
            target,
            f"Successfully built provenance graph with {len(provenance_graph)} manifests.",
            category="graph",
        )

        # Step 2 (Phase 1): manifest-to-manifest consistency across the graph.
        _verify_logical_chain(provenance_graph, report)
        if report.is_valid:
            # Step 3 (Phase 2): on-disk integrity of every file in the graph.
            _verify_physical_integrity(provenance_graph, root_dir, report)
        else:
            report.add(
                "FAIL",
                target,
                "Logical chain verification failed, aborting physical check.",
                category="phase1",
            )
    else:
        report.add("FAIL", target, "Could not build provenance graph, aborting.", category="graph")

    return report
