"""Haupt-Flow für MongoDB → Solr Synchronisation."""

from __future__ import annotations

import time
from typing import Any, Dict, List

from prefect import flow, get_run_logger
from prefect.artifacts import create_markdown_artifact
from prefect.client.orchestration import get_client
from prefect.task_runners import ConcurrentTaskRunner
from prefect.utilities.context import get_flow_run_id
from prefect_aws.s3 import S3Bucket

from mongodbsolr.tasks.mongodb import (
    check_indexes,
    fetch_documents_cursor,
    process_document_batch,
    extract_ids_from_documents,
)
from mongodbsolr.tasks.solr import (
    delete_by_query,
    upsert_batch,
    delete_by_ids,
    commit_solr,
)
from mongodbsolr.utils.config import (
    load_config_from_variable,
    validate_config,
    resolve_config,
)


# Maximum number of IDs per Solr delete batch in delete-only mode, to avoid overly large Solr queries.
MAX_DELETE_ONLY_SOLR_BATCH_SIZE = 500


@flow(
    name="MongoDB → Solr Sync",
    flow_run_name="mongodb-solr-sync-{config_var}",
    log_prints=True,
    task_runner=ConcurrentTaskRunner(),
    result_storage="s3-bucket/prefect-results",
)
def mongodb_to_solr_sync(
    *,
    config_var: str,
    mode_override: str | None = None,
    explain_indexes: bool = False,
) -> Dict[str, Any]:
    """Synchronise documents from MongoDB into Solr.

    The flow loads its configuration from a Prefect Variable, validates it,
    resolves secret blocks and then runs one of three modes:

    * ``upsert`` (default): read batches from MongoDB, validate/map them and
      upsert the valid documents into Solr.
    * ``delete-before-import``: run ``delete_query`` against Solr first, then
      behave like ``upsert``.
    * ``delete-only``: read batches from MongoDB and delete the matching IDs
      from Solr.

    Between batches the flow asks the orchestrator whether the run is being
    cancelled and stops reading further batches if so.

    Args:
        config_var: Name of the Prefect Variable holding the configuration.
        mode_override: Optional mode that takes precedence over the
            configured ``mode``.
        explain_indexes: Passed to the index check as ``explain``.

    Returns:
        Dict with the keys ``mode``, ``total_documents``,
        ``processed_documents``, ``skipped_documents``, ``deleted_documents``,
        ``duration_seconds`` and ``index_check``.

    Raises:
        ValueError: If mode ``delete-before-import`` is selected without a
            ``delete_query`` (configuration validation may raise as well).
        Exception: Any connection or processing error is logged and re-raised.
    """
    import asyncio
    import uuid

    logger = get_run_logger()
    start_time = time.time()

    # Resolve the flow-run ID first: it is needed by the cancel check below and
    # for the S3 prefix of skipped documents. Without a flow-run context a
    # synthetic "local-..." ID is used for the S3 prefix only.
    orchestrated_run_id = get_flow_run_id()
    has_orchestrated_run = orchestrated_run_id is not None
    if has_orchestrated_run:
        flow_run_id = str(orchestrated_run_id)
    else:
        flow_run_id = f"local-{uuid.uuid4()}"

    # Helper for cooperative cancellation: checks the flow-run state at the orchestrator.
    async def _is_cancelling() -> bool:
        if not has_orchestrated_run:
            # The synthetic local ID does not exist at the orchestrator.
            return False
        async with get_client() as client:
            flow_run = await client.read_flow_run(flow_run_id)
        state = flow_run.state
        if not state:
            return False
        # str() of a (str, Enum) member is "ClassName.MEMBER", which would never
        # match the bare names below; compare the member's value instead and
        # fall back to the raw object if it is already a plain string.
        raw_type = getattr(state.type, "value", state.type)
        return str(raw_type).upper() in {"CANCELLING", "CANCELLED"}

    # Load configuration
    logger.info(f"Lade Konfiguration aus Variable: {config_var}")
    config = asyncio.run(load_config_from_variable(config_var))

    # Validate configuration
    validate_config(config)

    # Resolve secret blocks (async helper)
    config = asyncio.run(resolve_config(config))

    # Determine mode (the override wins over the configured value)
    mode = mode_override or config.get("mode", "upsert")
    logger.info(f"Modus: {mode}")

    # An unrecognised mode matches none of the branches below; make that
    # visible instead of silently reporting an empty, "successful" sync.
    known_modes = ("upsert", "delete-before-import", "delete-only")
    if mode not in known_modes:
        logger.warning(
            f"Unbekannter Modus '{mode}' – es werden weder Upserts noch Löschungen ausgeführt "
            f"(erlaubt: {', '.join(known_modes)})."
        )

    # Extract configuration sections
    mongodb_cfg = config["mongodb"]
    solr_cfg = config["solr"]
    mapping_cfg = config["mapping"]

    mongodb_uri = mongodb_cfg["uri"]
    mongodb_db = mongodb_cfg["database"]
    mongodb_coll = mongodb_cfg["collection"]
    mongodb_query = mongodb_cfg["query"]
    batch_size = mongodb_cfg.get("batch_size", 100)

    # Cap the batch size in delete-only mode to avoid overly large Solr queries
    if mode == "delete-only" and batch_size > MAX_DELETE_ONLY_SOLR_BATCH_SIZE:
        logger.warning(
            f"Batch-Größe für Delete-Only von {batch_size} auf {MAX_DELETE_ONLY_SOLR_BATCH_SIZE} begrenzt, "
            "um Solr-Queries mit zu vielen IDs zu vermeiden."
        )
        batch_size = MAX_DELETE_ONLY_SOLR_BATCH_SIZE

    solr_url = solr_cfg["url"]
    solr_core = solr_cfg["core"]
    commit_within = solr_cfg.get("commit_within", 5000)
    max_concurrent_upserts = solr_cfg.get("max_concurrent_upserts", 2)
    final_commit = solr_cfg.get("finalcommit", False)

    mongodb_id_field = mapping_cfg["mongodb_id_field"]
    solr_id_field = mapping_cfg["solr_id_field"]
    exclude_fields = mapping_cfg.get("exclude_fields", [])
    required_fields = mapping_cfg.get("required_fields", [])

    delete_query = config.get("delete_query")

    # Run the index check
    logger.info("Prüfe MongoDB-Explain, wenn aktiviert...")
    index_result = check_indexes(
        uri=mongodb_uri,
        database=mongodb_db,
        collection=mongodb_coll,
        query_json=mongodb_query,
        explain=explain_indexes,
    )
    logger.info(f"Index-Analyse Ergebnis: {index_result['status']}")

    # Initialise statistics
    stats = {
        "mode": mode,
        "total_documents": 0,
        "processed_documents": 0,
        "skipped_documents": 0,
        "deleted_documents": 0,
        "duration_seconds": 0.0,
        "index_check": index_result,
    }

    skipped_sample: List[Dict[str, Any]] = []
    example_docs: List[Dict[str, Any]] = []

    # S3 prefix for skipped documents, derived from the flow-run ID
    skipped_docs_s3_prefix: str | None = f"mongodbsolr/skipped/{flow_run_id}"

    # S3 block for list/JSON data (e.g. skipped documents)
    default_s3_bucket = S3Bucket.load("prefect-default")

    try:
        # Mode: delete-before-import
        if mode == "delete-before-import":
            if not delete_query:
                raise ValueError("Modus 'delete-before-import' erfordert 'delete_query'")

            logger.info(f"Führe Delete-by-Query aus: {delete_query}")
            deleted = delete_by_query(
                solr_url=solr_url,
                core=solr_core,
                query=delete_query,
                commit=True,
            )
            stats["deleted_documents"] = deleted
            logger.info(f"Delete-by-Query abgeschlossen: {deleted} Dokumente")

        # Mode: delete-only
        if mode == "delete-only":
            logger.info("Modus: Delete-Only - Extrahiere IDs aus MongoDB")

            # Fetch MongoDB documents and delete their IDs batch by batch
            for batch in fetch_documents_cursor(
                uri=mongodb_uri,
                database=mongodb_db,
                collection=mongodb_coll,
                query_json=mongodb_query,
                batch_size=batch_size,
                required_fields=required_fields,
                exclude_fields=exclude_fields,
            ):
                # Cooperative cancel check for delete-only
                if asyncio.run(_is_cancelling()):
                    logger.warning(
                        "Flow-Run befindet sich im Zustand 'Cancelling' – breche Delete-Only ab."
                    )
                    break

                stats["total_documents"] += len(batch)

                # Extract IDs
                batch_ids = extract_ids_from_documents(
                    documents=batch,
                    mongodb_id_field=mongodb_id_field,
                    solr_id_field=solr_id_field,
                )
                if not batch_ids:
                    continue

                deleted = delete_by_ids(
                    solr_url=solr_url,
                    core=solr_core,
                    ids=batch_ids,
                    id_field=solr_id_field,
                    batch_size=batch_size,
                    commit=False,
                )
                stats["deleted_documents"] += deleted

            logger.info(
                f"Delete-Only abgeschlossen: {stats['deleted_documents']} Dokumente gelöscht"
            )

            # Run the final commit only if requested and something was deleted
            if final_commit and stats["deleted_documents"]:
                commit_solr(solr_url=solr_url, core=solr_core)

        # Mode: upsert, or delete-before-import (after the delete above)
        if mode in ("upsert", "delete-before-import"):
            logger.info("Starte Upsert-Verarbeitung")

            batch_index = 0
            inflight_upserts: list = []

            # Fetch MongoDB documents and process them
            for batch in fetch_documents_cursor(
                uri=mongodb_uri,
                database=mongodb_db,
                collection=mongodb_coll,
                query_json=mongodb_query,
                batch_size=batch_size,
                required_fields=required_fields,
                exclude_fields=exclude_fields,
            ):
                # Cooperative cancel check for upserts
                if asyncio.run(_is_cancelling()):
                    logger.warning(
                        "Flow-Run befindet sich im Zustand 'Cancelling' – breche Upsert-Verarbeitung ab."
                    )
                    break

                stats["total_documents"] += len(batch)
                batch_index += 1

                # Process the batch (validation, filtering, ID mapping)
                valid_docs, skipped_docs = process_document_batch(
                    documents=batch,
                    required_fields=required_fields,
                    exclude_fields=exclude_fields,
                    mongodb_id_field=mongodb_id_field,
                    solr_id_field=solr_id_field,
                )

                stats["processed_documents"] += len(valid_docs)
                stats["skipped_documents"] += len(skipped_docs)

                # Skipped documents: keep a sample of at most 100 and export the full batch to S3
                if skipped_docs:
                    if len(skipped_sample) < 100:
                        remaining = 100 - len(skipped_sample)
                        skipped_sample.extend(skipped_docs[:remaining])

                    _write_skipped_docs_batch_to_s3(
                        s3_bucket=default_s3_bucket,
                        prefix=skipped_docs_s3_prefix,
                        batch_index=batch_index,
                        docs=skipped_docs,
                    )

                # Collect example documents (first 5)
                if len(example_docs) < 5:
                    remaining = 5 - len(example_docs)
                    example_docs.extend(valid_docs[:remaining])

                # Upsert into Solr with bounded parallelism
                if valid_docs:
                    future = upsert_batch.with_options(
                        tags=["solr", "upsert", "solr-upsert"]
                    ).submit(
                        solr_url=solr_url,
                        core=solr_core,
                        documents=valid_docs,
                        commit_within=commit_within,
                    )
                    inflight_upserts.append(future)

                    # Bound the window of parallel upserts: once it is full,
                    # wait for the oldest submission before reading more data
                    if len(inflight_upserts) >= max_concurrent_upserts:
                        finished = inflight_upserts.pop(0)
                        upserted = finished.result()
                        logger.info(f"Batch in Solr geschrieben (parallel): {upserted} Dokumente")

            # Wait for the remaining upserts (also after a cancel-triggered break)
            for future in inflight_upserts:
                upserted = future.result()
                logger.info(f"Batch in Solr geschrieben (restliche Futures): {upserted} Dokumente")

            # Final commit (optional, because commit_within is used)
            if final_commit:
                logger.info("Führe finalen Commit aus")
                commit_solr(solr_url=solr_url, core=solr_core)

        # Finalise statistics
        stats["duration_seconds"] = round(time.time() - start_time, 2)

        # Create artifacts
        _create_artifacts(stats, skipped_sample, example_docs, skipped_docs_s3_prefix)

        logger.info(f"Sync abgeschlossen: {stats}")
        return stats

    except Exception as e:
        # Log for the flow-run log, then let the orchestrator mark the run as failed
        logger.error(f"Fehler bei Sync: {e}")
        raise


def _write_skipped_docs_batch_to_s3(
    s3_bucket: S3Bucket,
    prefix: str,
    batch_index: int,
    docs: List[Dict[str, Any]],
) -> None:
    """Write one batch of skipped documents to S3 as a JSON Lines object.

    The object key is ``{prefix}/batch-{batch_index:06d}.jsonl``; each document
    becomes one UTF-8 encoded JSON line. Nothing is written for an empty batch.

    Args:
        s3_bucket: Bucket block whose ``write_path`` receives the payload.
        prefix: Key prefix under which the batch object is stored.
        batch_index: Batch number, zero-padded to six digits in the key.
        docs: Skipped documents to export.
    """
    if not docs:
        return

    import json

    key = f"{prefix}/batch-{batch_index:06d}.jsonl"
    # default=str: this is a diagnostic export, so a value json cannot encode
    # natively (e.g. a datetime) is written as its string form instead of
    # aborting the whole sync with a TypeError.
    payload = "\n".join(
        json.dumps(doc, ensure_ascii=False, default=str) for doc in docs
    )
    s3_bucket.write_path(path=key, content=payload.encode("utf-8"))


def _create_artifacts(
    stats: Dict[str, Any],
    skipped_sample: List[Dict[str, Any]],
    example_docs: List[Dict[str, Any]],
    skipped_docs_s3_prefix: str | None = None,
) -> None:
    """Create Prefect markdown artifacts for statistics and document samples.

    Always creates the ``sync-statistics`` artifact. The ``skipped-documents``
    and ``example-documents`` artifacts are only created when the respective
    list is non-empty.

    Args:
        stats: Sync statistics; the summary reads the keys ``mode``,
            ``duration_seconds``, ``total_documents``, ``processed_documents``,
            ``skipped_documents`` and ``deleted_documents``.
        skipped_sample: Sample of skipped documents (at most 100 are rendered).
        example_docs: Example documents after mapping.
        skipped_docs_s3_prefix: Optional S3 prefix holding all skipped
            documents; only mentioned when documents were actually skipped.
    """
    import json

    def _to_json(payload: Any) -> str:
        # ensure_ascii=False keeps umlauts readable in the rendered markdown
        # (consistent with the JSONL export); default=str stops a value json
        # cannot encode natively from failing the flow while it is merely
        # rendering a report.
        return json.dumps(payload, indent=2, ensure_ascii=False, default=str)

    # 1. Sync statistics
    stats_md = f"""# MongoDB → Solr Sync: Statistiken

## Zusammenfassung
- **Modus**: {stats["mode"]}
- **Dauer**: {stats["duration_seconds"]}s
- **Gesamt**: {stats["total_documents"]} Dokumente
- **Verarbeitet**: {stats["processed_documents"]} Dokumente
- **Übersprungen**: {stats["skipped_documents"]} Dokumente
- **Gelöscht**: {stats["deleted_documents"]} Dokumente
"""
    if skipped_docs_s3_prefix and stats["skipped_documents"]:
        stats_md += f"- **S3-Pfad für übersprungene Dokumente**: `{skipped_docs_s3_prefix}`\n"

    stats_md += f"""
## Details
```json
{_to_json(stats)}
```
"""
    create_markdown_artifact(
        key="sync-statistics",
        markdown=stats_md,
        description="Sync-Statistiken",
    )

    # 2. Skipped documents (sample)
    if skipped_sample:
        skipped_md = f"""# Übersprungene Dokumente (Sample)

**Anzahl im Sample**: {len(skipped_sample)}
**Gesamt übersprungen**: {stats["skipped_documents"]}

## Details
```json
{_to_json(skipped_sample[:100])}
```

*Hinweis: Maximal 100 Einträge aus dem Sample angezeigt*
"""
        create_markdown_artifact(
            key="skipped-documents",
            markdown=skipped_md,
            description=f"{len(skipped_sample)} übersprungene Dokumente (Sample)",
        )

    # 3. Example documents
    if example_docs:
        examples_md = f"""# Beispiel-Dokumente (nach Mapping)

**Anzahl**: {len(example_docs)}

```json
{_to_json(example_docs)}
```
"""
        create_markdown_artifact(
            key="example-documents",
            markdown=examples_md,
            description=f"{len(example_docs)} Beispiel-Dokumente",
        )
