"""
Take a gitlab template with inline yaml and split it up into yaml and shell
commands. Useful for project initialization.

Fixes:
 - Support shredding a *file* or an entire *folder* tree
 - Force --out to be a *directory* (scripts live next to output YAML)
 - Script refs are made *relative to the YAML file* (e.g., "./script.sh")
 - Any YAML ``!reference [...]`` items in scripts are emitted as *bash comments*
 - Logging prints *paths relative to CWD* to reduce noise
"""

from __future__ import annotations

import io
import logging
import re
from collections.abc import Iterable
from pathlib import Path
from typing import Any

from ruamel.yaml import YAML
from ruamel.yaml.comments import TaggedScalar
from ruamel.yaml.scalarstring import FoldedScalarString

from bash2gitlab.utils.mock_ci_vars import generate_mock_ci_variables_script
from bash2gitlab.utils.pathlib_polyfills import is_relative_to
from bash2gitlab.utils.yaml_factory import get_yaml

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# First line written to every extracted job script (see ``shred_script_block``).
# Variables files produced by ``shred_variables_block`` do not receive it.
SHEBANG = "#!/bin/bash"

# Public API of this module.
__all__ = [
    "run_shred_gitlab_file",
    "run_shred_gitlab_tree",
    # Back-compat alias (old name processed a single file)
    "run_shred_gitlab",
]


# --- helpers -----------------------------------------------------------------


def rel(p: Path) -> str:
    """Format *p* for log output, preferring a CWD-relative form.

    The path is resolved and expressed relative to the current working
    directory. If that fails for any reason (for example because the path
    lies outside the CWD), ``str(p)`` is returned unchanged.
    """
    try:
        shortened = p.resolve().relative_to(Path.cwd())
    except Exception:
        # Logging helper only: fall back to the raw path rather than raise.
        return str(p)
    return str(shortened)


def dump_inline_no_doc_markers(yaml: YAML, node: Any) -> str:
    """Dump *node* with *yaml* while ``explicit_start``/``explicit_end`` are off.

    Both settings on the dumper are forced to ``False`` for the duration of
    the dump and restored to their previous values afterwards, even when the
    dump raises. Trailing newlines are stripped from the returned text.
    """
    saved_markers = (yaml.explicit_start, yaml.explicit_end)
    stream = io.StringIO()
    try:
        yaml.explicit_start = yaml.explicit_end = False
        yaml.dump(node, stream)
    finally:
        # Always hand the dumper back in the state we found it.
        yaml.explicit_start, yaml.explicit_end = saved_markers
    text = stream.getvalue()
    return text.rstrip("\n")


def create_script_filename(job_name: str, script_key: str) -> str:
    """Build the ``.sh`` filename for one script block of a job.

    The job name is lower-cased, every character other than word characters,
    ``.`` and ``-`` becomes ``-``, runs of ``-`` are collapsed, and leading or
    trailing ``-`` are removed. The main ``script`` key maps to
    ``<job>.sh``; any other key maps to ``<job>_<key>.sh``.
    """
    stem = re.sub(r"-+", "-", re.sub(r"[^\w.-]", "-", job_name.lower())).strip("-")
    if script_key != "script":
        stem = f"{stem}_{script_key}"
    return f"{stem}.sh"


def bashify_script_items(script_content: list[str | Any] | str, yaml: YAML) -> list[str]:
    """Convert YAML items from a script block into bash lines.

    - A plain string block is split into its individual lines.
    - String items of a sequence are kept as-is (multi-line items stay one entry).
    - ``None`` items are skipped.
    - Other YAML nodes are dumped to text with no doc markers.
    - ``!reference [...]`` turns into bash comments so the intent isn't lost.
      Every line of the dumped node is commented out, so a reference that
      serializes to several lines cannot leak YAML into the script as commands.
    - Empty/whitespace-only entries are dropped.

    Args:
        script_content: The value of a script-like key: a string or a sequence
            whose items are strings or other YAML nodes.
        yaml: Dumper used to serialize non-string items.

    Returns:
        The bash lines, in their original order.
    """
    raw_lines: list[str] = []

    if isinstance(script_content, str):
        raw_lines.extend(script_content.splitlines())
    else:
        for item in script_content:  # ruamel CommentedSeq-like or list
            if isinstance(item, str):
                raw_lines.append(item)
                continue
            if item is None:
                continue

            dumped = dump_inline_no_doc_markers(yaml, item)
            tagged_reference = isinstance(item, TaggedScalar) and str(item.tag).endswith("reference")
            if tagged_reference or dumped.lstrip().startswith("!reference"):
                # Comment out *each* line; prefixing only the first one would
                # leave the remainder of a multi-line dump as live bash.
                raw_lines.extend(f"# {line}" for line in (dumped.splitlines() or [dumped]))
            else:
                raw_lines.append(dumped)

    # Filter empties
    return [line for line in raw_lines if line.strip()]


# --- shredders ---------------------------------------------------------------


def _render_export_line(key: Any, value: Any) -> str:
    """Render one CI variable as a bash ``export NAME="value"`` line."""
    if isinstance(value, dict):
        # GitLab's expanded variable form keeps the actual value under a
        # "value" key (next to description/options/expand); exporting the
        # repr of the whole mapping would be meaningless.
        value = value.get("value")
    # A null YAML value becomes an empty string rather than the text "None".
    text = "" if value is None else str(value)
    # NOTE(review): only double quotes are escaped; `$` and backticks stay
    # live, so bash expands them when the file is sourced. Confirm intended.
    escaped = text.replace('"', '\\"')
    return f'export {key}="{escaped}"'


def shred_variables_block(
    variables_data: dict,
    base_name: str,
    scripts_output_path: Path,
    *,
    dry_run: bool = False,
) -> str | None:
    """Extract variables dict into a ``.sh`` file of ``export`` statements.

    Args:
        variables_data: Mapping of variable name to value. Values may be
            scalars or mappings with a ``value`` key.
        base_name: Prefix of the generated file, ``<base_name>_variables.sh``.
        scripts_output_path: Directory the file is written to (created on demand).
        dry_run: When true, log what would happen but write nothing.

    Returns:
        The filename (not full path) of the created variables script, or
        ``None`` when *variables_data* is empty or not a dict.
    """
    if not variables_data or not isinstance(variables_data, dict):
        return None

    variable_lines = [_render_export_line(key, value) for key, value in variables_data.items()]

    script_filename = f"{base_name}_variables.sh"
    script_filepath = scripts_output_path / script_filename
    full_script_content = "\n".join(variable_lines) + "\n"

    logger.info("Shredding variables for '%s' to '%s'", base_name, rel(script_filepath))

    if not dry_run:
        script_filepath.parent.mkdir(parents=True, exist_ok=True)
        script_filepath.write_text(full_script_content, encoding="utf-8")
        script_filepath.chmod(0o755)

    return script_filename


def shred_script_block(
    *,
    script_content: list[str | Any] | str,
    job_name: str,
    script_key: str,
    scripts_output_path: Path,
    yaml_dir: Path,
    dry_run: bool = False,
    global_vars_filename: str | None = None,
    job_vars_filename: str | None = None,
) -> tuple[str | None, str | None]:
    """Extract a script block into a ``.sh`` file and return (script_path, bash_command).

    The generated bash command references the script *relative to the YAML
    file* and always starts with ``./`` (or ``../``), including for hidden
    jobs whose filename itself begins with a dot (``.template.sh``).

    Args:
        script_content: Value of the script-like key (string or sequence).
        job_name: Name of the job; used to derive the script filename.
        script_key: Which key is being extracted (``script``, ``before_script``, ...).
        scripts_output_path: Directory the script is written to.
        yaml_dir: Directory of the output YAML; the command is made relative to it.
        dry_run: When true, log what would happen but write nothing.
        global_vars_filename: Optional global variables file to source locally.
        job_vars_filename: Optional job variables file to source locally.

    Returns:
        ``(script_path, bash_command)``, or ``(None, None)`` when the block is
        empty or contains only blank lines.
    """
    if not script_content:
        return None, None

    yaml = get_yaml()

    script_lines = bashify_script_items(script_content, yaml)
    if not script_lines:
        logger.debug("Skipping empty script block in job '%s' for key '%s'.", job_name, script_key)
        return None, None

    script_filename = create_script_filename(job_name, script_key)
    script_filepath = scripts_output_path / script_filename

    # Build header with conditional sourcing for local execution: the
    # variables files are only sourced when $CI is unset or empty.
    # NOTE(review): ". ./<file>" resolves against the shell's working
    # directory, not the script's location - confirm that is intended.
    header_parts: list[str] = [SHEBANG]
    sourcing_block = [f"  . ./{name}" for name in (global_vars_filename, job_vars_filename) if name]
    if sourcing_block:
        header_parts.append('\nif [[ "${CI:-}" == "" ]]; then')
        header_parts.extend(sourcing_block)
        header_parts.append("fi")

    script_header = "\n".join(header_parts)
    full_script_content = f"{script_header}\n\n" + "\n".join(script_lines) + "\n"

    logger.info("Shredding script from '%s:%s' to '%s'", job_name, script_key, rel(script_filepath))

    if not dry_run:
        script_filepath.parent.mkdir(parents=True, exist_ok=True)
        script_filepath.write_text(full_script_content, encoding="utf-8")
        script_filepath.chmod(0o755)

    # Compute bash command relative to YAML; fall back to the bare filename
    # when the script does not live under the YAML directory.
    base = yaml_dir.resolve()
    target = script_filepath.resolve()
    relative_path = target.relative_to(base) if is_relative_to(target, base) else Path(script_filename)

    # Normalize to posix for YAML. Test for a real "./" or "../" prefix: a
    # plain startswith(".") would also match dot-files such as ".template.sh"
    # and leave them as a bare command name that bash looks up on PATH.
    rel_str = relative_path.as_posix()
    if not rel_str.startswith(("./", "../")):
        rel_str = f"./{rel_str}"

    return str(script_filepath), rel_str


def process_shred_job(
    *,
    job_name: str,
    job_data: dict,
    scripts_output_path: Path,
    yaml_dir: Path,
    dry_run: bool = False,
    global_vars_filename: str | None = None,
) -> int:
    """Shred the variables and script-like blocks of one job.

    The job's ``variables`` mapping (if any) is extracted first so that the
    scripts written afterwards can source it. Each non-empty script-like key
    is then extracted and replaced *in place* in ``job_data`` by a folded
    scalar holding the command that runs the generated script.

    Returns the number of files produced (or that would be produced in a dry run).
    """
    created = 0
    job_vars_filename: str | None = None

    # Job-specific variables first
    job_variables = job_data.get("variables")
    if isinstance(job_variables, dict):
        safe_job_name = re.sub(r"-+", "-", re.sub(r"[^\w.-]", "-", job_name.lower())).strip("-")
        job_vars_filename = shred_variables_block(
            job_variables, safe_job_name, scripts_output_path, dry_run=dry_run
        )
        if job_vars_filename:
            created += 1

    # Script-like keys to shred
    for script_key in ("script", "before_script", "after_script", "pre_get_sources_script"):
        block = job_data.get(script_key)
        if not block:
            continue
        _, command = shred_script_block(
            script_content=block,
            job_name=job_name,
            script_key=script_key,
            scripts_output_path=scripts_output_path,
            yaml_dir=yaml_dir,
            dry_run=dry_run,
            global_vars_filename=global_vars_filename,
            job_vars_filename=job_vars_filename,
        )
        if not command:
            continue
        job_data[script_key] = FoldedScalarString(command.replace("\\", "/"))
        created += 1

    return created


# --- public entry points -----------------------------------------------------


def iterate_yaml_files(root: Path) -> Iterable[Path]:
    """Yield every ``*.yml`` / ``*.yaml`` *file* under *root*, in sorted order.

    Directories that merely carry a YAML-looking name are skipped, since
    callers expect paths they can open. The matches are collected and sorted
    before the first path is yielded, which makes the order deterministic and
    keeps files created while the caller is iterating out of the result.
    """
    matches = [
        path
        for pattern in ("*.yml", "*.yaml")
        for path in root.rglob(pattern)
        if path.is_file()
    ]
    yield from sorted(matches)


def run_shred_gitlab_file(
    *,
    input_yaml_path: Path,
    output_dir: Path,
    dry_run: bool = False,
) -> tuple[int, int, Path]:
    """Shred a *single* GitLab CI YAML file into scripts + modified YAML in *output_dir*.

    Scripts, variables files and the rewritten YAML are all placed directly in
    *output_dir*. With ``dry_run=True`` nothing is written or created on disk,
    including *output_dir* itself.

    Args:
        input_yaml_path: The GitLab CI YAML file to read.
        output_dir: Directory that receives the YAML and the scripts.
        dry_run: When true, only log what would be done.

    Returns:
        (jobs_processed, total_files_created, output_yaml_path).

    Raises:
        FileNotFoundError: If *input_yaml_path* is not an existing file.
    """
    if not input_yaml_path.is_file():
        raise FileNotFoundError(f"Input YAML file not found: {input_yaml_path}")

    output_dir = output_dir.resolve()
    if not dry_run:
        # force directory - but a dry run must not touch the filesystem
        output_dir.mkdir(parents=True, exist_ok=True)

    yaml = get_yaml()
    yaml.indent(mapping=2, sequence=4, offset=2)

    logger.info("Loading GitLab CI configuration from: %s", rel(input_yaml_path))
    data = yaml.load(input_yaml_path)
    if not isinstance(data, dict):
        # Empty files load as None and list documents have no jobs; treat
        # both as "nothing to shred" instead of failing on ``data.get``.
        logger.warning("Top-level YAML node in %s is not a mapping; nothing to shred.", rel(input_yaml_path))
        data = {}

    # Layout: write YAML and scripts side-by-side under output_dir
    output_yaml_path = output_dir / input_yaml_path.name
    scripts_dir = output_yaml_path.parent
    yaml_dir = output_yaml_path.parent

    jobs_processed = 0
    total_files_created = 0

    # Top-level variables -> global_variables.sh next to YAML
    global_vars_filename: str | None = None
    if isinstance(data.get("variables"), dict):
        logger.info("Processing global variables block.")
        global_vars_filename = shred_variables_block(data["variables"], "global", scripts_dir, dry_run=dry_run)
        if global_vars_filename:
            total_files_created += 1

    # Jobs: any top-level mapping that has a "script" key
    for key, value in data.items():
        if isinstance(value, dict) and "script" in value:
            logger.debug("Processing job: %s", key)
            jobs_processed += 1
            total_files_created += process_shred_job(
                job_name=key,
                job_data=value,
                scripts_output_path=scripts_dir,
                yaml_dir=yaml_dir,
                dry_run=dry_run,
                global_vars_filename=global_vars_filename,
            )

    if total_files_created > 0:
        logger.info("Shredded %s file(s) from %s job(s).", total_files_created, jobs_processed)
        if not dry_run:
            logger.info("Writing modified YAML to: %s", rel(output_yaml_path))
            with output_yaml_path.open("w", encoding="utf-8") as f:
                yaml.dump(data, f)
    else:
        logger.info("No script or variable blocks found to shred.")

    if not dry_run:
        # The mock CI variables script is generated even when nothing was shredded.
        generate_mock_ci_variables_script(str(output_yaml_path.parent / "mock_ci_variables.sh"))

    return jobs_processed, total_files_created, output_yaml_path


def run_shred_gitlab_tree(
    *,
    input_root: Path,
    output_dir: Path,
    dry_run: bool = False,
) -> tuple[int, int, int]:
    """Shred *all* ``*.yml`` / ``*.yaml`` under ``input_root`` into ``output_dir``.

    The relative directory structure under ``input_root`` is preserved in ``output_dir``.

    When ``output_dir`` is a strict sub-directory of ``input_root``, YAML files
    already located under ``output_dir`` are skipped so that earlier output is
    not shredded again into ever deeper folders. In-place runs
    (``output_dir == input_root``) process every file.

    Args:
        input_root: Directory that is searched recursively for YAML files.
        output_dir: Root directory for the generated YAML and scripts.
        dry_run: When true, only log what would be done.

    Returns:
        (yaml_files_processed, total_jobs_processed, total_files_created).

    Raises:
        FileNotFoundError: If ``input_root`` is not an existing directory.
    """
    if not input_root.is_dir():
        raise FileNotFoundError(f"Input folder not found: {input_root}")

    source_root = input_root.resolve()
    output_root = output_dir.resolve()
    output_is_nested = output_root != source_root and is_relative_to(output_root, source_root)

    # Snapshot the inputs before writing anything, so files produced by this
    # run cannot show up later in the same traversal.
    input_files = list(iterate_yaml_files(input_root))

    yaml_files_processed = 0
    total_jobs = 0
    total_created = 0

    for in_file in input_files:
        if output_is_nested and is_relative_to(in_file.resolve(), output_root):
            logger.debug("Skipping previously generated file: %s", rel(in_file))
            continue

        rel_dir = in_file.parent.relative_to(input_root)
        out_subdir = (output_dir / rel_dir).resolve()
        jobs, created, _ = run_shred_gitlab_file(input_yaml_path=in_file, output_dir=out_subdir, dry_run=dry_run)
        yaml_files_processed += 1
        total_jobs += jobs
        total_created += created

    return yaml_files_processed, total_jobs, total_created


# Back-compat alias (old API name) – keep single-file semantics.
# This is the same function object as ``run_shred_gitlab_file``, so it takes
# the identical keyword-only arguments and returns the same tuple.
run_shred_gitlab = run_shred_gitlab_file
