import platform
import warnings
from collections.abc import Iterator
from pathlib import Path

import pandas as pd

from synthegrator.code_problem_builders import make_simple_line_edit_problem
from synthegrator.code_problems import (
    CodeProblem,
    DiscoveredTestsuite,
)
from synthegrator.environments import CodeExecutionEnvironment
from synthegrator.lang_specs.lang_spec_java import JavaLangSpec
from synthegrator.sandboxing import Cmd, DockerExecutionContext, ExecLimits
from synthegrator.transformation_spec import (
    StsEditable,
    StsSelectorLineSpan,
    StsSpecStatement,
    TransformationSpec,
)
from synthegrator.util import (
    MultilineString,
)

# Directory next to this module that holds the Defects4J data files
# (Dockerfile, metadata CSV, project checkouts).
_defects4j_root = Path(Path(__file__).parent, "defects4j_data")


def _default_projects_root() -> Path:
    """
    Return the directory holding the bundled clean Defects4J projects.

    If the ``Defects4J_projects_clean`` directory is missing but a
    ``Defects4J_projects_clean.zip`` archive exists in the data directory,
    the archive is extracted into the data directory first.

    Returns
    -------
        Path: The ``Defects4J_projects_clean`` directory under the data root.

    Raises
    ------
        FileNotFoundError: If the directory does not exist and was not
            produced by extracting the archive.

    """
    directory = _defects4j_root / "Defects4J_projects_clean"
    if directory.exists():
        return directory

    archive = _defects4j_root / "Defects4J_projects_clean.zip"
    if archive.exists():
        # Unzip the projects
        # TODO eventually just read directly from the zip file
        import zipfile

        with zipfile.ZipFile(archive, "r") as zip_ref:
            zip_ref.extractall(_defects4j_root)

    # Explicit check instead of `assert`: assertions are stripped under
    # `python -O`, which would let a non-existent path leak to callers.
    if not directory.exists():
        msg = (
            f"Could not find the Defects4J projects directory {directory} "
            f"(and no usable archive at {archive})."
        )
        raise FileNotFoundError(msg)
    return directory


def _make_docker_execution_context():
    """
    Build the Docker execution context used to run the Defects4J commands.

    Reads the ``Dockerfile`` from the data directory. On ARM hosts every
    occurrence of ``openjdk-amd64`` in it is rewritten to ``openjdk-arm64``.
    The context copies the project checkouts, ``dataset.py``,
    ``defects4j.build.xml`` and the one-liner metadata CSV as build
    pre-commands, and disables networking in its default limits.

    Returns
    -------
        DockerExecutionContext: Context for the ``synthegrator_defects4j``
            image.

    """
    dockerfile = (_defects4j_root / "Dockerfile").read_text()
    # macOS reports 64-bit ARM as "arm64", Linux reports it as "aarch64".
    machine = platform.machine().lower()
    is_arm = "arm" in machine or "aarch64" in machine
    if is_arm:
        # str.replace returns a new string, so the result must be rebound;
        # discarding it leaves the amd64 paths in place.
        dockerfile = dockerfile.replace(
            "openjdk-amd64",
            "openjdk-arm64",
        )
    return DockerExecutionContext(
        image_name="synthegrator_defects4j",
        dockerfile=dockerfile,
        build_pre_cmds=Cmd.from_cmds_str_mix_tuple(
            f"cp -r {_defects4j_root}/Defects4J_projects_clean .",
            f"cp {_defects4j_root}/dataset.py .",
            f"cp {_defects4j_root}/defects4j.build.xml .",
            f"cp {_defects4j_root}/Defects4J_oneLiner_metadata.csv .",
        ),
        default_limits=ExecLimits(
            timeout_cpu_s=60 * 10,
            timeout_realtime_s=60 * 20,
            networking_allowed=False,
        ),
    )


# Docker execution context shared by the problems built in this module.
# It is created at import time, so importing this module reads the Dockerfile
# from the Defects4J data directory.
DEFECTS_4_J_DOCKER_ENV = _make_docker_execution_context()


def _figure_out_known_solution_for_defects4j(
    project_name: str,
    problem_id: str,
    file_name: str,
    line_no: int,
):
    """
    Return one line of the fixed version of a Defects4J source file.

    The file is read from the ``{project_name}_{problem_id}_fixed`` directory
    under the default projects root and wrapped in a ``MultilineString``.

    Args:
    ----
        project_name (str): Defects4J project name.
        problem_id (str): Bug identifier within the project.
        file_name (str): Path of the source file relative to the checkout.
        line_no (int): Index into the ``MultilineString`` of the fixed file.
            The caller in this file passes the CSV line number minus one.

    Returns:
    -------
        The entry of the fixed file at index ``line_no``.

    """
    fixed_checkout = _default_projects_root() / f"{project_name}_{problem_id}_fixed"
    fixed_source = MultilineString((fixed_checkout / file_name).read_text())
    return fixed_source[line_no]


# Instruction text given to the solver for single-line repair problems.
INSTRUCTIONS_ONE_LINE = (
    "There is a single line bug in this java code. "
    "Your task is to repair the bug."
)


def make_problem_for_defect4j(path, line_no, project_name=None, problem_id=None):
    """
    Build a single-line repair ``CodeProblem`` for one Defects4J file.

    Previously this function called the known-solution helper with no
    arguments, which always raised ``TypeError``. The project and bug id the
    helper needs are now optional parameters; the known solution is attached
    only when both are given.

    Args:
    ----
        path: Path of the file containing the bug. Used for the
            transformation spec and, when a known solution is looked up, as
            the file name inside the fixed checkout.
        line_no: Line to make editable. The transformation spec selects the
            span ``(line_no - 1, line_no)``, so this is presumably 1-based;
            TODO confirm against ``StsSelectorLineSpan``.
        project_name (optional): Defects4J project name, needed to locate the
            fixed checkout.
        problem_id (optional): Bug identifier within the project, needed to
            locate the fixed checkout.

    Returns:
    -------
        CodeProblem: The problem, with ``known_solutions`` set when both
            ``project_name`` and ``problem_id`` were supplied.

    """
    problem = CodeProblem(
        instructions=INSTRUCTIONS_ONE_LINE,
        transformation_spec=_make_transformation_spec_single_line(path, line_no),
        environment=CodeExecutionEnvironment(docker_env=DEFECTS_4_J_DOCKER_ENV),
    )
    if project_name is not None and problem_id is not None:
        # The helper indexes the fixed file directly, while the span above is
        # built from (line_no - 1, line_no); hence the same `- 1` offset here.
        # NOTE(review): wrapped in a list to match how the other builder in
        # this file passes `known_solutions`; confirm the type CodeProblem
        # expects.
        problem.known_solutions = [
            _figure_out_known_solution_for_defects4j(
                project_name,
                problem_id,
                path,
                line_no - 1,
            ),
        ]
    return problem


def _make_transformation_spec_single_line(path, line_no):
    """
    Build a transformation spec with one editable line span in ``path``.

    The span is ``(line_no - 1, line_no)`` and is marked editable as Java with
    its current text visible.
    NOTE(review): presumably the span is zero-based and end-exclusive, making
    ``line_no`` 1-based; confirm against ``StsSelectorLineSpan``.
    """
    line_selector = StsSelectorLineSpan(line_no - 1, line_no)
    editable = StsEditable(current_text_visible=True, lang_spec_name="java")
    statement = StsSpecStatement(path, line_selector, editable)
    return TransformationSpec(statements=(statement,))


def make_discovered_test_case(
    project_name: str,
    problem_id: str,
    file_path: str,
    is_buggy: bool = True,  # Run the buggy version by default since that's the one we'll patch
):
    """
    Build the test suite that swaps in a candidate file and runs the tests.

    The first command changes into the project checkout, replaces
    ``file_path`` with ``/sandbox/solution.java``, runs ``defects4j test`` and
    copies ``TESTS-TestSuites.xml`` to ``/sandbox``. The second command prints
    ``TESTS-TestSuites.xml``.

    Args:
    ----
        project_name (str): Defects4J project name.
        problem_id (str): Bug identifier within the project.
        file_path (str): Path of the file to replace, relative to the checkout.
        is_buggy (bool, optional): If True, use the ``{project}_{id}``
            checkout; otherwise use ``{project}_{id}_fixed``. Defaults to True.

    Returns:
    -------
        DiscoveredTestsuite: Suite containing the two commands.

    """
    checkout_suffix = "" if is_buggy else "_fixed"
    checkout_dir = (
        f"/defects4j/Defects4J_projects/{project_name}_{problem_id}{checkout_suffix}"
    )
    target_file = f"{checkout_dir}/{file_path}"

    # Steps are chained with `&&` so a failing step stops the rest.
    steps = (
        f"cd {checkout_dir}",
        f"rm {target_file}",
        f"cp /sandbox/solution.java {target_file}",
        "defects4j test",
        "cp TESTS-TestSuites.xml /sandbox/TESTS-TestSuites.xml",
    )

    # The build file is attached to the command, although the command line
    # itself does not reference it.
    build_xml = {
        "name": "defects4j.build.xml",
        "content": (_defects4j_root / "defects4j.build.xml").read_bytes(),
    }

    run_tests = Cmd(" && ".join(steps), files=(build_xml,))
    show_report = Cmd("cat TESTS-TestSuites.xml")
    return DiscoveredTestsuite(cmds=[run_tests, show_report])


def _get_deepest_path(path: Path, file_only: bool = False) -> Path:
    """
    Find the most deeply nested entry below ``path``.

    A file is returned unchanged. For a directory, every descendant is
    considered (only files when ``file_only`` is set) and the one with the
    most path components relative to ``path`` is returned. Among equally deep
    candidates, the first one produced by the directory walk is returned.

    Args:
    ----
        path (Path): The path to search.
        file_only (bool, optional): If True, only files are candidates.
            If False, files and directories both are. Defaults to False.

    Returns:
    -------
        Path: ``path`` itself if it is a file, otherwise the deepest candidate.

    Raises:
    ------
        FileNotFoundError: If ``path`` does not exist, or if the directory
            contains no candidate.

    """
    if not path.exists():
        msg = f"The path {path} does not exist."
        raise FileNotFoundError(msg)

    if path.is_file():
        return path

    candidates = (
        child for child in path.rglob("*") if not file_only or child.is_file()
    )
    # max() returns the first of several equally deep entries.
    deepest = max(
        candidates,
        key=lambda child: len(child.relative_to(path).parts),
        default=None,
    )

    if deepest is None:
        msg = "No suitable deepest path found under the given directory."
        raise FileNotFoundError(
            msg,
        )

    return deepest


def produce_defects4j_problem(
    project_name: str,
    problem_id: str,
    file_name: str,
    line_no: int,
    projects_root: Path | None = None,
) -> CodeProblem:
    """
    Build a single-line edit problem for one Defects4J bug.

    The buggy file is read from ``{project_name}_{problem_id}/{file_name}``
    and the known solution from the same relative path in the
    ``{project_name}_{problem_id}_fixed`` checkout, both under
    ``projects_root``.

    Args:
    ----
        project_name (str): Defects4J project name.
        problem_id (str): Bug identifier within the project.
        file_name (str): Path of the source file relative to the checkout.
        line_no (int): Index of the buggy line; the editable span is
            ``(line_no, line_no + 1)``.
        projects_root (Path, optional): Directory containing the project
            checkouts. Defaults to the bundled projects directory.

    Returns:
    -------
        CodeProblem: The problem produced by ``make_simple_line_edit_problem``.

    Raises:
    ------
        FileNotFoundError: If the buggy source file does not exist.

    """
    projects_root = Path(projects_root or _default_projects_root())

    file_path = Path(f"{project_name}_{problem_id}") / file_name
    source_path = projects_root / file_path
    if not source_path.exists():
        msg = f"Could not find the buggy source file for {source_path}"
        raise FileNotFoundError(msg)

    source_file = MultilineString(source_path.read_text())

    # Read the fixed line from the same root as the buggy file, so that a
    # caller-supplied `projects_root` is honored for both checkouts.
    fixed_path = projects_root / f"{project_name}_{problem_id}_fixed" / file_name
    known_solution = MultilineString(fixed_path.read_text())[line_no]

    invisible_spans = []

    # Leading span to hide: (0, line_no - 10), with the end floored at 1, so
    # it is never empty when the target is not the first line.
    if line_no > 0:
        invisible_spans.append((0, max(line_no - 10, 1)))

    # Trailing span to hide: (line_no + 10, len), with the start capped at
    # len - 1, so it is never empty when the target is not the last line.
    if line_no < len(source_file) - 1:
        invisible_spans.append(
            (min(len(source_file) - 1, line_no + 10), len(source_file)),
        )

    test_case = make_discovered_test_case(
        project_name,
        problem_id,
        file_name,
        is_buggy=True,
    )

    # NOTE(review): only `problem_id` is forwarded, not the project name;
    # confirm ids are unique across projects in the metadata.
    return make_simple_line_edit_problem(
        code=str(source_file),
        invisible_spans=invisible_spans,
        test_cases=[test_case],
        edit_lines_span=(line_no, line_no + 1),
        current_text_visible=True,
        target_path="solution.java",  # str(Path("/defects4j/Defects4J_projects") / file_path),
        known_solutions=[known_solution],
        lang_spec=JavaLangSpec(),
        dataset_name="defects4j",
        problem_id=problem_id,
        docker_env=DEFECTS_4_J_DOCKER_ENV,
        override_instructions=INSTRUCTIONS_ONE_LINE,
        cap_to_max_same_num_lines=True,
    )


def yield_defects4j(one_line: bool = True) -> Iterator[CodeProblem]:
    """
    Yield one problem per row of the Defects4J one-liner metadata CSV.

    A warning about the docker setup is emitted when iteration starts. The
    CSV is read without a header row, and each row's ``LOC`` value is reduced
    by one before it is passed on as ``line_no``.

    Args:
    ----
        one_line (bool, optional): Must be True; multi-line problems are not
            implemented. Defaults to True.

    Yields:
    ------
        CodeProblem: Each truthy problem returned by
            ``produce_defects4j_problem``.

    Raises:
    ------
        NotImplementedError: If ``one_line`` is False.

    """
    warnings.warn(
        "There might be problems with the docker setup currently for defects4j"
    )
    if not one_line:
        raise NotImplementedError

    metadata = pd.read_csv(
        _defects4j_root / "Defects4J_oneLiner_metadata.csv",
        header=None,
        names=["Project", "ID", "File_Path", "LOC"],
    )

    # Plain tuples in column order: (Project, ID, File_Path, LOC).
    for project, bug_id, rel_path, loc in metadata.itertuples(index=False, name=None):
        problem = produce_defects4j_problem(
            project_name=str(project),
            problem_id=str(bug_id),
            file_name=str(rel_path),
            line_no=int(loc) - 1,
        )
        if problem:
            yield problem
