"""
Utilities for serializing results to DataFrames
for analysis, visualization, and persistence.
"""

import gzip
import pickle
import random
from collections.abc import Callable, Iterable

import pandas as pd
from lmwrapper.utils import flatten_dict

from synthegrator.code_problems import CodeProblem
from synthegrator.solution_eval import (
    SolutionEvaluation,
    SolutionMetric,
    TestRunResultList,
)


def problem_to_dict(
    code_problem: CodeProblem,
    encode_transformation_spec: bool = False,
    encode_wd: bool = False,
    include_pickle_gzip: bool = False,
) -> dict:
    """
    Converts a CodeProblem to a dict, for serialization.

    Args:
    ----
        code_problem: The CodeProblem to convert.
        encode_transformation_spec: NOT IMPLEMENTED
        encode_wd: NOT IMPLEMENTED
        include_pickle_gzip: If True, includes a gzip-compressed pickle of the
            CodeProblem in the output dict under the key "pickle_gzip". This
            is mostly a stopgap until we have more complete serialization and
            deserialization. If the problem cannot be pickled, the value is
            None instead.

    Returns:
    -------
        A dict with the keys "problem_id" and "dataset_name", plus
        "pickle_gzip" when include_pickle_gzip is True.

    Raises:
    ------
        NotImplementedError: If encode_transformation_spec or encode_wd
            is truthy.

    """
    if encode_transformation_spec:
        raise NotImplementedError("encode_transformation_spec is not implemented")
    if encode_wd:
        raise NotImplementedError("encode_wd is not implemented")
    out = {
        "problem_id": code_problem.problem_id,
        "dataset_name": code_problem.dataset_name,
    }
    if include_pickle_gzip:
        try:
            pickled = pickle.dumps(code_problem)
        except (TypeError, AttributeError, pickle.PicklingError):
            # Best effort: unpicklable members surface as TypeError,
            # PicklingError (e.g. lambdas) or AttributeError (local objects,
            # depending on the Python version). All of them mean "no pickle
            # available", not "abort the whole serialization".
            out["pickle_gzip"] = None
        else:
            # mtime=0 keeps the compressed bytes reproducible across runs.
            out["pickle_gzip"] = gzip.compress(pickled, mtime=0)
    return out


def _dict_key_prepend(prefix: str, d: dict) -> dict:
    if d is None:
        return {}
    return {prefix + "__" + k: v for k, v in d.items()}


def _prop_to_dict(obj, prop, serializer) -> dict:
    """
    Serializes the attribute `prop` of `obj` and namespaces the result.

    `serializer` is called with the attribute value and its output is
    passed to `_dict_key_prepend` with `prop` as the prefix. When
    `serializer` is None the attribute is not read and `{}` is returned.
    """
    if serializer is not None:
        value = getattr(obj, prop)
        return _dict_key_prepend(prop, serializer(value))
    return {}


def test_results_list_to_dict(test_results_list: TestRunResultList) -> dict:
    """
    Summarizes a TestRunResultList as a flat dict.

    Every key is prefixed with "test_results_list__". The entries are the
    list's `runtime`, `syntax_check_result`, its length (`len`), and its
    `collection_error`, `exec_error` and `timeout` attributes, in that order.

    Returns an empty dict when `test_results_list` is None.
    """
    if test_results_list is None:
        return {}
    # (key suffix, value) pairs, listed in output column order.
    columns = (
        ("runtime", test_results_list.runtime),
        ("syntax_check_result", test_results_list.syntax_check_result),
        ("len", len(test_results_list)),
        ("collection_error", test_results_list.collection_error),
        ("exec_error", test_results_list.exec_error),
        ("timeout", test_results_list.timeout),
    )
    return {f"test_results_list__{suffix}": value for suffix, value in columns}


def solution_eval_to_dict(
    solution_eval: SolutionEvaluation,
    test_results_list_to_dict: Callable[[TestRunResultList], dict] | None = test_results_list_to_dict,
    pickle_gzip_whole_solution_eval: bool = False,
) -> dict:
    """
    Converts a SolutionEvaluation to a dict, for serialization.

    Args:
    ----
        solution_eval: The SolutionEvaluation to convert.
        test_results_list_to_dict: Serializer applied to
            `solution_eval.test_results`; its output is merged into the
            result. Pass None to leave the test results out.
        pickle_gzip_whole_solution_eval: If True, a gzip-compressed pickle
            of the whole `solution_eval` is stored under "pickle_gzip".

    Returns:
    -------
        A dict holding the serialized solution under "solution", the
        entries produced by `solution_metric_to_dict` for the main metric
        and each extra metric, the serialized test results (if enabled),
        and "exception" (the exception as a string) only when
        `solution_eval.exception` is not None.

    """
    row = {"solution": solution_eval.solution.dict_serialize()}
    row.update(solution_metric_to_dict(solution_eval.main_metric, is_main=True))
    for extra_metric in solution_eval.extra_metrics:
        row.update(solution_metric_to_dict(extra_metric, is_main=False))

    if test_results_list_to_dict is not None:
        row.update(test_results_list_to_dict(solution_eval.test_results))

    failure = solution_eval.exception
    if failure is not None:
        row["exception"] = str(failure)

    if pickle_gzip_whole_solution_eval:
        # mtime=0 keeps the compressed bytes free of a timestamp.
        pickled = pickle.dumps(solution_eval)
        row["pickle_gzip"] = gzip.compress(pickled, mtime=0)
    return row


def solution_metric_to_dict(
    metric: SolutionMetric,
    is_main: bool,
) -> dict:
    """
    Converts a SolutionMetric to a flat dict.

    Args:
    ----
        metric: The metric to convert. None yields an empty dict.
        is_main: If True, keys are prefixed with "main_metric" and an extra
            "main_metric__value" entry is added. Otherwise keys are
            prefixed with `metric.label`.

    Returns:
    -------
        A dict with `<prefix>__is_success` and `<prefix>__float_val`. For
        the main metric, `main_metric__value` is `float(is_success)` when
        `is_success` is not None, and `float(float_val)` otherwise.

    """
    if metric is None:
        return {}
    if is_main:
        key = "main_metric"
    else:
        key = metric.label
    success = metric.is_success
    out = {
        f"{key}__is_success": success,
        f"{key}__float_val": metric.float_val,
    }
    if not is_main:
        return out
    # Prefer the boolean outcome; fall back to the float value.
    headline = metric.float_val if success is None else success
    out[f"{key}__value"] = float(headline)
    return out


def solution_evals_to_df(
    solve_evals: Iterable[SolutionEvaluation],
    solver_key: str | None = None,
    solution_eval_to_dict: Callable[[SolutionEvaluation], dict] = solution_eval_to_dict,
    pickle_gzip_whole_solution_eval=False,
) -> pd.DataFrame:
    """
    Builds a DataFrame with one row per SolutionEvaluation.

    Args:
    ----
        solve_evals: The evaluations to serialize.
        solver_key: Value written to the "solver_key" column of every row.
            When None, a key of the form "unspecified-<hex>" is generated
            from a random integer.
        solution_eval_to_dict: Serializer called for each evaluation. It is
            invoked with the keyword argument
            `pickle_gzip_whole_solution_eval`, so it must accept it.
        pickle_gzip_whole_solution_eval: Forwarded to
            `solution_eval_to_dict`.

    Returns:
    -------
        A DataFrame built from the flattened per-evaluation dicts.

    """
    if solver_key is None:
        solver_key = f"unspecified-{hex(random.randint(0, 1000000))}"

    def _to_row(solution_evaluation) -> dict:
        # Serialize one evaluation, flatten nested dicts, and tag the solver.
        nested = solution_eval_to_dict(
            solution_evaluation,
            pickle_gzip_whole_solution_eval=pickle_gzip_whole_solution_eval,
        )
        row = flatten_dict(nested, verify_keys_do_not_have_sep=False)
        row["solver_key"] = solver_key
        return row

    return pd.DataFrame([_to_row(item) for item in solve_evals])
