from abc import ABC, abstractmethod
from typing import Dict, Type, Optional
import uuid
from datetime import datetime
from ..utils.timer import timer
from ..engines.base import BaseEngine
from importlib.metadata import version, PackageNotFoundError

class BaseBenchmark(ABC):
    """
    Abstract base class for defining benchmarks. This class provides a structure for implementing benchmarks
    with a specific engine and scenario, and includes functionality for timing and saving results.

    Attributes
    ----------
    BENCHMARK_IMPL_REGISTRY : Dict[Type[BaseEngine], Optional[Type]]
        A registry of the engines that the benchmark supports. If an engine requires a specific implementation
        that doesn't use the engine's existing methods, the dictionary maps the engine class to that
        implementation class. If only shared methods are used, the dictionary value is None.
    RESULT_SCHEMA : list of (str, str)
        Ordered (column name, column type) pairs describing a result row. It is passed to the engine
        together with the rows when results are saved.
    VERSION : str
        Version string of the benchmark; recorded as `benchmark_version` in every result row.
    engine : object
        The engine used to execute the benchmark.
    scenario_name : str
        The name of the scenario being benchmarked.
    result_abfss_path : Optional[str]
        The path where benchmark results will be saved, if `save_results` is True.
    save_results : bool
        Flag indicating whether to save benchmark results to a Delta table.
    header_detail_dict : dict
        A dictionary containing metadata about the benchmark run, including run ID, datetime, package and
        engine versions, engine type, benchmark name and version, scale factor, scenario name, total cores,
        and compute size. It is merged into every result row.
    timer : object
        The timer object (imported from `..utils.timer`) used to measure the duration of benchmark phases.
    results : list
        A list to store benchmark results.

    Methods
    -------
    register_engine(engine_class, benchmark_impl=None)
        Adds an engine (and optionally an engine-specific implementation) to `BENCHMARK_IMPL_REGISTRY`.
    run()
        Abstract method that must be implemented by subclasses to define the benchmark logic.
    post_results()
        Processes and saves benchmark results. If `save_results` is True, results are appended to a Delta table
        at the specified `result_abfss_path`. Always clears the timer results after processing.
    """
    BENCHMARK_IMPL_REGISTRY: Dict[Type[BaseEngine], Optional[Type]] = {}
    RESULT_SCHEMA = [
        ('run_id', 'STRING'),
        ('run_datetime', 'TIMESTAMP'),
        ('lakebench_version', 'STRING'),
        ('engine', 'STRING'),
        ('engine_version', 'STRING'),
        ('benchmark', 'STRING'),
        ('benchmark_version', 'STRING'),
        ('scale_factor', 'INT'),
        ('scenario', 'STRING'),
        ('total_cores', 'SMALLINT'),
        ('compute_size', 'STRING'),
        ('phase', 'STRING'),
        ('test_item', 'STRING'),
        ('start_datetime', 'TIMESTAMP'),
        ('duration_ms', 'INT'),
        ('estimated_job_cost', 'DECIMAL(18,10)'),
        ('iteration', 'TINYINT'),
        ('success', 'BOOLEAN'),
        ('error_message', 'STRING'),
        ('engine_metadata', 'MAP<STRING, STRING>')
    ]
    VERSION = ''

    def __init__(self, engine, scenario_name: str, result_abfss_path: Optional[str], save_results: bool = False):
        """
        Store the run configuration, build the per-run header metadata and reset the timer.

        Parameters
        ----------
        engine : object
            Engine used to execute the benchmark. It must expose `version`, `get_total_cores()` and
            `get_compute_size()`, which are read here to build `header_detail_dict`.
        scenario_name : str
            The name of the scenario being benchmarked.
        result_abfss_path : Optional[str]
            Destination for saved results. Required by `post_results()` when `save_results` is True.
        save_results : bool, default False
            Whether `post_results()` should persist results through the engine.

        Notes
        -----
        `scale_factor` is read with `getattr(self, 'scale_factor', None)`, so a subclass that wants it
        recorded must set `self.scale_factor` before this initializer runs.
        """
        self.engine = engine
        self.scenario_name = scenario_name
        self.result_abfss_path = result_abfss_path
        self.save_results = save_results

        self.header_detail_dict = {
            'run_id': str(uuid.uuid1()),
            'run_datetime': datetime.now(),
            'lakebench_version': self._package_version('lakebench'),
            'engine': type(engine).__name__,
            'engine_version': self.engine.version,
            'benchmark': self.__class__.__name__,
            'benchmark_version': self.VERSION,
            'scale_factor': getattr(self, 'scale_factor', None),
            'scenario': scenario_name,
            'total_cores': self.engine.get_total_cores(),
            'compute_size': self.engine.get_compute_size()
        }
        # `timer` is the object imported at module level, not a fresh instance, so
        # drop any timings left over from earlier use before this run starts.
        self.timer = timer
        self.timer.clear_results()
        self.results = []

    @staticmethod
    def _package_version(package_name: str) -> str:
        """
        Return the installed version of `package_name`, or 'unknown' if no distribution
        metadata is found (for example when running from a source checkout).

        A missing version is only a metadata gap and must not prevent a benchmark from running.
        """
        try:
            return version(package_name)
        except PackageNotFoundError:
            return 'unknown'

    @classmethod
    def register_engine(cls, engine_class: Type[BaseEngine], benchmark_impl: Optional[Type] = None):
        """
        Registers a custom engine class and its corresponding benchmark implementation.

        Parameters
        ----------
        engine_class : Type[BaseEngine]
            The engine class to register.
        benchmark_impl : Type, optional
            The benchmark implementation class for the engine. If None, the engine's default methods will be used.

        Notes
        -----
        The entry is written into the `BENCHMARK_IMPL_REGISTRY` dictionary found through `cls`. A subclass
        that does not define its own `BENCHMARK_IMPL_REGISTRY` therefore mutates the dictionary it inherits,
        which is shared with its parent class.
        """
        cls.BENCHMARK_IMPL_REGISTRY[engine_class] = benchmark_impl

    @abstractmethod
    def run(self):
        """Execute the benchmark. Must be implemented by subclasses."""
        pass

    def post_results(self):
        """
        Processes and posts benchmark results, saving them to a specified location if save_results is True.
        This method collects timing results from the benchmark execution, formats them into a
        list of row dictionaries, and optionally saves the results to a Delta table. It always clears the
        timer instance after offloading results to the `self.results` attribute, so calling it once per
        phase never records the same timing twice.

        Raises
        ------
        ValueError
            If `save_results` is True but `result_abfss_path` is None.
        Exception
            Any exception raised by the engine while appending results is propagated unchanged.

        Notes
        -----
        - Each timer result is expected to be a 7-tuple of
          (phase, test_item, start_datetime, duration_ms, iteration, success, error_message).
        - If `save_results` is True, the rows are appended to the Delta table specified by
          `result_abfss_path` using the `engine._append_results_to_delta` method.
        - The rows are added to `self.results` before saving is attempted, and the timer results are
          cleared whether or not saving succeeds.

        Examples
        --------
        Call once after each major benchmark phase:

        >>> benchmark.post_results()
        """
        # NOTE(review): RESULT_SCHEMA declares an `engine_metadata` column, but no row built here
        # sets that key - confirm the engine's writer fills or defaults it.
        result_array = [
            {
                **self.header_detail_dict,
                'phase': phase,
                'test_item': test_item,
                'start_datetime': start_datetime,
                'duration_ms': duration_ms,
                'estimated_job_cost': self.engine.get_job_cost(duration_ms),
                'iteration': iteration,
                'success': success,
                'error_message': error_message
            }
            for phase, test_item, start_datetime, duration_ms, iteration, success, error_message in self.timer.results
        ]
        self.results.extend(result_array)

        try:
            if self.save_results:
                if self.result_abfss_path is None:
                    raise ValueError("result_abfss_path must be provided if save_results is True.")
                self.engine._append_results_to_delta(self.result_abfss_path, result_array, self.RESULT_SCHEMA)
        finally:
            # The timings are already in self.results; clear on every path (not saving,
            # saved, or failed) so the next call does not record them again.
            self.timer.clear_results()
