"""
ChalkML SCAFFOLD Engine
=======================
Mathematical data generation with distributional guarantees.

Core Concepts:
- Mathematical Sequences: Primes, Fibonacci, geometric, arithmetic
- Equation Systems: Linear, parametric, constraint-based
- Probabilistic: Distributions with exact probability mass
- Graph-Based: Hierarchical relationships
- Time-Series: Temporal patterns with frequency control

SCAFFOLD = Structure + Constraints + Algorithms + Formula + Functional + Operations + Logic + Data
"""

import json
import hashlib
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Any, Set, Callable
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
from scipy.stats import norm, uniform, expon, lognorm, gamma
from itertools import combinations, product
import sympy as sp


class MathematicalSequences:
    """
    Generate classic mathematical sequences as plain Python lists.

    All generators are static methods. The count-based generators
    (everything except ``primes``, which takes an upper bound) return an
    empty list when asked for zero or fewer elements.
    """

    @staticmethod
    def primes(limit: int = 1000) -> List[int]:
        """
        Return every prime ``p`` with ``p <= limit`` in ascending order.

        Implemented as a Sieve of Eratosthenes.
        Complexity: O(n log log n) time, O(n) memory.

        Args:
            limit: Inclusive upper bound; values below 2 yield ``[]``.
        """
        if limit < 2:
            return []

        sieve = [True] * (limit + 1)
        sieve[0] = sieve[1] = False

        for i in range(2, int(limit**0.5) + 1):
            if sieve[i]:
                # Multiples below i*i were already crossed out by smaller primes.
                for j in range(i * i, limit + 1, i):
                    sieve[j] = False

        return [value for value, is_prime in enumerate(sieve) if is_prime]

    @staticmethod
    def fibonacci(n: int) -> List[int]:
        """
        Return the first ``n`` Fibonacci numbers, starting 1, 1, 2, 3, ...
        """
        if n <= 0:
            return []
        if n == 1:
            return [1]

        fib = [1, 1]
        for _ in range(2, n):
            fib.append(fib[-1] + fib[-2])
        return fib

    @staticmethod
    def geometric(start: float, ratio: float, n: int) -> List[float]:
        """
        Geometric sequence: a, ar, ar², ar³, ... (``n`` terms).
        """
        return [start * (ratio ** i) for i in range(n)]

    @staticmethod
    def arithmetic(start: float, diff: float, n: int) -> List[float]:
        """
        Arithmetic sequence: a, a+d, a+2d, a+3d, ... (``n`` terms).
        """
        return [start + i * diff for i in range(n)]

    @staticmethod
    def lucas(n: int) -> List[int]:
        """
        Return the first ``n`` Lucas numbers.

        L(n) = L(n-1) + L(n-2), with L(0)=2 and L(1)=1.
        """
        if n <= 0:
            return []
        if n == 1:
            return [2]

        lucas = [2, 1]
        for _ in range(2, n):
            lucas.append(lucas[-1] + lucas[-2])
        return lucas

    @staticmethod
    def catalan(n: int) -> List[int]:
        """
        Return the first ``n`` Catalan numbers C(0) .. C(n-1).

        C(k) = (2k)! / ((k+1)! * k!), computed with the exact integer
        recurrence C(k) = C(k-1) * 2 * (2k - 1) / (k + 1).
        """
        # Match the other generators: no elements requested -> empty list.
        if n <= 0:
            return []

        catalan = [1]
        for i in range(1, n):
            # The product is always divisible by (i + 1), so // is exact.
            catalan.append(catalan[-1] * 2 * (2 * i - 1) // (i + 1))
        return catalan


class DistributionGenerator:
    """
    Draw random samples from common probability distributions.

    Every sampler accepts an optional ``seed``. When a seed is given the
    draw uses a private ``numpy.random.RandomState(seed)``, so results are
    reproducible without reseeding NumPy's process-wide generator. When
    ``seed`` is None the global NumPy random state is used.
    """

    @staticmethod
    def _rng(seed: Optional[int]) -> Optional[np.random.RandomState]:
        """Return a private RandomState for ``seed``, or None for the global RNG."""
        return None if seed is None else np.random.RandomState(seed)

    @staticmethod
    def normal(mean: float, std: float, size: int, seed: Optional[int] = None) -> np.ndarray:
        """Normal (Gaussian) distribution with the given mean and standard deviation."""
        return norm.rvs(
            loc=mean, scale=std, size=size,
            random_state=DistributionGenerator._rng(seed),
        )

    @staticmethod
    def uniform_dist(low: float, high: float, size: int, seed: Optional[int] = None) -> np.ndarray:
        """Uniform distribution on the interval from ``low`` to ``high``."""
        # scipy parameterizes uniform as [loc, loc + scale].
        return uniform.rvs(
            loc=low, scale=high - low, size=size,
            random_state=DistributionGenerator._rng(seed),
        )

    @staticmethod
    def exponential(scale: float, size: int, seed: Optional[int] = None) -> np.ndarray:
        """Exponential distribution with the given scale."""
        return expon.rvs(
            scale=scale, size=size,
            random_state=DistributionGenerator._rng(seed),
        )

    @staticmethod
    def lognormal(mean: float, sigma: float, size: int, seed: Optional[int] = None) -> np.ndarray:
        """
        Log-normal distribution.

        ``mean`` and ``sigma`` are passed to scipy's ``lognorm`` as
        ``scale=exp(mean)`` and shape ``s=sigma``.
        """
        return lognorm.rvs(
            s=sigma, scale=np.exp(mean), size=size,
            random_state=DistributionGenerator._rng(seed),
        )

    @staticmethod
    def categorical(categories: Dict[str, float], size: int, seed: Optional[int] = None) -> np.ndarray:
        """
        Sample category labels according to their weights.

        Args:
            categories: Dict of {value: probability}. Weights are
                normalized by their sum, so they need not add up to 1.
            size: Number of samples
            seed: Optional seed for a reproducible draw.

        Raises:
            ValueError: If ``categories`` is empty or its weights do not
                sum to a positive number.
        """
        values = list(categories.keys())
        probs = list(categories.values())

        total = sum(probs)
        if not values or total <= 0:
            # Without this guard the normalization below divides by zero.
            raise ValueError(
                "categories must be non-empty and have a positive total weight"
            )

        # Normalize probabilities
        probs = [p / total for p in probs]

        rng = DistributionGenerator._rng(seed)
        chooser = np.random if rng is None else rng
        return chooser.choice(values, size=size, p=probs)

    @staticmethod
    def gamma_dist(shape: float, scale: float, size: int, seed: Optional[int] = None) -> np.ndarray:
        """Gamma distribution with the given shape and scale."""
        return gamma.rvs(
            a=shape, scale=scale, size=size,
            random_state=DistributionGenerator._rng(seed),
        )


class EquationSolver:
    """
    Solve symbolic equations with SymPy and return the results as DataFrames.

    NOTE(security): equation strings are parsed with ``sympy.sympify``,
    which evaluates its input with ``eval``. Only pass equation text that
    comes from a trusted source.
    """

    @staticmethod
    def _parse_sides(equation: str, namespace: Dict[str, Any]) -> Tuple[Any, Any]:
        """Split ``"lhs = rhs"`` on its single '=' and sympify both sides."""
        # Unpacking fails (ValueError) unless there is exactly one '='.
        lhs, rhs = equation.split('=')
        return (
            sp.sympify(lhs.strip(), locals=namespace),
            sp.sympify(rhs.strip(), locals=namespace),
        )

    @staticmethod
    def linear_equation(variables: List[str], equation: str, n_samples: int) -> pd.DataFrame:
        """
        Generate data satisfying an equation.

        All variables except the last are sampled uniformly from
        [-100, 100) using NumPy's global random state; the last variable
        is then computed from the first solution SymPy finds for it.

        Example: "x + y = 10" with n_samples=100

        Args:
            variables: Variable names; the last one is solved for.
            equation: Equation text containing exactly one '='.
            n_samples: Number of rows to generate.

        Returns:
            DataFrame with one column per variable and ``n_samples`` rows.

        Raises:
            ValueError: If the equation cannot be parsed or solved for the
                last variable.
        """
        symbols = {var: sp.Symbol(var) for var in variables}

        # Any failure (parsing, solving, numeric conversion) is reported as
        # a single ValueError, as callers of this method expect.
        try:
            lhs_expr, rhs_expr = EquationSolver._parse_sides(equation, symbols)
            *free_vars, target_name = variables

            # Solve for last variable in terms of others
            solution = sp.solve(sp.Eq(lhs_expr, rhs_expr), symbols[target_name])
            if not solution:
                # Previously this case fell through and returned None.
                raise ValueError(f"no solution for variable '{target_name}'")
            target_expr = solution[0]

            # Generate random values for all but last variable
            data = {
                var: np.random.uniform(-100, 100, n_samples)
                for var in free_vars
            }

            # Compute last variable from equation, row by row
            data[target_name] = [
                float(target_expr.subs(
                    {symbols[var]: data[var][i] for var in free_vars}
                ))
                for i in range(n_samples)
            ]
        except Exception as e:
            raise ValueError(
                f"Could not solve equation: {equation}. Error: {str(e)}"
            ) from e

        return pd.DataFrame(data)

    @staticmethod
    def system_of_equations(variables: List[str], equations: List[str]) -> pd.DataFrame:
        """
        Solve system of equations.

        Example: ["x+y=10", "y+z=15", "x+z=13"]

        Args:
            variables: Names of the unknowns.
            equations: Equation strings, each containing exactly one '='.

        Returns:
            DataFrame with one column per variable and one row per
            solution (a single row for a uniquely solvable linear system).

        Raises:
            ValueError: If an equation cannot be parsed, or the system has
                no solution, is under-determined, or has a non-real
                solution.
        """
        symbols_dict = {var: sp.Symbol(var) for var in variables}
        eqs = []

        for eq_str in equations:
            try:
                lhs_expr, rhs_expr = EquationSolver._parse_sides(eq_str, symbols_dict)
                eqs.append(sp.Eq(lhs_expr, rhs_expr))
            except Exception as e:
                raise ValueError(f"Could not parse equation: {eq_str}") from e

        # dict=True makes solve() always return a list of {symbol: value}
        # mappings, whatever the shape of the system.
        solutions = sp.solve(eqs, list(symbols_dict.values()), dict=True)
        if not solutions:
            raise ValueError("System has no solution or infinite solutions")

        rows = []
        for sol in solutions:
            row = {}
            for var in variables:
                value = sol.get(symbols_dict[var])
                # A missing or still-symbolic value means the variable is
                # not pinned down by the system.
                if value is None or value.free_symbols:
                    raise ValueError("System has no solution or infinite solutions")
                try:
                    row[var] = float(value)
                except TypeError as e:
                    raise ValueError(
                        f"Solution for '{var}' is not a real number: {value}"
                    ) from e
            rows.append(row)

        return pd.DataFrame(rows, columns=list(variables))

    @staticmethod
    def parametric_equations(
        variables: List[str],
        equations: List[str],
        param: str,
        param_range: Tuple[float, float, float]
    ) -> pd.DataFrame:
        """
        Generate data from parametric equations.

        Example: x=t*cos(t), y=t*sin(t), t from 0 to 100 step 0.1

        Args:
            variables: Names made available as symbols while parsing.
            equations: Strings of the form ``"name = expression"``; the
                left-hand names become the output columns.
            param: Name of the parameter symbol.
            param_range: ``(start, end, step)`` passed to ``numpy.arange``,
                so ``end`` is excluded.

        Returns:
            DataFrame with one column per equation and one row per
            parameter value.

        Raises:
            ValueError: If an equation cannot be parsed or ``step`` is 0.
        """
        t_symbol = sp.Symbol(param)
        symbols_dict = {var: sp.Symbol(var) for var in variables}
        symbols_dict[param] = t_symbol

        # Parse equations
        expressions = []
        for eq_str in equations:
            try:
                var, expr_str = eq_str.split('=')
                var = var.strip()
                expr = sp.sympify(expr_str.strip(), locals=symbols_dict)
                expressions.append((var, expr))
            except Exception as e:
                raise ValueError(f"Could not parse equation: {eq_str}") from e

        # Generate parameter values
        start, end, step = param_range
        if step == 0:
            raise ValueError("param_range step must be non-zero")
        param_vals = np.arange(start, end, step)

        # Evaluate each expression at every parameter value
        data = {
            var: [float(expr.subs(t_symbol, t_val)) for t_val in param_vals]
            for var, expr in expressions
        }

        return pd.DataFrame(data)


class ScaffoldEngine:
    """
    High-level engine for mathematical data generation.

    Wraps the sequence, distribution and equation helpers and writes a
    JSON report for every generated file into
    ``<workspace>/.chalkml/scaffold_reports``.
    """

    def __init__(self, workspace_path: Optional[str] = None):
        """
        Create the engine and make sure the report directory exists.

        Args:
            workspace_path: Workspace root; defaults to the current
                working directory.
        """
        if workspace_path is None:
            workspace_path = Path.cwd()
        self.workspace_path = Path(workspace_path)
        self.chalkml_dir = self.workspace_path / ".chalkml"
        self.scaffold_dir = self.chalkml_dir / "scaffold_reports"
        self.scaffold_dir.mkdir(parents=True, exist_ok=True)

        self.sequences = MathematicalSequences()
        self.distributions = DistributionGenerator()
        self.equations = EquationSolver()

    def generate_sequence(
        self,
        sequence_type: str,
        n: int,
        **kwargs
    ) -> List[Any]:
        """
        Generate mathematical sequence.

        Args:
            sequence_type: 'primes', 'fibonacci', 'geometric', 'arithmetic', 'lucas', 'catalan'
            n: Number of elements
            **kwargs: Per-type options. 'primes' reads ``limit``;
                'geometric' reads ``start`` and ``ratio``; 'arithmetic'
                reads ``start`` and ``diff``. Other keys are ignored.

        Raises:
            ValueError: If ``sequence_type`` is not one of the names above.
        """
        if sequence_type == 'primes':
            # Sieve up to an estimated bound, then keep the first n primes.
            limit = kwargs.get('limit', n * 20)  # Estimate
            primes = self.sequences.primes(limit)
            return primes[:n]

        elif sequence_type == 'fibonacci':
            return self.sequences.fibonacci(n)

        elif sequence_type == 'geometric':
            start = kwargs.get('start', 1.0)
            ratio = kwargs.get('ratio', 2.0)
            return self.sequences.geometric(start, ratio, n)

        elif sequence_type == 'arithmetic':
            start = kwargs.get('start', 0.0)
            diff = kwargs.get('diff', 1.0)
            return self.sequences.arithmetic(start, diff, n)

        elif sequence_type == 'lucas':
            return self.sequences.lucas(n)

        elif sequence_type == 'catalan':
            return self.sequences.catalan(n)

        else:
            raise ValueError(f"Unknown sequence type: {sequence_type}")

    def scaffold_file(
        self,
        output_file: str,
        method: str,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Generate scaffolded data and save to file.

        Methods:
            - 'sequence': Mathematical sequences. Reads ``sequence_type``,
              ``count`` and ``scale`` (a multiplier applied to every
              element) plus the options of ``generate_sequence``.
            - 'distribution': Probability distributions. Reads
              ``dist_type`` ('normal', 'uniform', 'categorical',
              'exponential', 'lognormal', 'gamma'), ``count``, ``seed``
              and the distribution's own parameters.
            - 'timeseries': Time-based sequences. Reads ``start``,
              ``freq`` and ``count``.

        Any other method name raises ValueError.

        Args:
            output_file: Path of the CSV file to write.
            method: One of the method names above.
            **kwargs: Method-specific options; recorded in the report.

        Returns:
            Dict with ``success``, ``output_file``, ``rows``, ``method``
            and ``report_path``.

        Raises:
            ValueError: For an unknown method or distribution type.
        """
        if method == 'sequence':
            seq_type = kwargs.get('sequence_type', 'arithmetic')
            n = kwargs.get('count', 100)
            scale = kwargs.get('scale', 1.0)

            # 'sequence_type' and 'n' are bound explicitly below; forwarding
            # them again inside **kwargs would raise a duplicate-argument
            # TypeError.
            seq_options = {
                key: value for key, value in kwargs.items()
                if key not in ('sequence_type', 'n')
            }
            seq = self.generate_sequence(seq_type, n, **seq_options)
            seq_scaled = [x * scale for x in seq]

            df = pd.DataFrame({
                'value': seq_scaled,
                'index': range(len(seq_scaled))
            })

        elif method == 'distribution':
            dist_type = kwargs.get('dist_type', 'normal')
            size = kwargs.get('count', 100)
            seed = kwargs.get('seed', None)

            if dist_type == 'normal':
                mean = kwargs.get('mean', 0)
                std = kwargs.get('std', 1)
                values = self.distributions.normal(mean, std, size, seed)

            elif dist_type == 'uniform':
                low = kwargs.get('low', 0)
                high = kwargs.get('high', 1)
                values = self.distributions.uniform_dist(low, high, size, seed)

            elif dist_type == 'categorical':
                categories = kwargs.get('categories', {'A': 0.5, 'B': 0.3, 'C': 0.2})
                values = self.distributions.categorical(categories, size, seed)

            elif dist_type == 'exponential':
                scale = kwargs.get('scale', 1.0)
                values = self.distributions.exponential(scale, size, seed)

            elif dist_type == 'lognormal':
                mean = kwargs.get('mean', 0)
                sigma = kwargs.get('sigma', 1)
                values = self.distributions.lognormal(mean, sigma, size, seed)

            elif dist_type == 'gamma':
                shape = kwargs.get('shape', 1.0)
                scale = kwargs.get('scale', 1.0)
                values = self.distributions.gamma_dist(shape, scale, size, seed)

            else:
                raise ValueError(f"Unknown distribution: {dist_type}")

            df = pd.DataFrame({'value': values})

        elif method == 'timeseries':
            start = kwargs.get('start', '2024-01-01')
            freq = kwargs.get('freq', '1D')
            count = kwargs.get('count', 365)

            dates = pd.date_range(start=start, periods=count, freq=freq)
            df = pd.DataFrame({'timestamp': dates})

        else:
            raise ValueError(f"Unknown method: {method}")

        # Save
        df.to_csv(output_file, index=False)

        # Create report. Microseconds in the name keep two calls within the
        # same second from overwriting each other's report.
        now = datetime.now()
        report_path = self.scaffold_dir / f"scaffold_report_{now.strftime('%Y%m%d_%H%M%S_%f')}.json"
        with open(report_path, 'w', encoding='utf-8') as f:
            # default=str: kwargs may hold values json cannot encode
            # (datetime, Path, numpy scalars); the report is informational,
            # so they are recorded by their string form.
            json.dump({
                "output_file": str(output_file),
                "method": method,
                "parameters": kwargs,
                "rows_generated": len(df),
                "timestamp": now.isoformat()
            }, f, indent=2, default=str)

        return {
            "success": True,
            "output_file": output_file,
            "rows": len(df),
            "method": method,
            "report_path": str(report_path)
        }


def get_scaffold_engine(workspace_path: Optional[str] = None) -> ScaffoldEngine:
    """
    Build and return a new ScaffoldEngine.

    Args:
        workspace_path: Forwarded unchanged to the ScaffoldEngine
            constructor.
    """
    engine = ScaffoldEngine(workspace_path=workspace_path)
    return engine
