"""
ChalkML Knowledge Graph - Hierarchical Data Classes
Smart randomization with realistic, context-aware data generation

Philosophy: Like BMW knows how parts relate, ChalkML knows how data relates
"""

from typing import Dict, List, Any, Optional
import random
from faker import Faker
import numpy as np

# Shared module-level Faker instance. The generator lambdas built in
# KnowledgeGraph._build_graph look this name up lazily, each time a value is
# produced, rather than when the graph is constructed.
fake = Faker()

class KnowledgeGraph:
    """
    Hierarchical knowledge graph for intelligent data generation.

    The graph is a nested dict addressed by dotted class paths such as
    ``"person.name.first_name"``. Each leaf is either a zero-argument
    callable that produces one value, or ``None`` for a value that depends
    on context (a city depends on a country, a major on a university).

    Supports domains:
    - person (names, demographics, contact)
    - location (country, city, zip code, coordinates)
    - education (university, major, degree, GPA, graduation year)
    - business (company, industry, job title, salary, experience)
    - biology (mammals grouped by diet)
    - finance (transaction type, currency, amount, account number)
    - temporal (date, datetime, time, timestamp)
    """

    # Sentinel returned by _lookup for a path that is not in the graph.
    # None cannot be used for this because None is a legitimate leaf value.
    _MISSING = object()

    def __init__(self):
        # The hierarchy tables are defined before the graph is built so the
        # object is never in a state where the graph refers to tables that
        # do not exist yet.

        # Country -> City mappings
        self.location_hierarchy = {
            'USA': ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix', 'Philadelphia'],
            'UK': ['London', 'Manchester', 'Birmingham', 'Liverpool', 'Leeds'],
            'Canada': ['Toronto', 'Montreal', 'Vancouver', 'Calgary', 'Ottawa'],
            'Australia': ['Sydney', 'Melbourne', 'Brisbane', 'Perth', 'Adelaide'],
            'Germany': ['Berlin', 'Munich', 'Hamburg', 'Frankfurt', 'Cologne'],
            'France': ['Paris', 'Marseille', 'Lyon', 'Toulouse', 'Nice'],
            'Japan': ['Tokyo', 'Osaka', 'Kyoto', 'Yokohama', 'Nagoya'],
            'China': ['Beijing', 'Shanghai', 'Guangzhou', 'Shenzhen', 'Chengdu'],
        }

        # University -> Major mappings
        self.education_hierarchy = {
            'MIT': ['Computer Science', 'Electrical Engineering', 'Physics', 'Mathematics', 'Mechanical Engineering'],
            'Stanford': ['Computer Science', 'Business', 'Engineering', 'Medicine', 'Law'],
            'Harvard': ['Law', 'Business', 'Medicine', 'Economics', 'Political Science'],
            'Cambridge': ['Mathematics', 'Physics', 'Engineering', 'Medicine', 'Law'],
            'Oxford': ['Philosophy', 'Law', 'Medicine', 'History', 'English Literature'],
        }

        # Per-instance Faker kept as a public attribute. The generators in
        # the graph use the module-level ``fake`` instance, not this one.
        self.fake = Faker()
        self.graph = self._build_graph()

    def _build_graph(self) -> Dict[str, Any]:
        """
        Build hierarchical knowledge structure.

        Every generator is wrapped in a lambda so that ``fake``, ``random``
        and the hierarchy tables are only consulted when a value is
        requested, not while the graph is being constructed.

        Returns:
            Nested dict whose leaves are zero-argument callables, or None
            for context-dependent values.
        """
        return {
            'person': {
                'name': {
                    'first_name': lambda: fake.first_name(),
                    'last_name': lambda: fake.last_name(),
                    'full_name': lambda: fake.name(),
                },
                'demographics': {
                    'age': lambda: random.randint(18, 80),
                    'gender': lambda: random.choice(['Male', 'Female', 'Non-binary']),
                    'ethnicity': lambda: random.choice(['Caucasian', 'African', 'Asian', 'Hispanic', 'Other']),
                },
                'contact': {
                    'email': lambda: fake.email(),
                    'phone': lambda: fake.phone_number(),
                    # Faker addresses are multi-line; flatten to one line.
                    'address': lambda: fake.address().replace('\n', ', '),
                },
            },
            'location': {
                'country': lambda: random.choice(list(self.location_hierarchy)),
                'city': None,  # Context-dependent
                'zip_code': lambda: fake.zipcode(),
                'latitude': lambda: fake.latitude(),
                'longitude': lambda: fake.longitude(),
            },
            'education': {
                'university': {
                    'name': lambda: random.choice(list(self.education_hierarchy)),
                },
                'major': None,  # Context-dependent
                'degree': lambda: random.choice(['Bachelor', 'Master', 'PhD', 'Associate']),
                'gpa': lambda: round(random.uniform(2.0, 4.0), 2),
                'graduation_year': lambda: random.randint(2000, 2024),
            },
            'business': {
                'company_name': lambda: fake.company(),
                'industry': lambda: random.choice(['Technology', 'Finance', 'Healthcare', 'Retail', 'Manufacturing']),
                'job_title': lambda: fake.job(),
                'salary': lambda: random.randint(30000, 250000),
                'experience_years': lambda: random.randint(0, 40),
            },
            'biology': {
                'mammal': {
                    'carnivore': lambda: random.choice(['Lion', 'Tiger', 'Wolf', 'Bear', 'Leopard']),
                    'herbivore': lambda: random.choice(['Elephant', 'Deer', 'Cow', 'Horse', 'Giraffe']),
                    'omnivore': lambda: random.choice(['Bear', 'Pig', 'Human', 'Raccoon', 'Chimpanzee']),
                },
            },
            'finance': {
                'transaction_type': lambda: random.choice(['Purchase', 'Refund', 'Transfer', 'Deposit', 'Withdrawal']),
                'currency': lambda: random.choice(['USD', 'EUR', 'GBP', 'JPY', 'CAD']),
                'amount': lambda: round(random.uniform(1.0, 10000.0), 2),
                'account_number': lambda: fake.bban(),
            },
            'temporal': {
                'date': lambda: fake.date_between(start_date='-10y', end_date='today').strftime('%Y-%m-%d'),
                'datetime': lambda: fake.date_time_between(start_date='-5y', end_date='now').strftime('%Y-%m-%d %H:%M:%S'),
                'time': lambda: fake.time(),
                'timestamp': lambda: int(fake.unix_time()),
            }
        }

    def _lookup(self, class_path: str) -> Any:
        """
        Walk the graph along a dotted path.

        Returns:
            The node found at ``class_path`` (a dict, a callable or None),
            or ``_MISSING`` when a segment is absent or the path continues
            past a leaf.
        """
        node = self.graph
        for part in class_path.split('.'):
            # Leaves are callables or None; a membership test on them would
            # raise TypeError, so only descend into dicts.
            if not isinstance(node, dict) or part not in node:
                return self._MISSING
            node = node[part]
        return node

    def generate(self, class_path: str, context: Optional[Dict] = None) -> Any:
        """
        Generate data for a class path.

        Args:
            class_path: e.g. "person.name.first_name" or "location.city"
            context: Context dictionary for dependent generation. The keys
                read are 'country' (for "location.city") and 'university'
                (for "education.major").

        Returns:
            Generated value

        Raises:
            ValueError: If the path is not in the graph (including a path
                that continues past a leaf), or if it names a nested group
                rather than a single generatable value.
        """
        node = self._lookup(class_path)
        if node is self._MISSING:
            raise ValueError(f"Unknown class path: {class_path}")

        # Context-aware generation
        if class_path == 'location.city' and context and 'country' in context:
            country = context['country']
            if country in self.location_hierarchy:
                return random.choice(self.location_hierarchy[country])
            # Unknown country: fall back to an arbitrary city.
            return fake.city()

        if class_path == 'education.major' and context and 'university' in context:
            university = context['university']
            if university in self.education_hierarchy:
                return random.choice(self.education_hierarchy[university])
            # Unknown university: pick from a small generic list.
            return random.choice(['Computer Science', 'Business', 'Engineering', 'Liberal Arts'])

        if callable(node):
            return node()

        if node is None:
            # Context-required but not provided
            if 'city' in class_path:
                return fake.city()
            if 'major' in class_path:
                return 'Computer Science'

        # Reached for nested groups such as "person".
        raise ValueError(f"Cannot generate value for: {class_path}")

    def generate_distribution(
        self,
        class_path: str,
        count: int,
        distribution: str = 'uniform',
        **kwargs
    ) -> List[Any]:
        """
        Generate multiple values with specified distribution.

        Args:
            class_path: Class path to generate
            count: Number of values
            distribution: 'uniform' (independent calls to ``generate``) or
                'normal'. 'normal' is only implemented for paths containing
                'age' or 'gpa'; any other path or distribution name falls
                back to uniform.
            **kwargs: Distribution parameters. For 'normal': ``mean`` and
                ``std`` (defaults 35/12 for age, 3.2/0.5 for GPA).

        Returns:
            List of generated values. Normal ages are ints clamped to
            [18, 80]; normal GPAs are Python floats rounded to two decimals
            and clamped to [0.0, 4.0].
        """
        if distribution == 'normal':
            if 'age' in class_path:
                mean = kwargs.get('mean', 35)
                std = kwargs.get('std', 12)
                samples = np.random.normal(mean, std, count)
                return [max(18, min(80, int(sample))) for sample in samples]

            if 'gpa' in class_path:
                mean = kwargs.get('mean', 3.2)
                std = kwargs.get('std', 0.5)
                samples = np.random.normal(mean, std, count)
                # Convert to a builtin float before rounding so the result
                # never mixes numpy scalars with Python floats.
                return [max(0.0, min(4.0, round(float(sample), 2))) for sample in samples]

        # 'uniform', unsupported distributions, and non-numeric paths.
        return [self.generate(class_path) for _ in range(count)]

    def get_class_info(self, class_path: str) -> Dict[str, Any]:
        """
        Get information about a class.

        Args:
            class_path: Dotted path into the graph.

        Returns:
            ``{'exists': False}`` when the path is not in the graph.
            Otherwise a dict with 'exists' (True), 'type' ('callable' for a
            generator leaf, 'context_dependent' for a None leaf, 'nested'
            for a group) and 'requires_context' (True for a None leaf).
        """
        node = self._lookup(class_path)
        if node is self._MISSING:
            return {'exists': False}

        if callable(node):
            node_type = 'callable'
        elif node is None:
            node_type = 'context_dependent'
        else:
            node_type = 'nested'

        return {
            'exists': True,
            'type': node_type,
            'requires_context': node is None,
        }


# Module-wide shared instance; stays None until the accessor is first called.
_knowledge_graph: Optional[KnowledgeGraph] = None

def get_knowledge_graph() -> KnowledgeGraph:
    """Return the shared KnowledgeGraph, constructing it on first use."""
    global _knowledge_graph
    if _knowledge_graph is not None:
        return _knowledge_graph
    _knowledge_graph = KnowledgeGraph()
    return _knowledge_graph
