# createsonline/ai/fields.py
"""
CREATESONLINE AI Field Types - COMPLETE INTERNAL IMPLEMENTATION

Intelligent field types that automatically compute values using AI models,
generate content with LLMs, and store vector embeddings for similarity search.
"""
import hashlib
import inspect
import json
import math
import random
import re
import time
from datetime import datetime
from typing import Any, Dict, List, Optional

# Internal fallbacks for external dependencies.
# SQLAlchemy is optional: when it cannot be imported, `sa` is replaced by a
# small stand-in object and SA_AVAILABLE is set to False so callers can skip
# column construction.
try:
    import sqlalchemy as sa
    from sqlalchemy import Column, Float, Integer, String, Text, JSON
    from sqlalchemy.types import TypeDecorator, UserDefinedType
    SA_AVAILABLE = True
except ImportError:
    SA_AVAILABLE = False

    class MockSQLAlchemy:
        """String-based placeholder exposing the few `sa` names this module uses."""

        Float = "Float"
        Integer = "Integer"
        Text = "Text"
        JSON = "JSON"

        # These must be static: `sa` is an *instance*, so a plain function or
        # lambda stored on the class would be bound and receive the instance
        # as its first argument (making `sa.String(255)` raise TypeError).
        @staticmethod
        def String(x=None):
            """Return a textual description of a String type of length *x*."""
            return f"String({x})"

        @staticmethod
        def Column(*args, **kwargs):
            """Return a textual description of a column definition."""
            return f"Column({args}, {kwargs})"

    sa = MockSQLAlchemy()

# ========================================
# INTERNAL AI SERVICES
# ========================================

class InternalAIEngine:
    """Pure Python AI engine with mock/basic implementations.

    Every method is a deterministic, rule-based stand-in: no model is loaded
    and no network call is made.
    """

    def __init__(self):
        # Neither dict is read or written by the methods of this class.
        self.cache = {}
        self.models = {}

    def hash_text(self, text: str) -> str:
        """Return the MD5 hex digest of *text* (used as a stable seed, not for security)."""
        return hashlib.md5(text.encode()).hexdigest()

    def generate_embedding(self, text: str, dimensions: int = 768) -> List[float]:
        """Generate a deterministic mock embedding from *text*.

        Each hex character of the text's digest is mapped to a value in
        [-0.5, 0.5]; the resulting sequence is repeated cyclically until
        *dimensions* values have been produced.

        Returns:
            A list of *dimensions* floats (empty when *dimensions* <= 0).
        """
        digest = self.hash_text(text)

        # The value only depends on the digest character, so compute each one
        # once and cycle through them instead of redoing the math per index.
        base_values = [
            math.sin(ord(char) / 255.0 * math.pi * 2) * 0.5 for char in digest
        ]
        return [base_values[i % len(base_values)] for i in range(dimensions)]

    def similarity(self, vec1: List[float], vec2: List[float]) -> float:
        """Calculate the cosine similarity of two vectors.

        Returns 0.0 when either vector has zero magnitude or when the inputs
        cannot be processed (e.g. ``None`` or non-numeric elements).
        """
        try:
            dot_product = sum(a * b for a, b in zip(vec1, vec2))

            mag1 = math.sqrt(sum(a * a for a in vec1))
            mag2 = math.sqrt(sum(b * b for b in vec2))

            if mag1 == 0 or mag2 == 0:
                return 0.0

            return dot_product / (mag1 * mag2)
        except Exception:
            # Best-effort: bad input yields "no similarity" rather than an
            # error, but KeyboardInterrupt/SystemExit are no longer swallowed.
            return 0.0

    def generate_text(self, prompt: str, max_tokens: int = 100) -> str:
        """Generate mock text based on keywords found in *prompt*.

        *max_tokens* is accepted for interface compatibility but is not used
        by this rule-based implementation.
        """
        prompt_words = prompt.lower().split()

        # Simple rule-based generation keyed on whole words of the prompt.
        if any(word in prompt_words for word in ['summary', 'summarize']):
            return f"Summary: {prompt[:50]}... (Generated by CREATESONLINE AI)"
        elif any(word in prompt_words for word in ['title', 'headline']):
            # Join the last three words; interpolating the list directly
            # would leak its repr (brackets and quotes) into the title.
            title_words = " ".join(prompt.split()[-3:])
            return f"Title: {title_words} - AI Generated"
        elif any(word in prompt_words for word in ['description', 'explain']):
            return f"This is an AI-generated description based on: {prompt[:40]}..."
        else:
            return f"AI Response: Generated content for '{prompt[:30]}...' using CREATESONLINE"

    def classify_text(self, text: str, categories: Optional[List[str]] = None) -> Dict[str, float]:
        """Score *text* with a basic keyword-based sentiment heuristic.

        Only the categories "positive", "negative" and "neutral" are scored;
        any other requested category is absent from the result.

        Returns:
            Mapping of category name to score, in the order positive,
            negative, neutral (restricted to the requested categories).
        """
        if not categories:
            categories = ["positive", "negative", "neutral"]

        text_lower = text.lower()
        scores = {}

        positive_words = {"good", "great", "excellent", "amazing", "wonderful", "fantastic"}
        negative_words = {"bad", "terrible", "awful", "horrible", "disappointing", "poor"}

        # Substring match: counts how many distinct keywords occur in the text.
        positive_count = sum(1 for word in positive_words if word in text_lower)
        negative_count = sum(1 for word in negative_words if word in text_lower)

        if "positive" in categories:
            scores["positive"] = min(1.0, positive_count * 0.3 + 0.1)
        if "negative" in categories:
            scores["negative"] = min(1.0, negative_count * 0.3 + 0.1)
        if "neutral" in categories:
            scores["neutral"] = 1.0 - max(scores.get("positive", 0), scores.get("negative", 0))

        return scores

    def predict_numeric(self, features: Dict[str, Any]) -> float:
        """Return a deterministic mock prediction in [0, 1) for *features*.

        Numeric values contribute their value, strings contribute a tenth of
        their length, and other value types are ignored in the sum (they
        still influence the hash-derived offset).
        """
        feature_sum = 0
        for value in features.values():
            if isinstance(value, (int, float)):
                feature_sum += value
            elif isinstance(value, str):
                feature_sum += len(value) * 0.1

        # Hash-derived offset so different feature sets give different outputs.
        hash_val = self.hash_text(str(features))
        seed = int(hash_val[:8], 16) % 100
        prediction = (feature_sum * 0.1 + seed * 0.01) % 1.0

        return prediction

# Global AI engine instance: a single module-level InternalAIEngine that
# AIFieldMixin.get_ai_service() returns for every field.
_ai_engine = InternalAIEngine()

# ========================================
# BASE AI FIELD FUNCTIONALITY
# ========================================

class AIFieldMixin:
    """Base mixin for AI-enhanced fields with internal implementations.

    Provides the per-field AI configuration, access to the AI engine and a
    small in-memory, time-limited result cache.
    """

    def __init__(self, ai_config: Optional[Dict[str, Any]] = None, **kwargs):
        """Initialize AI field with configuration.

        Args:
            ai_config: Optional settings; missing keys are filled with the
                defaults from ``_setup_ai_defaults``. The dict passed in is
                copied and never modified.
            **kwargs: Accepted for signature compatibility and ignored.
        """
        # Copy so that filling in defaults never mutates the caller's dict.
        self.ai_config = dict(ai_config or {})
        self.ai_enabled = True
        self.ai_cache_enabled = True
        self.ai_cache_ttl = 3600  # 1 hour
        self._ai_cache = {}

        # Default AI configuration
        self._setup_ai_defaults()

        # Honour the caching options from the configuration; with the default
        # configuration these keep the values assigned above.
        self.ai_cache_enabled = bool(self.ai_config.get("enable_caching", True))
        configured_ttl = self.ai_config.get("cache_ttl")
        if configured_ttl is not None:
            self.ai_cache_ttl = configured_ttl

    def _setup_ai_defaults(self):
        """Fill ``self.ai_config`` with default values for any missing key."""
        defaults = {
            "enable_caching": True,
            "cache_ttl": 3600,
            "fallback_value": None,
            "confidence_threshold": 0.5,
            "mock_mode": True,  # Use internal implementations by default
            "provider": "internal"
        }

        for key, value in defaults.items():
            self.ai_config.setdefault(key, value)

    def get_ai_service(self, service_type: str = "internal"):
        """Get AI service - always returns the internal engine (``service_type`` is ignored)."""
        return _ai_engine

    def generate_cache_key(self, input_data: Any) -> str:
        """Generate a cache key from the input, the configuration and the field class.

        The ``api_key`` config entry is excluded from the key material.
        """
        if isinstance(input_data, (dict, list)):
            input_str = json.dumps(input_data, sort_keys=True, default=str)
        else:
            input_str = str(input_data)

        cache_data = {
            "input": input_str,
            "config": {k: v for k, v in self.ai_config.items() if k != "api_key"},
            "field_type": self.__class__.__name__
        }

        # default=str: the config may hold values JSON cannot encode (for
        # example pre/post-processing callables); stringify them rather than
        # raising TypeError.
        cache_str = json.dumps(cache_data, sort_keys=True, default=str)
        return hashlib.md5(cache_str.encode()).hexdigest()

    def is_cache_valid(self, cache_key: str) -> bool:
        """Return True when *cache_key* is cached and younger than the TTL."""
        if not self.ai_cache_enabled or cache_key not in self._ai_cache:
            return False

        cached_time = self._ai_cache[cache_key].get("timestamp", 0)
        return (time.time() - cached_time) < self.ai_cache_ttl

    def get_cached_result(self, cache_key: str) -> Any:
        """Return the cached result for *cache_key*, or None when absent or expired."""
        if self.is_cache_valid(cache_key):
            return self._ai_cache[cache_key]["result"]
        return None

    def set_cached_result(self, cache_key: str, result: Any):
        """Store *result* under *cache_key* with the current time (no-op when caching is disabled)."""
        if self.ai_cache_enabled:
            self._ai_cache[cache_key] = {
                "result": result,
                # Epoch seconds; time.time() avoids the naive-UTC-as-local
                # pitfall of datetime.utcnow().timestamp().
                "timestamp": time.time()
            }

# ========================================
# AI FIELD UTILITIES AND HELPERS
# ========================================

class AIFieldManager:
    """Manager for AI field operations across models.

    Keeps a registry of AI fields per model class and computes their values
    for model instances, tracking simple counters in ``stats``.
    """

    # Value types accepted as features / template variables.
    _SCALAR_TYPES = (str, int, float, bool)

    def __init__(self):
        # {"module.ClassName": {field_name: field_instance}}
        self.registered_fields = {}
        self.compute_queue = []
        self.stats = {
            "computations": 0,
            "cache_hits": 0,
            "cache_misses": 0,
            "errors": 0
        }

    @staticmethod
    def _model_key(model_class):
        """Return the registry key ("module.ClassName") for *model_class*."""
        return f"{model_class.__module__}.{model_class.__name__}"

    def register_field(self, model_class, field_name, field_instance):
        """Register an AI field of *model_class* under *field_name*."""
        model_key = self._model_key(model_class)
        self.registered_fields.setdefault(model_key, {})[field_name] = field_instance

    async def compute_all_fields(self, instance):
        """Compute all registered AI fields for a model instance.

        A failure in one field is counted in ``stats["errors"]`` and does not
        stop the remaining fields from being computed.
        """
        model_key = self._model_key(instance.__class__)

        if model_key not in self.registered_fields:
            return

        for field_name, field_instance in self.registered_fields[model_key].items():
            try:
                await self._compute_field(instance, field_name, field_instance)
                self.stats["computations"] += 1
            except Exception:
                # Deliberately best-effort: record the failure and carry on.
                self.stats["errors"] += 1

    async def _compute_field(self, instance, field_name, field_instance):
        """Compute one AI field and store the result on *instance*.

        The field's class decides which inputs are gathered and which
        coroutine is awaited; unknown field types are ignored.
        """
        if isinstance(field_instance, AIComputedField):
            features = self._extract_features(instance, field_instance)
            value = await field_instance.compute_value(instance, features)
            setattr(instance, field_name, value)

        elif isinstance(field_instance, LLMField):
            template_data = self._extract_template_data(instance, field_instance)
            content = await field_instance.generate_content(instance, template_data)
            setattr(instance, field_name, content)

        elif isinstance(field_instance, VectorField):
            source_data = self._extract_source_data(instance, field_instance)
            embedding = await field_instance.generate_embedding(instance, source_data)
            setattr(instance, field_name, embedding)

        elif isinstance(field_instance, SmartTextField):
            text_value = getattr(instance, field_name, "")
            if text_value:
                analysis = await field_instance.analyze_text(text_value)
                # The analysis is stored next to the text, not in its place.
                setattr(instance, f"{field_name}_analysis", analysis)

    def _scalar_attributes(self, instance, exclude=()):
        """Return the public str/int/float/bool attributes of *instance*.

        Names starting with an underscore, names in *exclude*, and
        attributes that cannot be read are skipped.
        """
        collected = {}
        for attr_name in dir(instance):
            if attr_name.startswith('_') or attr_name in exclude:
                continue
            # None is not a scalar, so a missing attribute is skipped below.
            value = getattr(instance, attr_name, None)
            if isinstance(value, self._SCALAR_TYPES):
                collected[attr_name] = value
        return collected

    def _extract_features(self, instance, field_instance):
        """Extract features for AI computation.

        Uses the configured ``features`` list if present, otherwise the
        configured ``source_field``, otherwise every public scalar attribute
        that is not itself a registered AI field of the model.
        """
        features = {}
        feature_fields = field_instance.ai_config.get('features', [])
        source_field = field_instance.ai_config.get('source_field')

        if feature_fields:
            for feature_field in feature_fields:
                if hasattr(instance, feature_field):
                    features[feature_field] = getattr(instance, feature_field)
        elif source_field:
            if hasattr(instance, source_field):
                features[source_field] = getattr(instance, source_field)
        else:
            # Extract all non-AI fields as features. Registered AI fields are
            # excluded so that a previously computed output never feeds back
            # into the next computation (which would also change the cache key).
            ai_field_names = self.registered_fields.get(
                self._model_key(instance.__class__), {}
            )
            features = self._scalar_attributes(instance, exclude=ai_field_names)

        return features

    def _extract_template_data(self, instance, field_instance):
        """Extract template data for LLM generation: all public scalar attributes."""
        return self._scalar_attributes(instance)

    def _extract_source_data(self, instance, field_instance):
        """Extract source data for vector embedding.

        Prefers the configured ``source_field``; otherwise the first
        non-empty common text attribute; otherwise ``str(instance)``.
        """
        source_field = field_instance.ai_config.get('source_field')

        if source_field and hasattr(instance, source_field):
            return getattr(instance, source_field)

        # Fallback to common text fields
        for fallback_field in ['description', 'content', 'text', 'name', 'title']:
            value = getattr(instance, fallback_field, None)
            if value:
                return value

        return str(instance)

    def get_stats(self):
        """Return a copy of the computation statistics."""
        return self.stats.copy()

# Global AI field manager: the module-level AIFieldManager returned by
# get_ai_field_manager() and used by AIModelMixin to register and compute fields.
_ai_field_manager = AIFieldManager()

def get_ai_field_manager():
    """Return the shared module-level AI field manager instance."""
    manager = _ai_field_manager
    return manager

# ========================================
# MODEL INTEGRATION HELPERS
# ========================================

class AIModelMixin:
    """Mixin giving a model class automatic AI field registration and computation."""

    def __init__(self, *args, **kwargs):
        # Let the rest of the MRO initialise first, then publish this
        # class's AI fields to the global manager.
        super().__init__(*args, **kwargs)
        self._register_ai_fields()

    def _register_ai_fields(self):
        """Register every class-level AI field with the global field manager."""
        model_cls = self.__class__
        for name in dir(model_cls):
            candidate = getattr(model_cls, name)
            if not isinstance(candidate, AIFieldMixin):
                continue
            _ai_field_manager.register_field(model_cls, name, candidate)

    async def compute_ai_fields(self):
        """Compute every registered AI field for this instance."""
        await _ai_field_manager.compute_all_fields(self)

    async def compute_field(self, field_name):
        """Compute a single AI field by name; does nothing if it is not an AI field."""
        field = getattr(self.__class__, field_name, None)
        if not field or not isinstance(field, AIFieldMixin):
            return
        await _ai_field_manager._compute_field(self, field_name, field)

# ========================================
# VALIDATION AND TYPE CHECKING
# ========================================

def validate_ai_config(config: Dict[str, Any], field_type: str) -> Dict[str, Any]:
    """Return a normalized copy of *config* for the given field type.

    Missing common and field-specific keys receive default values, and
    unsupported choices are replaced by the default choice. The input
    dict itself is left untouched.
    """
    validated = config.copy()

    # Keys every field type needs.
    validated.setdefault("provider", "internal")
    validated.setdefault("enable_caching", True)

    if field_type == "AIComputedField":
        validated.setdefault("prediction_type", "regression")
        allowed_types = ["classification", "regression"]
        if validated["prediction_type"] not in allowed_types:
            validated["prediction_type"] = "regression"

    elif field_type == "LLMField":
        validated.setdefault("max_tokens", 500)
        validated.setdefault("temperature", 0.7)
        # Clamp the temperature into [0.0, 2.0].
        upper_bounded = min(2.0, validated["temperature"])
        validated["temperature"] = max(0.0, upper_bounded)

    elif field_type == "VectorField":
        validated.setdefault("similarity_metric", "cosine")
        allowed_metrics = ["cosine", "euclidean", "dot_product"]
        if validated["similarity_metric"] not in allowed_metrics:
            validated["similarity_metric"] = "cosine"

    return validated

def check_ai_field_compatibility(field_instance, model_class):
    """List the configured source/feature fields that *model_class* lacks.

    Returns a list of human-readable issue strings; the list is empty when
    the field has no ``ai_config`` or every referenced attribute exists.
    """
    if not hasattr(field_instance, 'ai_config'):
        return []

    settings = field_instance.ai_config
    problems = []

    # The single source field, if one is configured.
    source_name = settings.get('source_field')
    if source_name and not hasattr(model_class, source_name):
        problems.append(f"Source field '{source_name}' not found in model {model_class.__name__}")

    # Every listed feature field must exist on the model as well.
    problems.extend(
        f"Feature field '{feature_name}' not found in model {model_class.__name__}"
        for feature_name in settings.get('features', [])
        if not hasattr(model_class, feature_name)
    )

    return problems

# ========================================
# CREATESONLINE AI FIELD TYPES
# ========================================

class CreatesonlineField:
    """Descriptor-based base field that works with or without SQLAlchemy.

    Per-instance values are kept on the owning instance under the attribute
    ``_<field name>``.
    """

    def __init__(self, field_type=None, *args, **kwargs):
        self.name = None
        self.value = None
        self.args = args
        self.kwargs = kwargs
        # A falsy field type falls back to "Text".
        self.field_type = field_type if field_type else "Text"

    def __set_name__(self, owner, name):
        """Remember the attribute name this field was assigned to on its class."""
        self.name = name

    def __get__(self, instance, owner):
        """Return the instance's stored value, or the field itself on class access."""
        if instance is not None:
            storage_attr = "_{}".format(self.name)
            return getattr(instance, storage_attr, self.value)
        return self

    def __set__(self, instance, value):
        """Store *value* on the instance under the private storage attribute."""
        storage_attr = "_{}".format(self.name)
        setattr(instance, storage_attr, value)

class AIComputedField(CreatesonlineField, AIFieldMixin):
    """
    Field that automatically computes values using AI models
    
    Examples:
        # Predict customer lifetime value
        clv = AIComputedField("Float", ai_config={
            "model": "customer_lifetime_value",
            "features": ["age", "purchase_history", "engagement_score"],
            "prediction_type": "regression"
        })
        
        # Classify content category
        category = AIComputedField("String", ai_config={
            "model": "content_classifier", 
            "source_field": "description",
            "prediction_type": "classification"
        })
    """

    def __init__(self, field_type="Float", *args, ai_config: Optional[Dict[str, Any]] = None, **kwargs):
        """Initialize AI computed field.

        Args:
            field_type: "Float", "Integer", "String" or "Text" get a
                SQLAlchemy column when SQLAlchemy is available; any other
                value leaves ``sql_field`` as None.
            *args, **kwargs: Stored on the field and forwarded to the column.
                A ``length`` keyword sets the String length (default 255).
            ai_config: Overrides for the default configuration below.
        """
        # Default AI configuration for computed fields
        default_config = {
            "model": "default_predictor",
            "prediction_type": "regression",
            "confidence_threshold": 0.5,
            "fallback_value": 0.0,
            "enable_async": True,
            "timeout": 30,
            "features": [],
            "source_field": None
        }

        ai_config = {**default_config, **(ai_config or {})}

        # Initialize base classes
        CreatesonlineField.__init__(self, field_type, *args, **kwargs)
        AIFieldMixin.__init__(self, ai_config=ai_config)

        # Always define the attribute so it can be read even when SQLAlchemy
        # is missing or the field type has no column mapping.
        self.sql_field = None
        if SA_AVAILABLE:
            self.sql_field = self._build_sql_column(field_type, args, kwargs)

    @staticmethod
    def _build_sql_column(field_type, args, kwargs):
        """Build the SQLAlchemy column for *field_type*, or None if not possible."""
        # `length` configures the String type; it is not a Column argument,
        # so it must not be forwarded to sa.Column.
        column_kwargs = dict(kwargs)
        length = column_kwargs.pop("length", 255)

        if field_type == "Float":
            column_type = sa.Float
        elif field_type == "Integer":
            column_type = sa.Integer
        elif field_type == "String":
            column_type = sa.String(length)
        elif field_type == "Text":
            column_type = sa.Text
        else:
            return None

        try:
            return sa.Column(column_type, *args, **column_kwargs)
        except Exception:
            # Best-effort integration: an invalid column spec disables the
            # SQL column instead of breaking field construction.
            return None

    async def compute_value(self, instance, features_data: Dict[str, Any]) -> Any:
        """Compute field value using AI model.

        Returns the cached result when one exists for the same features.
        Otherwise classifies (returning the top-scoring category) or predicts
        a number, depending on ``prediction_type``. The configured
        ``fallback_value`` is returned when AI is disabled, when confidence
        is below ``confidence_threshold``, or when any step raises.
        """
        if not self.ai_enabled:
            return self.ai_config.get("fallback_value")

        cache_key = self.generate_cache_key(features_data)

        cached_result = self.get_cached_result(cache_key)
        if cached_result is not None:
            return cached_result

        try:
            ai_service = self.get_ai_service()

            processed_features = await self._preprocess_features(features_data)

            if self.ai_config["prediction_type"] == "classification":
                categories = self.ai_config.get("categories", ["positive", "negative", "neutral"])
                prediction_result = ai_service.classify_text(
                    str(processed_features.get("text", processed_features)),
                    categories
                )
                # Category with the highest score, and that score as confidence.
                final_result, confidence = max(prediction_result.items(), key=lambda x: x[1])
            else:  # regression
                final_result = ai_service.predict_numeric(processed_features)
                confidence = 0.8 + random.random() * 0.15  # Mock confidence

            if confidence < self.ai_config["confidence_threshold"]:
                return self.ai_config.get("fallback_value")

            self.set_cached_result(cache_key, final_result)

            return final_result

        except Exception:
            return self.ai_config.get("fallback_value")

    async def _preprocess_features(self, features_data: Dict[str, Any]) -> Dict[str, Any]:
        """Apply the optional ``preprocessing`` callable from the configuration.

        The callable may be a plain function or a coroutine function.
        """
        preprocessing_func = self.ai_config.get("preprocessing")
        if preprocessing_func and callable(preprocessing_func):
            processed = preprocessing_func(features_data)
            # Only await when the callable actually produced an awaitable;
            # awaiting a plain return value would raise TypeError.
            if inspect.isawaitable(processed):
                processed = await processed
            return processed
        return features_data

class LLMField(CreatesonlineField, AIFieldMixin):
    """
    Field that generates content using Large Language Models
    
    Examples:
        # Generate product description
        description = LLMField(ai_config={
            "model": "gpt-4",
            "prompt_template": "Write a compelling product description for {name} with features: {features}",
            "max_tokens": 200,
            "temperature": 0.7
        })
        
        # Generate email subject line
        subject = LLMField(ai_config={
            "model": "gpt-3.5-turbo",
            "prompt_template": "Create an engaging email subject for: {content}",
            "max_tokens": 50
        })
    """

    def __init__(self, field_type="Text", *args, ai_config: Optional[Dict[str, Any]] = None, **kwargs):
        """Initialize LLM field.

        Args:
            field_type: Stored on the field; the SQL column is always Text.
            *args, **kwargs: Stored on the field and forwarded to the column.
            ai_config: Overrides for the default configuration below.
        """
        # Default LLM configuration
        default_config = {
            "model": "internal-llm",
            "max_tokens": 500,
            "temperature": 0.7,
            "provider": "internal",
            "regenerate_on_change": True,
            "enable_streaming": False,
            "timeout": 30,
            "prompt_template": "Generate content based on: {input}",
            "fallback_content": ""
        }

        ai_config = {**default_config, **(ai_config or {})}

        # Initialize base classes
        CreatesonlineField.__init__(self, field_type, *args, **kwargs)
        AIFieldMixin.__init__(self, ai_config=ai_config)

        # Always define the attribute so it can be read without SQLAlchemy.
        self.sql_field = None
        if SA_AVAILABLE:
            try:
                self.sql_field = sa.Column(sa.Text, *args, **kwargs)
            except Exception:
                # Best-effort: an invalid column spec disables the SQL column.
                self.sql_field = None

    async def generate_content(self, instance, template_data: Dict[str, Any]) -> str:
        """Generate content using LLM.

        Returns the cached content when one exists for the same template
        data. The configured ``fallback_content`` is returned when AI is
        disabled or when any generation step raises.
        """
        if not self.ai_enabled:
            return self.ai_config.get("fallback_content", "")

        cache_key = self.generate_cache_key(template_data)

        cached_result = self.get_cached_result(cache_key)
        if cached_result is not None:
            return cached_result

        try:
            ai_service = self.get_ai_service()

            prompt = await self._build_prompt(template_data)

            generated_content = ai_service.generate_text(
                prompt=prompt,
                max_tokens=self.ai_config["max_tokens"]
            )

            final_content = await self._postprocess_content(generated_content)

            self.set_cached_result(cache_key, final_content)

            return final_content

        except Exception:
            return self.ai_config.get("fallback_content", "")

    async def _build_prompt(self, template_data: Dict[str, Any]) -> str:
        """Fill the configured prompt template with *template_data*.

        When the template cannot be filled from the data, a generic prompt
        containing the raw data is returned instead.
        """
        prompt_template = self.ai_config.get("prompt_template", "Generate content based on: {input}")

        try:
            return prompt_template.format(**template_data)
        except (KeyError, IndexError, ValueError, AttributeError, TypeError):
            # Missing variable, positional placeholder, malformed template or
            # a bad attribute/index lookup: fall back to the simple prompt
            # instead of losing the whole generation.
            return f"Generate content based on: {template_data}"

    async def _postprocess_content(self, content: str) -> str:
        """Strip the content and apply the optional ``postprocessing`` callable.

        The callable may be a plain function or a coroutine function.
        """
        content = content.strip()

        postprocessing_func = self.ai_config.get("postprocessing")
        if postprocessing_func and callable(postprocessing_func):
            processed = postprocessing_func(content)
            # Only await when the callable actually produced an awaitable.
            if inspect.isawaitable(processed):
                processed = await processed
            content = processed

        return content

class VectorField(CreatesonlineField, AIFieldMixin):
    """
    Field that stores vector embeddings for similarity search
    
    Examples:
        # Store document embeddings
        embedding = VectorField(dimensions=768, ai_config={
            "model": "text-embedding-ada-002",
            "source_field": "content",
            "normalize": True
        })
        
        # Store image embeddings  
        image_embedding = VectorField(dimensions=512, ai_config={
            "model": "clip-vit-base-patch32",
            "source_field": "image_path",
            "provider": "internal"
        })
    """

    def __init__(self, dimensions: int = 768, field_type="JSON", *args, ai_config: Optional[Dict[str, Any]] = None, **kwargs):
        """Initialize vector field.

        Args:
            dimensions: Required length of every stored vector.
            field_type: Stored on the field; the SQL column is always JSON.
            *args, **kwargs: Stored on the field and forwarded to the column.
            ai_config: Overrides for the default configuration below.
        """
        self.dimensions = dimensions

        # Default vector configuration
        default_config = {
            "model": "internal-embeddings",
            "provider": "internal",
            "normalize": True,
            "batch_size": 100,
            "similarity_metric": "cosine",
            "enable_indexing": True,
            "source_field": None
        }

        ai_config = {**default_config, **(ai_config or {})}

        # Initialize base classes
        CreatesonlineField.__init__(self, field_type, *args, **kwargs)
        AIFieldMixin.__init__(self, ai_config=ai_config)

        # Always define the attribute so it can be read without SQLAlchemy.
        self.sql_field = None
        if SA_AVAILABLE:
            try:
                self.sql_field = sa.Column(sa.JSON, *args, **kwargs)
            except Exception:
                # Best-effort: an invalid column spec disables the SQL column.
                self.sql_field = None

    def _check_dimensions(self, actual: int) -> None:
        """Raise ValueError unless *actual* equals the configured dimensions."""
        if actual != self.dimensions:
            raise ValueError(f"Vector must have {self.dimensions} dimensions, got {actual}")

    def process_value(self, value):
        """Convert a vector to its JSON string form before storing.

        Accepts None (returned as-is), a list, an object with ``tolist()``
        (e.g. a numpy array), or a JSON string encoding a list.

        Raises:
            ValueError: On a dimension mismatch, an invalid JSON string, or
                an unsupported type.
        """
        if value is None:
            return None

        if isinstance(value, list):
            self._check_dimensions(len(value))
            return json.dumps(value)

        if hasattr(value, 'tolist'):  # numpy array
            self._check_dimensions(len(value))
            return json.dumps(value.tolist())

        if isinstance(value, str):
            try:
                parsed = json.loads(value)
            except ValueError as exc:  # json.JSONDecodeError subclasses ValueError
                raise ValueError("Vector string is not valid JSON") from exc
            if not isinstance(parsed, list):
                raise ValueError(
                    f"Vector string must encode a JSON list, got {type(parsed).__name__}"
                )
            # Report a wrong length as such rather than as an unsupported type.
            self._check_dimensions(len(parsed))
            return value

        raise ValueError(f"Unsupported vector type: {type(value)}")

    def parse_value(self, value):
        """Parse a stored value: JSON strings are decoded, anything else is returned as-is."""
        if value is None:
            return None

        if isinstance(value, str):
            return json.loads(value)

        return value

    async def generate_embedding(self, instance, source_data: Any) -> List[float]:
        """Generate vector embedding from source data.

        Returns the cached embedding when one exists for the same source
        data. A zero vector of the configured length is returned when AI is
        disabled or when any generation step raises.
        """
        if not self.ai_enabled:
            return [0.0] * self.dimensions

        cache_key = self.generate_cache_key(source_data)

        cached_result = self.get_cached_result(cache_key)
        if cached_result is not None:
            # Hand out a copy so callers mutating their vector cannot
            # corrupt the cached one (or another instance's vector).
            return list(cached_result)

        try:
            processed_data = await self._preprocess_data(source_data)

            # Get embedding service (always internal)
            ai_service = self.get_ai_service()

            embedding = ai_service.generate_embedding(
                text=processed_data,
                dimensions=self.dimensions
            )

            # Fix the length first: truncating after normalization would
            # leave a vector that is no longer unit length.
            if len(embedding) < self.dimensions:
                embedding = list(embedding) + [0.0] * (self.dimensions - len(embedding))
            elif len(embedding) > self.dimensions:
                embedding = embedding[:self.dimensions]

            if self.ai_config["normalize"]:
                embedding = self._normalize_vector(embedding)

            self.set_cached_result(cache_key, list(embedding))

            return embedding

        except Exception:
            return [0.0] * self.dimensions

    async def _preprocess_data(self, data: Any) -> str:
        """Convert source data to text: strings pass through, dicts/lists become JSON."""
        if isinstance(data, str):
            return data
        elif isinstance(data, (dict, list)):
            return json.dumps(data, default=str)
        else:
            return str(data)

    def _normalize_vector(self, vector: List[float]) -> List[float]:
        """Scale *vector* to unit length; zero or non-numeric vectors are returned unchanged."""
        try:
            norm = math.sqrt(sum(x * x for x in vector))
            if norm == 0:
                return vector
            return [x / norm for x in vector]
        except (TypeError, ValueError, OverflowError):
            return vector

    def similarity(self, vector1: List[float], vector2: List[float]) -> float:
        """Calculate similarity between two vectors using the AI service."""
        ai_service = self.get_ai_service()
        return ai_service.similarity(vector1, vector2)

class SmartTextField(CreatesonlineField, AIFieldMixin):
    """
    Enhanced text field with AI analysis capabilities

    Each analysis is opt-in through ``ai_config``; all of them default to
    disabled.

    Examples:
        # Analyze sentiment and extract keywords
        content = SmartTextField(ai_config={
            "analyze_sentiment": True,
            "extract_keywords": True,
            "detect_language": True,
            "content_moderation": True
        })
    """

    def __init__(self, field_type="Text", *args, ai_config: Optional[Dict[str, Any]] = None, **kwargs):
        """Initialize smart text field.

        Args:
            field_type: Type name forwarded to ``CreatesonlineField``.
            *args: Extra positional arguments forwarded to the base field and,
                when SQLAlchemy is available, to ``sa.Column``.
            ai_config: Overrides for the analysis switches listed below.
            **kwargs: Extra keyword arguments forwarded like ``*args``.
        """

        default_config = {
            "analyze_sentiment": False,
            "extract_keywords": False,
            "detect_language": False,
            "content_moderation": False,
            "summarize": False,
            "auto_enhance": False
        }

        # Caller-supplied values win over the defaults.
        ai_config = {**default_config, **(ai_config or {})}

        # Initialize base classes
        CreatesonlineField.__init__(self, field_type, *args, **kwargs)
        AIFieldMixin.__init__(self, ai_config=ai_config)

        # SQLAlchemy integration
        if SA_AVAILABLE:
            try:
                self.sql_field = sa.Column(sa.Text, *args, **kwargs)
            except Exception:
                # Column construction is best effort; the field still works
                # without a backing SQLAlchemy column.
                self.sql_field = None

    async def analyze_text(self, text: str) -> Dict[str, Any]:
        """Perform AI analysis on text content.

        Args:
            text: The content to analyse.

        Returns:
            A dict holding one entry per enabled analysis: ``"sentiment"``,
            ``"keywords"``, ``"language"``, ``"moderation"`` and
            ``"summary"``.  An empty dict is returned when AI is disabled,
            when ``text`` is empty, or when any analysis step raises.
            Successful results are cached and served from the cache on
            repeated calls.
        """
        # Function-scope import keeps this class self-contained.
        from collections import Counter

        if not self.ai_enabled or not text:
            return {}

        cache_key = self.generate_cache_key(text)
        cached_result = self.get_cached_result(cache_key)
        if cached_result is not None:
            return cached_result

        analysis_results = {}

        try:
            ai_service = self.get_ai_service()
            lowered = text.lower()

            # Sentiment analysis
            if self.ai_config["analyze_sentiment"]:
                sentiment_scores = ai_service.classify_text(text, ["positive", "negative", "neutral"])
                analysis_results["sentiment"] = {
                    # Label with the highest score wins.
                    "label": max(sentiment_scores.items(), key=lambda x: x[1])[0],
                    "scores": sentiment_scores
                }

            # Keyword extraction (simple implementation)
            if self.ai_config["extract_keywords"]:
                common_words = {"the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with", "by"}
                # Rank by frequency so the result is deterministic; ties keep
                # the order in which the words first appear in the text.
                word_counts = Counter(
                    word
                    for word in re.findall(r'\b\w+\b', lowered)
                    if len(word) > 3 and word not in common_words
                )
                analysis_results["keywords"] = [word for word, _ in word_counts.most_common(10)]  # Top 10

            # Language detection (simple heuristic)
            if self.ai_config["detect_language"]:
                language_markers = (
                    ('es', ('hola', 'gracias', 'por favor', 'español')),
                    ('fr', ('bonjour', 'merci', 'français')),
                    ('de', ('guten tag', 'danke', 'deutsch')),
                )
                language = 'en'
                for code, markers in language_markers:
                    # Whole-word match: a plain substring test would tag
                    # "scholar" as Spanish because it contains "hola".
                    pattern = r'\b(?:' + '|'.join(re.escape(marker) for marker in markers) + r')\b'
                    if re.search(pattern, lowered):
                        language = code
                        break
                analysis_results["language"] = language

            # Content moderation (basic)
            if self.ai_config["content_moderation"]:
                flagged_words = {"spam", "scam", "offensive", "inappropriate"}
                # Substring test, so derived forms such as "spammer" are
                # flagged as well.
                is_flagged = any(word in lowered for word in flagged_words)
                analysis_results["moderation"] = {
                    "flagged": is_flagged,
                    "confidence": 0.8 if is_flagged else 0.1
                }

            # Summarization (only for texts longer than 100 characters; the
            # prompt carries the first 200 characters)
            if self.ai_config["summarize"] and len(text) > 100:
                summary = ai_service.generate_text(f"Summarize: {text[:200]}")
                analysis_results["summary"] = summary

            # Cache results
            self.set_cached_result(cache_key, analysis_results)

            return analysis_results

        except Exception:
            # Analysis is best effort: any failure yields an empty result
            # and nothing is cached.
            return {}

class PredictionField(AIComputedField):
    """
    AI-computed field preconfigured for predictions on current data

    Examples:
        # Predict next purchase date
        next_purchase = PredictionField("Date", ai_config={
            "model": "purchase_predictor",
            "features": ["last_purchase", "purchase_frequency", "user_engagement"],
            "prediction_horizon": "30_days"
        })
    """

    def __init__(self, field_type="Float", *args, ai_config: Optional[Dict[str, Any]] = None, **kwargs):
        """Set up the field, layering caller options over prediction defaults"""

        # Start from the prediction defaults ...
        merged_config: Dict[str, Any] = {
            "model": "default_predictor",
            "update_frequency": "daily",
            "confidence_threshold": 0.7,
            "prediction_horizon": "7_days",
            "prediction_type": "regression"
        }

        # ... and let anything the caller supplied take precedence.
        if ai_config:
            merged_config.update(ai_config)

        super().__init__(field_type, *args, ai_config=merged_config, **kwargs)

class EmbeddingField(VectorField):
    """
    Specialized vector field optimized for embedding storage and retrieval

    Examples:
        # Store product embeddings for recommendation
        product_embedding = EmbeddingField(
            dimensions=768,
            ai_config={
                "model": "sentence-transformers/all-MiniLM-L6-v2",
                "source_field": "description",
                "enable_similarity_search": True
            }
        )
    """

    def __init__(self, dimensions: int = 768, *args, ai_config: Optional[Dict[str, Any]] = None, **kwargs):
        """Initialize embedding field with optimized defaults.

        Args:
            dimensions: Length of the stored vectors.
            *args: Extra positional arguments forwarded to ``VectorField``.
            ai_config: Overrides for the embedding defaults listed below.
            **kwargs: Extra keyword arguments forwarded to ``VectorField``.
        """

        default_config = {
            "model": "internal-embeddings",
            "provider": "internal",
            "normalize": True,
            "enable_similarity_search": True,
            "similarity_threshold": 0.8,
            "index_type": "flat"
        }

        # Caller-supplied values win over the defaults.
        ai_config = {**default_config, **(ai_config or {})}

        super().__init__(*args, dimensions=dimensions, ai_config=ai_config, **kwargs)

    async def find_similar(self, query_vector: List[float], candidates: List[Dict[str, Any]], top_k: int = 10) -> List[Dict[str, Any]]:
        """Find the candidates whose embeddings are most similar to a query.

        Args:
            query_vector: Vector to compare every candidate against.
            candidates: Dicts carrying an ``"embedding"`` entry; candidates
                without one (or with an empty one) are skipped.
            top_k: Maximum number of matches to return.

        Returns:
            Copies of the matching candidates, each extended with a
            ``"similarity"`` score, ordered from most to least similar.
            Only candidates scoring at least
            ``ai_config["similarity_threshold"]`` are included.
        """

        if not candidates:
            return []

        threshold = self.ai_config["similarity_threshold"]

        # Calculate similarities
        similarities = []
        for candidate in candidates:
            embedding = candidate.get("embedding")
            if not embedding:
                continue
            similarity = self.similarity(query_vector, embedding)
            if similarity >= threshold:
                similarities.append({
                    **candidate,
                    "similarity": similarity
                })

        # Sort by similarity and return top_k
        similarities.sort(key=lambda x: x["similarity"], reverse=True)
        # A non-positive top_k yields no matches instead of slicing from the end.
        return similarities[:max(top_k, 0)]
# ========================================
# EXPORT AND MODULE INTERFACE
# ========================================

# Public names exposed by ``from <module> import *``.
# NOTE(review): the helpers defined further down in this file
# (create_smart_model, get_field_info, create_example_model) are not listed
# here - confirm whether that omission is intentional.
__all__ = [
    # Core AI field types
    "AIComputedField",
    "LLMField", 
    "VectorField",
    "SmartTextField",
    "PredictionField",
    "EmbeddingField",
    
    # Base classes and mixins
    "AIFieldMixin",
    "CreatesonlineField",
    "AIModelMixin",
    
    # Utilities
    "AIFieldManager",
    "get_ai_field_manager",
    "validate_ai_config",
    "check_ai_field_compatibility",
    
    # Internal engine (for testing)
    "InternalAIEngine",
]

# ========================================
# CONVENIENCE FUNCTIONS
# ========================================

def create_smart_model(model_name: str, fields: Dict[str, Any]) -> type:
    """Create a model class with AI fields dynamically.

    Args:
        model_name: Name of the generated class; its lower-cased form becomes
            ``__tablename__``.
        fields: Mapping of attribute name to a field spec dict.  Recognised
            spec keys:

            * ``"type"`` - one of ``"AIComputedField"`` (default),
              ``"LLMField"``, ``"VectorField"``, ``"SmartTextField"``,
              ``"PredictionField"`` or ``"EmbeddingField"``.
            * ``"ai_config"`` - dict passed to the field (default ``{}``).
            * ``"data_type"`` - for ``AIComputedField`` / ``PredictionField``
              (default ``"Float"``).
            * ``"dimensions"`` - for ``VectorField`` / ``EmbeddingField``
              (default ``768``).

            Specs with any other ``"type"`` are skipped.

    Returns:
        A new class deriving from ``AIModelMixin`` and, when SQLAlchemy is
        installed, from a fresh declarative base; in that case it also gets
        ``id``, ``created_at`` and ``updated_at`` columns.
    """

    class_attrs = {
        "__tablename__": model_name.lower(),
        "__module__": __name__,
    }

    # Add standard fields
    if SA_AVAILABLE:
        class_attrs["id"] = sa.Column(sa.Integer, primary_key=True)
        class_attrs["created_at"] = sa.Column(sa.DateTime, default=datetime.utcnow)
        class_attrs["updated_at"] = sa.Column(sa.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Add AI fields
    for field_name, field_config in fields.items():
        field_type = field_config.get("type", "AIComputedField")
        ai_config = field_config.get("ai_config", {})

        if field_type == "AIComputedField":
            data_type = field_config.get("data_type", "Float")
            class_attrs[field_name] = AIComputedField(data_type, ai_config=ai_config)
        elif field_type == "PredictionField":
            data_type = field_config.get("data_type", "Float")
            class_attrs[field_name] = PredictionField(data_type, ai_config=ai_config)
        elif field_type == "LLMField":
            class_attrs[field_name] = LLMField(ai_config=ai_config)
        elif field_type == "VectorField":
            dimensions = field_config.get("dimensions", 768)
            class_attrs[field_name] = VectorField(dimensions=dimensions, ai_config=ai_config)
        elif field_type == "EmbeddingField":
            dimensions = field_config.get("dimensions", 768)
            class_attrs[field_name] = EmbeddingField(dimensions=dimensions, ai_config=ai_config)
        elif field_type == "SmartTextField":
            class_attrs[field_name] = SmartTextField(ai_config=ai_config)
        # Any other type name is ignored, leaving the attribute undefined.

    # Create the class
    if SA_AVAILABLE:
        try:
            # Current location of declarative_base.
            from sqlalchemy.orm import declarative_base
        except ImportError:
            # Older SQLAlchemy releases only ship the legacy location.
            from sqlalchemy.ext.declarative import declarative_base
        Base = declarative_base()
        return type(model_name, (Base, AIModelMixin), class_attrs)
    else:
        return type(model_name, (AIModelMixin,), class_attrs)

def get_field_info() -> Dict[str, Any]:
    """Describe the AI field types, providers and global configuration.

    Returns a freshly built, purely static dict on every call.
    """

    def field_entry(description, use_cases, data_types, options):
        # Every field type is described with the same four keys.
        return {
            "description": description,
            "use_cases": use_cases,
            "data_types": data_types,
            "ai_config_options": options,
        }

    def provider_entry(description, features, dependencies):
        # Every provider is described with the same three keys.
        return {
            "description": description,
            "features": features,
            "dependencies": dependencies,
        }

    ai_fields = {
        "AIComputedField": field_entry(
            "Automatically computes values using ML models",
            ["predictions", "scoring", "classification"],
            ["Float", "Integer", "String", "Text"],
            ["model", "prediction_type", "features", "source_field",
             "confidence_threshold", "fallback_value"],
        ),
        "LLMField": field_entry(
            "Generates content using Large Language Models",
            ["content generation", "summarization", "translation"],
            ["Text"],
            ["model", "prompt_template", "max_tokens", "temperature",
             "provider", "fallback_content"],
        ),
        "VectorField": field_entry(
            "Stores vector embeddings for similarity search",
            ["semantic search", "recommendations", "clustering"],
            ["JSON"],
            ["model", "dimensions", "normalize", "similarity_metric",
             "source_field", "provider"],
        ),
        "SmartTextField": field_entry(
            "Enhanced text field with AI analysis",
            ["sentiment analysis", "keyword extraction", "language detection"],
            ["Text"],
            ["analyze_sentiment", "extract_keywords", "detect_language",
             "content_moderation", "summarize"],
        ),
        "PredictionField": field_entry(
            "Real-time predictions based on current data",
            ["forecasting", "trend analysis", "risk assessment"],
            ["Float", "Integer", "Date"],
            ["model", "prediction_horizon", "update_frequency",
             "features", "confidence_threshold"],
        ),
        "EmbeddingField": field_entry(
            "Optimized vector field for embeddings",
            ["document similarity", "product recommendations", "search"],
            ["JSON"],
            ["model", "dimensions", "similarity_threshold",
             "enable_similarity_search", "index_type"],
        ),
    }

    providers = {
        "internal": provider_entry(
            "Built-in CREATESONLINE AI engine",
            ["Mock predictions", "Hash-based embeddings", "Rule-based text analysis"],
            "None - Pure Python",
        ),
        "openai": provider_entry(
            "OpenAI API integration",
            ["GPT models", "Text embeddings", "Moderation"],
            "openai package + API key",
        ),
        "anthropic": provider_entry(
            "Anthropic Claude integration",
            ["Claude models", "Text generation"],
            "anthropic package + API key",
        ),
    }

    configuration = {
        "global_settings": [
            "default_provider", "cache_ttl", "enable_async",
            "timeout", "batch_size",
        ],
        "caching": {
            "enabled": True,
            "ttl": 3600,
            "size_limit": "100MB",
        },
    }

    info: Dict[str, Any] = {
        "framework": "CREATESONLINE",
        "version": "0.1.0",
    }
    info["ai_fields"] = ai_fields
    info["providers"] = providers
    info["configuration"] = configuration
    return info

# ========================================
# DEMONSTRATION AND EXAMPLES
# ========================================

def create_example_model():
    """Build the demonstration "ExampleLead" model with one field of each basic AI type"""

    # Numeric lead score produced by a regression model.
    lead_score_spec = {
        "type": "AIComputedField",
        "data_type": "Float",
        "ai_config": {
            "model": "lead_classifier",
            "prediction_type": "regression",
            "features": ["company_size", "industry", "engagement_score"],
            "confidence_threshold": 0.7,
            "fallback_value": 0.5
        }
    }

    # LLM-written summary of the lead.
    summary_spec = {
        "type": "LLMField",
        "ai_config": {
            "model": "gpt-4",
            "prompt_template": "Summarize this lead: {company_name} - {description}",
            "max_tokens": 150,
            "temperature": 0.3
        }
    }

    # Normalised embedding of the description.
    content_embedding_spec = {
        "type": "VectorField",
        "dimensions": 768,
        "ai_config": {
            "model": "text-embedding-ada-002",
            "source_field": "description",
            "normalize": True
        }
    }

    # Text field with sentiment, keyword and language analysis enabled.
    sentiment_spec = {
        "type": "SmartTextField",
        "ai_config": {
            "analyze_sentiment": True,
            "extract_keywords": True,
            "detect_language": True
        }
    }

    example_fields = {}
    example_fields["lead_score"] = lead_score_spec
    example_fields["summary"] = summary_spec
    example_fields["content_embedding"] = content_embedding_spec
    example_fields["sentiment"] = sentiment_spec

    return create_smart_model("ExampleLead", example_fields)