"""
PowerPoint DSL Parser Module

This module provides parsing functionality for PowerPoint editing DSL format.
It converts pipe-delimited markdown-style DSL into structured data for PowerPoint editing.
"""

import re
import json
import logging
import ast
from typing import Dict, Optional, Any, List
from datetime import datetime

# Configure logger
logger = logging.getLogger(__name__)

def _fix_python_literal_quotes(text: str) -> str:
    """Fixes unescaped apostrophes in Python literal strings.
    
    This function specifically handles the case where text content within Python
    literals contains unescaped apostrophes that break ast.literal_eval parsing.
    
    Args:
        text: Python literal string that may contain unescaped apostrophes
        
    Returns:
        Fixed string with properly escaped apostrophes
    """
    import re
    
    def fix_text_value(match):
        """Fix apostrophes within a 'text': '...' value"""
        prefix = match.group(1)  # 'text': '
        content = match.group(2)  # the actual text content
        suffix = match.group(3)   # '
        
        # Escape apostrophes that aren't already escaped
        # Temporarily protect existing escapes
        content = content.replace("\\'", "___TEMP_ESCAPE___")
        # Escape unescaped apostrophes
        content = content.replace("'", "\\'")
        # Restore protected escapes
        content = content.replace("___TEMP_ESCAPE___", "\\'")
        
        return f"{prefix}{content}{suffix}"
    
    # Fix 'text' values specifically - pattern matches 'text': 'content with apostrophe's'
    # This regex handles the most common case where text content has unescaped apostrophes
    fixed_text = re.sub(r"('text':\s*')([^']*(?:\\'[^']*)*)(')(?=\s*[,}])", fix_text_value, text)
    
    # If that didn't fix it, try a more general approach
    if fixed_text.count("'") % 2 != 0:
        # Count quotes to see if we still have an odd number
        # Apply a broader fix for any long quoted strings (likely text content)
        def fix_long_strings(match):
            content = match.group(1)
            if len(content) > 15:  # Only fix long strings (likely text content)
                # Same escape logic as above
                content = content.replace("\\'", "___TEMP_ESCAPE___")
                content = content.replace("'", "\\'")
                content = content.replace("___TEMP_ESCAPE___", "\\'")
            return f"'{content}'"
        
        # Match any single-quoted string
        fixed_text = re.sub(r"'([^']*(?:\\'[^']*)*)'(?=\s*[,}\]])", fix_long_strings, fixed_text)
    
    return fixed_text

def _sanitize_text_content(text: str) -> str:
    """
    Sanitize text content by removing HTML-like tags and converting to plain text.
    
    Args:
        text: Text content that may contain HTML-like formatting
        
    Returns:
        Plain text with HTML tags stripped and proper line breaks
    """
    if not text or not isinstance(text, str):
        return text
    
    # Remove HTML tags but preserve the text content
    # Handle common HTML tags that might be generated by LLM
    import re
    
    # Replace <p> tags with line breaks (but not the first one)
    text = re.sub(r'<p[^>]*>', '\n', text)
    text = re.sub(r'</p>', '', text)
    
    # Remove all other HTML tags
    text = re.sub(r'<[^>]+>', '', text)
    
    # Clean up multiple consecutive newlines
    text = re.sub(r'\n\s*\n', '\n', text)
    
    # Strip leading/trailing whitespace and newlines
    text = text.strip()
    
    # If text starts with a newline, remove it (from first <p> replacement)
    if text.startswith('\n'):
        text = text[1:]
    
    return text

def _parse_shape_properties(entry: str):
    """
    Parse shape properties from a comma-separated string, handling quoted values and brackets properly.
    
    Args:
        entry: String like "shape_name, prop1=value1, prop2="quoted value", table_data="[['A', 'B'], ['C', 'D']]"
        
    Returns:
        A tuple containing the list of property strings and the extractions dictionary.
    """
    # First, extract bracketed content and JSON objects to avoid comma splitting issues
    import re
    
    # Dictionary to store extracted content
    extractions = {}
    bracket_counter = 0
    json_counter = 0
    
    def extract_brackets(match):
        """Extract bracketed content and replace with placeholder"""
        nonlocal bracket_counter
        content = match.group(1)
        placeholder = f"BRACKET_PLACEHOLDER_{bracket_counter}"
        extractions[placeholder] = content
        bracket_counter += 1
        return f"[{placeholder}]"
    
    def extract_json_objects(match):
        """Extract JSON object content and replace with placeholder"""
        nonlocal json_counter
        content = match.group(1)
        placeholder = f"JSON_PLACEHOLDER_{json_counter}"
        extractions[placeholder] = content
        json_counter += 1
        return f"{{{placeholder}}}"
    
    # Extract nested brackets for table properties
    # This regex handles nested brackets like [[...], [...]] 
    entry_with_placeholders = re.sub(r'\[([^\[\]]*(?:\[[^\[\]]*\][^\[\]]*)*)\]', extract_brackets, entry)
    
    # Extract JSON objects to avoid comma splitting issues
    # This regex handles nested JSON objects like {"key": ["val1", "val2"], "key2": [{"nested": "value"}]}
    entry_with_placeholders = re.sub(r'\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}', extract_json_objects, entry_with_placeholders)
    
    # Now parse the entry with placeholders
    parts = []
    current_part = ""
    in_quotes = False
    quote_char = None
    
    i = 0
    while i < len(entry_with_placeholders):
        char = entry_with_placeholders[i]
        
        if char in ['"', "'"] and not in_quotes:
            # Start of quoted string
            in_quotes = True
            quote_char = char
            current_part += char
        elif char == quote_char and in_quotes:
            # End of quoted string
            in_quotes = False
            quote_char = None
            current_part += char
        elif char == ',' and not in_quotes:
            # Comma outside quotes - end of property
            if current_part.strip():
                parts.append(current_part.strip())
            current_part = ""
        else:
            current_part += char
        
        i += 1
    
    # Add the last part
    if current_part.strip():
        parts.append(current_part.strip())
    
    return parts, extractions

def _parse_property_value(value: str, extractions: Dict[str, str] = None) -> Any:
    """
    Parse a property value string into the appropriate Python type.
    
    Args:
        value: String value to parse
        extractions: Dictionary of placeholder -> content mappings for restoration
        
    Returns:
        Parsed value (str, int, float, bool, dict, list)
    """
    if not value:
        return value
    
    # Restore placeholders if extractions are provided (with recursive restoration)
    if extractions:
        # Recursive placeholder restoration to handle nested structures
        def restore_placeholders_recursive(text, extractions, max_iterations=10):
            """Recursively restore placeholders to handle nested structures."""
            for iteration in range(max_iterations):
                changed = False
                for placeholder, content in extractions.items():
                    if placeholder in text:
                        text = text.replace(placeholder, content)
                        changed = True
                if not changed:
                    break  # No more placeholders to replace
            return text
        
        value = restore_placeholders_recursive(value, extractions)
    
    # Handle backtick-wrapped values (remove backticks)
    if value.startswith('`') and value.endswith('`'):
        value = value[1:-1]
    
    # Handle quoted strings that contain JSON-like structures
    if (value.startswith('"') and value.endswith('"')) or (value.startswith("'") and value.endswith("'")):
        # Remove quotes
        text_value = value[1:-1]
        
        # Check if it's a JSON object or array (starts with { or [)
        if (text_value.startswith('{') and text_value.endswith('}')) or (text_value.startswith('[') and text_value.endswith(']')):
            try:
                import json
                # Try to parse as JSON - first attempt direct parsing
                return json.loads(text_value)
            except (json.JSONDecodeError, ValueError):
                # If JSON parsing fails, try unescaping quotes and parsing again
                try:
                    import json
                    # Handle escaped quotes in JSON strings
                    unescaped_text = text_value.replace('\\"', '"')
                    return json.loads(unescaped_text)
                except (json.JSONDecodeError, ValueError):
                    # If JSON parsing still fails, try ast.literal_eval for Python literal structures
                    try:
                        import ast
                        # Apply robust quote fixing before ast.literal_eval to handle unescaped apostrophes
                        fixed_text = _fix_python_literal_quotes(text_value)
                        return ast.literal_eval(fixed_text)
                    except (ValueError, SyntaxError) as e:
                        logger.warning(f"Could not parse Python literal '{text_value[:100]}...': {e}")
                        # If both fail, return as string
                        pass
        
        # Handle escape sequences for regular text
        text_value = text_value.replace('\\n', '\n')
        text_value = text_value.replace('\\t', '\t')
        text_value = text_value.replace('\\r', '\r')
        text_value = text_value.replace('\\\\', '\\')
        text_value = text_value.replace('\\"', '"')
        text_value = text_value.replace("\\'", "'")
        return text_value
    
    # Handle boolean values
    if value.lower() == 'true':
        return True
    elif value.lower() == 'false':
        return False
    
    # Handle numeric values
    try:
        # Try integer first
        if '.' not in value:
            return int(value)
        else:
            return float(value)
    except ValueError:
        pass
    
    # Handle hex colors (keep as string)
    if value.startswith('#') and len(value) == 7:
        return value
    
    # Handle unquoted JSON-like structures (fallback)
    if (value.startswith('{') and value.endswith('}')) or (value.startswith('[') and value.endswith(']')):
        try:
            import json
            return json.loads(value)
        except (json.JSONDecodeError, ValueError):
            try:
                import ast
                return ast.literal_eval(value)
            except (ValueError, SyntaxError):
                pass
    
    # Default to string
    return value

def parse_slide_operations_dsl(dsl_input: str) -> Optional[Dict[str, Any]]:
    """
    Parse slide management DSL operations into structured commands.

    Expected input format:
        operation_type: param=value, param=value | next_operation: param=value

    Examples:
        "add_slide: position=1, layout=\"Title Slide\""
        "delete_slide: slide_number=3"
        "move_slide: from=2, to=5"
        "duplicate_slide: source=1, position=end"
        "add_slide: position=1, layout=\"Title Slide\" | delete_slide: slide_number=5"

    Operations are separated by ``|``; a ``|`` inside a quoted value does not
    separate operations. Sections that are malformed, of an unknown type or
    missing a required parameter are logged and skipped. ``position=end`` is
    converted to ``-1`` for ``add_slide`` and ``duplicate_slide``, and
    ``duplicate_slide`` defaults ``position`` to ``-1``.

    Args:
        dsl_input: String in DSL format with slide operations

    Returns:
        Dictionary with keys ``operations`` (list of ``{"type", "parameters",
        "raw_section"}``), ``total_operations``, ``operation_types`` (distinct
        types in first-seen order) and ``parsed_at`` (ISO timestamp), or None
        when the input is invalid, no valid operation was found, or an
        unexpected error occurred.
    """
    # Per operation type: required parameters and the warning to log when any
    # of them is missing.
    operation_specs = {
        "add_slide": (
            ("position",),
            "add_slide operation missing required 'position' parameter",
        ),
        "delete_slide": (
            ("slide_number",),
            "delete_slide operation missing required 'slide_number' parameter",
        ),
        "move_slide": (
            ("from", "to"),
            "move_slide operation missing required 'from' and/or 'to' parameters",
        ),
        "duplicate_slide": (
            ("source",),
            "duplicate_slide operation missing required 'source' parameter",
        ),
    }

    def split_operations(text):
        """Split text on '|' characters that are outside quoted strings."""
        sections = []
        current = []
        quote_char = None
        escaped = False
        for char in text:
            if escaped:
                escaped = False
            elif quote_char is not None:
                if char == '\\':
                    escaped = True
                elif char == quote_char:
                    quote_char = None
            elif char in ('"', "'"):
                quote_char = char
            elif char == '|':
                sections.append("".join(current))
                current = []
                continue
            current.append(char)
        if quote_char is not None:
            # Unbalanced quote (e.g. a stray apostrophe in an unquoted value):
            # the quoting cannot be trusted, so fall back to a plain split
            # rather than swallowing the remaining operations.
            return text.split('|')
        sections.append("".join(current))
        return sections

    logger.info("Starting to parse slide operations DSL")

    try:
        if not dsl_input or not isinstance(dsl_input, str):
            logger.error("Invalid DSL input: empty or not a string")
            return None

        parsed_at = datetime.now().isoformat()
        operations = []
        operation_types = []  # list, not set: keeps a stable first-seen order

        for raw in split_operations(dsl_input):
            section = raw.strip()
            if not section:
                continue

            # Parse each operation: "operation_type: param=value, param=value"
            type_part, colon, params_part = section.partition(':')
            if not colon:
                logger.warning(f"Invalid operation format (missing colon): {section}")
                continue

            operation_type = type_part.strip()
            params_str = params_part.strip()

            operation_params = {}
            if params_str:
                # Comma-split that respects quotes, brackets and braces
                param_parts, extractions = _parse_shape_properties(params_str)
                for param in param_parts:
                    key, equals, raw_value = param.partition('=')
                    if equals:
                        operation_params[key.strip()] = _parse_property_value(
                            raw_value.strip(), extractions
                        )

            # Validate operation type and required parameters
            spec = operation_specs.get(operation_type)
            if spec is None:
                logger.warning(f"Unknown operation type: {operation_type}")
                valid_operation = False
            else:
                required, missing_message = spec
                valid_operation = all(name in operation_params for name in required)
                if not valid_operation:
                    logger.warning(missing_message)

            if not valid_operation:
                logger.warning(f"Skipping invalid operation: {section}")
                continue

            # "end" is represented by the special marker -1
            if operation_type == "add_slide":
                if operation_params["position"] == "end":
                    operation_params["position"] = -1
            elif operation_type == "duplicate_slide":
                # A missing position also means "end"
                if operation_params.get("position", "end") == "end":
                    operation_params["position"] = -1

            operations.append({
                "type": operation_type,
                "parameters": operation_params,
                "raw_section": section
            })
            if operation_type not in operation_types:
                operation_types.append(operation_type)
            logger.debug(f"Parsed valid operation: {operation_type} with params {operation_params}")

        if not operations:
            logger.error("No valid operations found in DSL input")
            return None

        result = {
            "operations": operations,
            "total_operations": len(operations),
            "operation_types": operation_types,
            "parsed_at": parsed_at
        }

        logger.info(f"Successfully parsed {result['total_operations']} slide operations")
        return result

    except Exception as e:
        # Top-level boundary: callers rely on None instead of an exception
        logger.error(f"Error in parse_slide_operations_dsl: {str(e)}", exc_info=True)
        return None

def validate_slide_dsl_format(dsl_input: str) -> Dict[str, Any]:
    """
    Check whether a slide-operations DSL string can be parsed.

    The input is rejected immediately when it is empty, not a string, or has
    no ':' at all; otherwise it is handed to ``parse_slide_operations_dsl``
    and counts as valid when at least one operation was parsed.

    Args:
        dsl_input: DSL string to validate

    Returns:
        Dictionary with the keys ``valid`` (bool), ``errors``, ``warnings``
        and ``suggestions`` (lists of strings). Any exception raised while
        validating is reported as a single entry in ``errors``.
    """
    try:
        errors = []
        suggestions = []
        report = {
            "valid": False,
            "errors": errors,
            "warnings": [],
            "suggestions": suggestions
        }

        if not (dsl_input and isinstance(dsl_input, str)):
            errors.append("DSL input is empty or not a string")
        elif ':' not in dsl_input:
            # Without a colon no section can name an operation type
            errors.append("DSL format must contain ':' to separate operation type from parameters")
            suggestions.append("Use format: operation_type: param=value, param=value")
        else:
            # Delegate the detailed checks to the real parser
            parsed = parse_slide_operations_dsl(dsl_input)
            operation_count = parsed.get("total_operations", 0) if parsed else 0

            if operation_count > 0:
                report["valid"] = True
                suggestions.append(f"Successfully parsed {parsed['total_operations']} operations")
            else:
                errors.append("No valid operations could be parsed from DSL input")
                suggestions.extend([
                    "Supported operations: add_slide, delete_slide, move_slide, duplicate_slide",
                    'Example: add_slide: position=1, layout="Title Slide"',
                    "Example: delete_slide: slide_number=3",
                    "Example: move_slide: from=2, to=5",
                    "Example: duplicate_slide: source=1, position=end"
                ])

        return report

    except Exception as exc:
        return {
            "valid": False,
            "errors": [f"Validation error: {str(exc)}"],
            "warnings": [],
            "suggestions": ["Check DSL syntax and try again"]
        }

def get_dsl_format_examples() -> Dict[str, Any]:
    """
    Build the reference documentation for the slide-operations DSL.

    Returns:
        A freshly built dictionary with the general ``format`` string, one
        entry per supported operation under ``operations`` (description,
        required/optional parameters, examples), examples of chaining under
        ``multiple_operations``, and notes on ``special_values``.
    """
    def describe(description, required, optional, examples):
        """Assemble the documentation entry for a single operation."""
        return {
            "description": description,
            "required_params": required,
            "optional_params": optional,
            "examples": examples,
        }

    operations = {}
    operations["add_slide"] = describe(
        "Add a new slide at the specified position",
        ["position"],
        ["layout"],
        [
            'add_slide: position=1, layout="Title Slide"',
            'add_slide: position=end, layout="Blank"',
            'add_slide: position=3',
        ],
    )
    operations["delete_slide"] = describe(
        "Delete a slide by its number",
        ["slide_number"],
        [],
        [
            'delete_slide: slide_number=3',
            'delete_slide: slide_number=5',
        ],
    )
    operations["move_slide"] = describe(
        "Move a slide from one position to another",
        ["from", "to"],
        [],
        [
            'move_slide: from=2, to=5',
            'move_slide: from=1, to=3',
        ],
    )
    operations["duplicate_slide"] = describe(
        "Duplicate a slide and place it at the specified position",
        ["source"],
        ["position"],
        [
            'duplicate_slide: source=1, position=end',
            'duplicate_slide: source=3, position=5',
            'duplicate_slide: source=2',  # position omitted: defaults to end
        ],
    )

    chaining = {
        "description": "Use pipe (|) to separate multiple operations",
        "examples": [
            'add_slide: position=1, layout="Title Slide" | delete_slide: slide_number=5',
            'move_slide: from=2, to=3 | duplicate_slide: source=1, position=end',
            'add_slide: position=end, layout="Blank" | add_slide: position=end, layout="Title Slide"',
        ],
    }

    special_values = {
        "position": "Use 'end' to add at the end of the presentation",
        "layout": 'Quote layout names with spaces, e.g. "Title Slide"',
        "slide_number": "Use 1-based slide numbering",
    }

    return {
        "format": "operation_type: param=value, param=value | next_operation: param=value",
        "operations": operations,
        "multiple_operations": chaining,
        "special_values": special_values,
    }
