"""
OpenAI-compatible chat completions endpoint for SimaCode API.

Provides /api/v1/chat/completions endpoint that matches OpenAI's API specification,
enabling SimaCode's ReAct capabilities to be used as a drop-in replacement for
OpenAI chat completions.

Key features:
- Automatic tool execution (no confirmation required)
- Full ReAct engine integration
- Chunk aggregation into OpenAI response format
- Non-streaming and streaming support
"""

import asyncio
import json
import logging
import time
import uuid
from typing import Any, AsyncGenerator, Dict, List, Optional

try:
    from fastapi import APIRouter, Depends, HTTPException, Request
    from fastapi.responses import StreamingResponse
    FASTAPI_AVAILABLE = True
except ImportError:
    FASTAPI_AVAILABLE = False
    APIRouter = None
    Request = None

from ..dependencies import get_simacode_service
from ..models import OpenAIChatCompletionRequest, ErrorResponse
from ...core.service import SimaCodeService, ReActRequest

logger = logging.getLogger(__name__)

# Expose a router only when the FastAPI import above succeeded; otherwise leave it unset.
router = APIRouter() if FASTAPI_AVAILABLE else None


# ==================== Helper Functions ====================

def extract_last_user_message(messages: List[Dict[str, str]]) -> str:
    """
    Extract the text of the last user message from an OpenAI messages array.

    The messages are scanned from the end and the first entry whose role is
    "user" wins. Its content is normalised to a string because the caller
    slices the result: OpenAI allows ``content`` to be null or a list of
    typed content parts in addition to a plain string.

    Args:
        messages: List of message dicts with 'role' and 'content'

    Returns:
        The content of the last user message. Missing or null content yields
        an empty string; a list of content parts yields the ``text`` of the
        parts typed "text", joined with newlines; any other value is
        converted with ``str()``.

    Raises:
        HTTPException: If no user message found
    """
    for msg in reversed(messages):
        if msg.get("role") != "user":
            continue

        content = msg.get("content")
        if content is None:
            return ""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            # Multi-part content: keep only the textual parts, drop images etc.
            text_parts = [
                part.get("text") or ""
                for part in content
                if isinstance(part, dict) and part.get("type") == "text"
            ]
            return "\n".join(text_parts)
        return str(content)

    raise HTTPException(
        status_code=400,
        detail="No user message found in messages array"
    )


def generate_completion_id() -> str:
    """Return a new OpenAI-style completion ID: ``chatcmpl-`` plus 24 random hex characters."""
    random_hex = uuid.uuid4().hex
    return "chatcmpl-" + random_hex[:24]


def estimate_tokens(text: str) -> int:
    """
    Approximate the number of tokens in a string.

    Applies the rule of thumb of four characters per token and never
    reports fewer than one token, even for an empty string.

    Args:
        text: Input text

    Returns:
        Estimated token count (always >= 1)
    """
    approx = len(text) // 4
    if approx < 1:
        return 1
    return approx


def extract_context_from_headers(request: "Request") -> Dict[str, Any]:
    """
    Extract custom context from HTTP headers.

    Supports headers in format:
    - X-Context-Key: value
    - X-Simacode-Context-Key: value

    Prefix matching is case-insensitive. The context key is whatever follows
    the prefix in the header name exactly as reported by ``request.headers``
    (its casing is not altered here). A header consisting of the bare prefix
    with nothing after it is ignored rather than stored under an empty key.

    Args:
        request: FastAPI Request object (a falsy value yields an empty dict)

    Returns:
        Dictionary of extracted context
    """
    context: Dict[str, Any] = {}

    if not request:
        return context

    # (lower-cased prefix to match, label used in the debug log line)
    prefixes = (
        ("x-context-", "X-Context-"),
        ("x-simacode-context-", "X-Simacode-Context-"),
    )

    for header_name, header_value in request.headers.items():
        header_lower = header_name.lower()

        for prefix, label in prefixes:
            if not header_lower.startswith(prefix):
                continue

            # Slice by the prefix length so the offset can never drift from the literal
            key = header_name[len(prefix):]
            if key:
                context[key] = header_value
                logger.debug("Extracted context from header %s%s: %s", label, key, header_value)
            break

    if context:
        logger.info(f"Extracted {len(context)} context items from HTTP headers: {list(context.keys())}")

    return context


async def aggregate_chunks_to_content(chunks: List[Dict[str, Any]]) -> str:
    """
    Aggregate SimaCode chunks into a single content string.

    Strategy:
    - Keep informational chunks: content, conversational_response,
      tool_output, task_summary and error
    - Drop every other chunk type (status, planning, confirmation_request, ...)
    - Join the kept parts with blank lines

    A chunk whose ``content`` is missing, null or only whitespace is ignored,
    and a null ``metadata`` on a tool_output chunk is treated as empty, so one
    malformed chunk cannot abort the whole response.

    Args:
        chunks: List of chunk dictionaries from ReAct engine

    Returns:
        Aggregated content string, or "Task completed successfully." when no
        chunk contributed any text
    """
    content_parts: List[str] = []

    for chunk in chunks:
        chunk_type = chunk.get("type", "")
        raw_content = chunk.get("content")
        # An explicit null must behave like a missing key instead of raising on .strip()
        content = str(raw_content).strip() if raw_content is not None else ""

        if not content:
            continue

        # Include content-bearing chunks
        if chunk_type in ("content", "conversational_response"):
            content_parts.append(content)

        # Include tool outputs (user-visible results), labelled with the tool name
        elif chunk_type == "tool_output":
            tool_name = (chunk.get("metadata") or {}).get("tool_name", "tool")
            content_parts.append(f"[Tool: {tool_name}]\n{content}")

        # Task summary gets an extra leading newline to set it apart
        elif chunk_type == "task_summary":
            content_parts.append(f"\n{content}")

        # Include error information
        elif chunk_type == "error":
            content_parts.append(f"Error: {content}")

        # Any other chunk type is internal state and contributes nothing

    # Join all parts with blank lines
    aggregated = "\n\n".join(content_parts)

    # Fallback if no content
    if not aggregated.strip():
        aggregated = "Task completed successfully."

    return aggregated


def extract_tool_calls(chunks: List[Dict[str, Any]]) -> Optional[List[Dict[str, Any]]]:
    """
    Extract tool calls from chunks and convert to OpenAI format.

    Every chunk of type "tool_execution" becomes one OpenAI ``tool_calls``
    entry with a freshly generated ``call_`` id. The tool name and input are
    read from the chunk's ``metadata`` (a null or missing metadata is treated
    as empty).

    Note: SimaCode auto-executes tools, so these entries describe tools that
    were already run rather than calls awaiting execution by the client.

    Args:
        chunks: List of chunk dictionaries

    Returns:
        List of tool call dicts in OpenAI format, or None if no tools used
    """
    tool_calls = []

    for chunk in chunks:
        if chunk.get("type") != "tool_execution":
            continue

        metadata = chunk.get("metadata") or {}
        tool_name = metadata.get("tool_name", "unknown")
        tool_input = metadata.get("tool_input", {})

        # OpenAI expects `arguments` to be a JSON document encoded as a string.
        # str() would produce a Python repr (single quotes, True/None) that
        # clients cannot json.loads().
        if isinstance(tool_input, str):
            # Already a string: pass through unchanged (assumed pre-serialised)
            arguments = tool_input
        else:
            # default=str keeps one non-JSON value (e.g. a path or datetime)
            # from failing the whole response
            arguments = json.dumps(tool_input, ensure_ascii=False, default=str)

        tool_calls.append({
            "id": f"call_{uuid.uuid4().hex[:24]}",
            "type": "function",
            "function": {
                "name": tool_name,
                "arguments": arguments
            }
        })

    return tool_calls if tool_calls else None


# ==================== Main Endpoint ====================

@router.post("/completions")
async def chat_completions(
    body: OpenAIChatCompletionRequest,
    http_request: Request,
    service: SimaCodeService = Depends(get_simacode_service)
):
    """
    OpenAI-compatible chat completions endpoint.

    Takes the last user message as the ReAct task, attaches the request
    parameters plus any header-supplied context, and dispatches to the
    streaming or non-streaming generator depending on ``body.stream``.
    The ReAct request is always created with ``skip_confirmation=True``.

    Args:
        body: OpenAI-compatible chat completion request
        http_request: FastAPI Request object for accessing headers
        service: SimaCode service instance

    Returns:
        ChatCompletion dict (non-streaming) or StreamingResponse (streaming)

    Raises:
        HTTPException: Re-raised unchanged when raised while handling the
            request; any other exception is logged and reported as a 500
            whose detail is the exception text.
    """
    try:
        # The task is the most recent user turn
        user_message = extract_last_user_message(body.messages)
        logger.info(f"OpenAI-compat: Processing message: {user_message[:100]}...")

        # Context supplied through X-Context-* / X-Simacode-Context-* headers
        header_overrides = extract_context_from_headers(http_request)

        # Request-derived values first; header values win on key collisions
        request_context = {
            "openai_mode": True,
            "full_messages": body.messages,
            "model": body.model,
            "temperature": body.temperature,
            "max_tokens": body.max_tokens
        }
        react_request = ReActRequest(
            task=user_message,
            skip_confirmation=True,  # Critical: Auto-execute tools
            context={**request_context, **header_overrides}
        )

        if not body.stream:
            # Non-streaming: run to completion, bounded by the requested timeout (default 300s)
            return await generate_non_streaming_response(
                service,
                react_request,
                body,
                timeout=body.timeout or 300.0
            )

        # Streaming: hand the SSE generator to FastAPI
        return StreamingResponse(
            generate_streaming_response(service, react_request, body),
            media_type="text/event-stream"
        )

    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"Error in OpenAI-compat endpoint: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(exc))


# ==================== Non-Streaming Implementation ====================

def _build_chat_completion(
    completion_id: str,
    created: int,
    model: Any,
    content: str,
    prompt_text: str,
    finish_reason: str = "stop",
    tool_calls: Optional[List[Dict[str, Any]]] = None,
    system_fingerprint: Optional[str] = None,
) -> Dict[str, Any]:
    """Assemble an OpenAI ``chat.completion`` dict around one assistant message."""
    message: Dict[str, Any] = {"role": "assistant", "content": content}
    if tool_calls:
        message["tool_calls"] = tool_calls

    # Usage is a rough estimate derived from the prompt text and the returned content
    prompt_tokens = estimate_tokens(prompt_text)
    completion_tokens = estimate_tokens(content)

    completion: Dict[str, Any] = {
        "id": completion_id,
        "object": "chat.completion",
        "created": created,
        "model": model,
        "choices": [
            {
                "index": 0,
                "message": message,
                "finish_reason": finish_reason,
                "logprobs": None
            }
        ],
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens
        }
    }
    if system_fingerprint is not None:
        completion["system_fingerprint"] = system_fingerprint

    return completion


async def generate_non_streaming_response(
    service: SimaCodeService,
    react_request: ReActRequest,
    original_request: OpenAIChatCompletionRequest,
    timeout: float = 300.0
) -> Dict[str, Any]:
    """
    Generate non-streaming OpenAI-compatible response.

    Process flow:
    1. Collect every chunk from the ReAct engine, bounded by ``timeout``
    2. Aggregate all chunks into single content
    3. Extract tool calls if any
    4. Format as OpenAI ChatCompletion

    Failures other than a timeout are not raised: they are logged and
    returned as a normal completion whose content is
    "Error executing task: <message>". The usage figures of that response
    are estimated from the content actually returned.

    Args:
        service: SimaCode service
        react_request: Internal ReAct request
        original_request: Original OpenAI request (for metadata)
        timeout: Maximum execution time in seconds (default: 300s)

    Returns:
        ChatCompletion dict in OpenAI format

    Raises:
        HTTPException: 504 when chunk collection exceeds ``timeout``
    """
    completion_id = generate_completion_id()
    created_timestamp = int(time.time())

    try:
        # Collect all chunks with timeout
        chunks = await asyncio.wait_for(
            _collect_all_chunks(service, react_request),
            timeout=timeout
        )

        logger.debug(f"Collected {len(chunks)} chunks from ReAct engine")

        content = await aggregate_chunks_to_content(chunks)
        tool_calls = extract_tool_calls(chunks)
        finish_reason = _determine_finish_reason(chunks, content)

        response = _build_chat_completion(
            completion_id,
            created_timestamp,
            original_request.model,
            content,
            react_request.task,
            finish_reason=finish_reason,
            tool_calls=tool_calls,
            system_fingerprint=f"simacode-react-{service.config.ai.model}"
        )

        logger.info(f"OpenAI-compat: Generated response with {len(content)} chars, "
                   f"finish_reason={finish_reason}")

        return response

    except asyncio.TimeoutError:
        logger.error(f"ReAct execution timeout after {timeout}s")
        raise HTTPException(
            status_code=504,
            detail=f"Request timeout after {timeout}s. Try breaking down the task or increasing timeout."
        )
    except Exception as e:
        logger.error(f"Error generating non-streaming response: {e}", exc_info=True)

        # Return the error as chat completion content so OpenAI clients still
        # receive a well-formed response
        return _build_chat_completion(
            completion_id,
            created_timestamp,
            original_request.model,
            f"Error executing task: {str(e)}",
            react_request.task
        )


async def _collect_all_chunks(
    service: SimaCodeService,
    react_request: ReActRequest
) -> List[Dict[str, Any]]:
    """
    Collect all chunks from ReAct engine streaming output.

    Calls ``service.process_react`` with ``stream=True`` and drains the
    resulting async iterator into a list, in arrival order.

    Args:
        service: SimaCode service
        react_request: ReAct request

    Returns:
        List of all chunk dictionaries
    """
    chunks: List[Dict[str, Any]] = []

    # Process ReAct with streaming to capture all chunks
    result_gen = await service.process_react(react_request, stream=True)
    async for chunk in result_gen:
        chunks.append(chunk)
        # `or ""` keeps a null content from raising inside a diagnostic line
        # and aborting the whole collection
        logger.debug(
            "Chunk collected: type=%s, content_len=%d",
            chunk.get("type"),
            len(chunk.get("content") or "")
        )

    return chunks


def _determine_finish_reason(chunks: List[Dict[str, Any]], content: str) -> str:
    """
    Determine OpenAI finish_reason from chunks.

    Args:
        chunks: List of chunks
        content: Aggregated content

    Returns:
        OpenAI finish_reason: "stop", "length", "tool_calls", or "content_filter"
    """
    # Check for errors
    for chunk in chunks:
        if chunk.get("type") == "error":
            return "stop"  # Errors result in natural stop

    # Check for tool calls (though SimaCode auto-executes)
    has_tool_calls = any(c.get("type") == "tool_execution" for c in chunks)
    if has_tool_calls:
        # Note: We return "stop" not "tool_calls" because tools are already executed
        # OpenAI's "tool_calls" implies waiting for user to provide tool results
        return "stop"

    # Default: Natural completion
    return "stop"


# ==================== Streaming Implementation ====================

def _format_sse_chunk(
    completion_id: str,
    created: int,
    model: Any,
    delta: Dict[str, Any],
    finish_reason: Optional[str] = None,
) -> str:
    """Serialise one ``chat.completion.chunk`` payload as a single SSE ``data:`` event."""
    payload = {
        "id": completion_id,
        "object": "chat.completion.chunk",
        "created": created,
        "model": model,
        "choices": [{
            "index": 0,
            "delta": delta,
            "finish_reason": finish_reason,
            "logprobs": None
        }]
    }
    return f"data: {json.dumps(payload)}\n\n"


async def generate_streaming_response(
    service: SimaCodeService,
    react_request: ReActRequest,
    original_request: OpenAIChatCompletionRequest
) -> AsyncGenerator[str, None]:
    """
    Generate streaming OpenAI-compatible response (SSE format).

    Process flow:
    1. Get ReAct chunks stream
    2. Convert each content-bearing chunk to a ChatCompletionChunk delta;
       the first delta sent also carries the assistant role
    3. Output each as an SSE ``data:`` event followed by a blank line
    4. If nothing was sent, emit the same fallback text the non-streaming
       path uses, so the client never receives an empty message
    5. Send final chunk with finish_reason "stop"
    6. Send ``data: [DONE]``

    An exception raised while streaming is logged and reported to the client
    as a final content delta ("Error: <message>") followed by ``[DONE]``.

    Args:
        service: SimaCode service
        react_request: Internal ReAct request
        original_request: Original OpenAI request

    Yields:
        SSE formatted strings
    """
    completion_id = generate_completion_id()
    created_timestamp = int(time.time())
    model = original_request.model
    has_content = False  # True once any content delta has been sent

    # Chunk types that carry engine state rather than user-visible output
    internal_types = (
        "status", "planning", "confirmation_request",
        "confirmation_received", "task_init", "task_accepted",
        "reasoning", "planning_timeout_reset",
    )

    try:
        # Get streaming chunks from ReAct engine
        result_gen = await service.process_react(react_request, stream=True)

        async for chunk in result_gen:
            chunk_type = chunk.get("type", "")
            raw_content = chunk.get("content")
            # Used for diagnostics only; a null content must not raise here
            preview = str(raw_content).strip() if raw_content is not None else ""

            logger.debug(
                "[STREAM DEBUG] Received chunk: type=%s, content_length=%d, content_preview=%s",
                chunk_type, len(preview), preview[:100] if preview else "EMPTY"
            )

            # Skip internal/non-content chunks
            if chunk_type in internal_types:
                logger.debug("[STREAM DEBUG] Skipping internal chunk type: %s", chunk_type)
                continue

            # Extract delta content from meaningful chunks
            delta_content = _extract_delta_content(chunk, chunk_type)
            logger.debug(
                "[STREAM DEBUG] Extracted delta_content: length=%d, preview=%s",
                len(delta_content) if delta_content else 0,
                delta_content[:100] if delta_content else "NONE"
            )

            if not delta_content:
                continue

            # The first delta sent announces the role, as in OpenAI's own streams
            delta: Dict[str, Any] = {}
            if not has_content:
                delta["role"] = "assistant"
            delta["content"] = delta_content
            has_content = True

            yield _format_sse_chunk(completion_id, created_timestamp, model, delta)

        if not has_content:
            # Mirror the non-streaming fallback instead of ending with an empty message
            yield _format_sse_chunk(
                completion_id, created_timestamp, model,
                {"role": "assistant", "content": "Task completed successfully."}
            )
            has_content = True

        # Send final chunk with finish_reason
        yield _format_sse_chunk(completion_id, created_timestamp, model, {}, finish_reason="stop")

        # Send [DONE] marker
        yield "data: [DONE]\n\n"

        logger.info(f"Streaming completed for {completion_id}")

    except Exception as e:
        logger.error(f"Error in streaming response: {e}", exc_info=True)

        # Send error chunk; include the role when it is the first delta sent
        error_delta: Dict[str, Any] = {}
        if not has_content:
            error_delta["role"] = "assistant"
        error_delta["content"] = f"\n\nError: {str(e)}"

        yield _format_sse_chunk(
            completion_id, created_timestamp, model, error_delta, finish_reason="stop"
        )
        yield "data: [DONE]\n\n"


def _extract_delta_content(chunk: Dict[str, Any], chunk_type: str) -> Optional[str]:
    """
    Extract delta content from a ReAct chunk.

    Strategy:
    - content, conversational_response: Use as-is
    - tool_output: Format with tool name
    - task_summary: Include as summary
    - error: Include as error message
    - Others: Skip

    Args:
        chunk: ReAct chunk dictionary
        chunk_type: Type of the chunk

    Returns:
        Delta content string or None to skip
    """
    content = chunk.get("content", "").strip()

    if not content:
        return None

    # Direct content chunks
    if chunk_type in ["content", "conversational_response"]:
        return content

    # Tool output with formatting
    elif chunk_type == "tool_output":
        tool_name = chunk.get("metadata", {}).get("tool_name", "tool")
        return f"\n\n[Tool: {tool_name}]\n{content}"

    # Task summary
    elif chunk_type == "task_summary":
        return f"\n\n{content}"

    # Error messages
    elif chunk_type == "error":
        return f"\n\nError: {content}"

    # Completion marker (no content)
    elif chunk_type == "completion":
        return None

    # Unknown types - skip
    else:
        logger.debug(f"Skipping unknown chunk type for streaming: {chunk_type}")
        return None
