from openai import OpenAI
from anthropic import Anthropic
from google import genai
from google.genai import types
from groq import Groq
import os
import json
import re
from PIL import Image
import time
import random
from anthropic import APIError
from agents import Agent, handoff, ModelSettings
from agents.extensions.handoff_prompt import prompt_with_handoff_instructions
from agents.extensions.models.litellm_model import LitellmModel
from agents.run import Runner          

import pikepdf
import tempfile
import base64
import PyPDF2
import io
from concurrent.futures import ThreadPoolExecutor, as_completed

from .config import Config

from dotenv import load_dotenv
load_dotenv()

import logging
logger = logging.getLogger(__name__)

class AIAgent:
    """
    Universal AI Agent supporting multiple LLM providers with native SDKs
    
    Supported providers: anthropic, openai, gemini, groq, deepseek
        
    Environment variables (based on provider):
    - ANTHROPIC_API_KEY
    - OPENAI_API_KEY
    - GEMINI_API_KEY
    - GROQ_API_KEY
    - DEEPSEEK_API_KEY
    - MAX_TOKENS (optional, default: 5000)
    - TEMPERATURE (optional, default: 0.1)
    
    Usage:
        # Single provider
        agent = AIAgent(provider="groq")
    """

    def __init__(self, provider: str = "anthropic", verbose: bool = False):
        """
        Build an agent bound to exactly one LLM provider.

        The provider's entry in ``Config().llm_config`` supplies the client
        settings and the model names for the smart, fast and reasoning tiers.
        All three tiers share one client object and one configuration dict.

        Args:
            provider: Key of the provider entry in the LLM configuration
            verbose: When True, log an info line describing the chosen setup

        Raises:
            ValueError: If ``provider`` has no entry in the configuration
            RuntimeError: If the SDK client cannot be created
        """
        self.verbose = verbose
        self.providers_cfg = Config().llm_config

        known_providers = self.providers_cfg
        if provider not in known_providers:
            available = ', '.join(known_providers.keys())
            raise ValueError(
                f"Unknown provider '{provider}'. Available providers: {available}"
            )
        settings = known_providers[provider]

        try:
            self.llm_client = self._init_client(settings)
        except Exception as e:
            raise RuntimeError(f"Failed to initialize {provider} client: {str(e)}") from e

        self.provider = provider

        # Model names come from the "<tier>_model" keys of the provider entry
        for tier in ("smart", "fast", "reasoning"):
            setattr(self, f"{tier}_model", settings[f"{tier}_model"])

        # Every tier reuses the same configuration dict and the same client
        self.smart_config = self.fast_config = self.reasoning_config = settings
        self.smart_llm_client = self.fast_llm_client = self.reasoning_llm_client = self.llm_client

        # Generation defaults, overridable through the environment
        self.max_tokens = int(os.getenv("MAX_TOKENS", 5000))
        self.temperature = float(os.getenv("TEMPERATURE", 0.1))

        if not self.verbose:
            return
        logger.info(f"Initialized AIAgent with provider={provider}, smart={self.smart_model}, "
                   f"fast={self.fast_model}, reasoning={self.reasoning_model}")

    # ============================================================================================== #
    # ======================================== HELPERS ============================================= #
    # ============================================================================================== #

    def _init_client(self, config):
        """
        Initialize the appropriate native SDK client.
        
        Args:
            config: Provider configuration dictionary
            
        Returns:
            Initialized client object
            
        Raises:
            ValueError: If API key is missing or SDK type is unsupported
        """
        sdk = config["sdk"]
        api_key_env = config["api_key_env"]
        api_key = os.environ.get(api_key_env)
        
        if not api_key:
            raise ValueError(
                f"API key not found. Set {api_key_env} environment variable"
            )
        
        try:
            if sdk == "anthropic":
                return Anthropic(api_key=api_key)
            
            elif sdk == "openai":
                kwargs = {"api_key": api_key}
                if "base_url" in config:
                    kwargs["base_url"] = config["base_url"]
                return OpenAI(**kwargs)
            
            elif sdk == "gemini":
                return genai.Client(api_key=api_key)
            
            elif sdk == "groq":
                return Groq(api_key=api_key)
            
            else:
                raise ValueError(f"Unsupported SDK type: {sdk}")
                
        except Exception as e:
            raise RuntimeError(f"Failed to create {sdk} client: {str(e)}") from e

    def _extract_response_text(self, response, config):
        """
        Extract text from response using config path.
        
        Args:
            response: LLM response object
            config: Provider configuration dictionary
            
        Returns:
            Extracted text string
            
        Raises:
            ValueError: If response path is unknown
            AttributeError: If response structure doesn't match expected path
        """
        path = config["response_path"]
        
        try:
            if path == "content[0].text":
                return response.content[0].text
            elif path == "choices[0].message.content":
                return response.choices[0].message.content
            elif path == "text":
                return response.text
            else:
                raise ValueError(f"Unknown response path: {path}")
        except (AttributeError, IndexError) as e:
            raise AttributeError(
                f"Failed to extract text using path '{path}': {str(e)}"
            ) from e

    def _extract_stream_text(self, chunk, config):
        """
        Extract text from stream chunk using config path.
        
        Args:
            chunk: Stream chunk object
            config: Provider configuration dictionary
            
        Returns:
            Extracted text string or None if no text in chunk
        """
        path = config["stream_path"]
        
        try:
            if path == "delta.text":
                return getattr(chunk.delta, 'text', None) if hasattr(chunk, 'delta') else None
            elif path == "choices[0].delta.content":
                if hasattr(chunk, 'choices') and chunk.choices:
                    return chunk.choices[0].delta.content if hasattr(chunk.choices[0].delta, 'content') else None
            elif path == "text":
                return chunk.text if hasattr(chunk, 'text') else None
        except (AttributeError, IndexError):
            pass
        
        return None
    
    def _select_scope(self, scope: str):
        """
        Returns (client, model, config) according to the requested scope.
        
        Args:
            scope: One of "smart", "fast", or "reasoning"
            
        Returns:
            Tuple of (client, model_name, config_dict)
        """
        scope = scope.lower()
        if scope == "smart":
            return self.smart_llm_client, self.smart_model, self.smart_config
        if scope == "reasoning":
            return self.reasoning_llm_client, self.reasoning_model, self.reasoning_config
        return self.fast_llm_client, self.fast_model, self.fast_config
    
    def normalize_stream(self, stream):
        """
        Normalize streaming responses to yield only text content.
        
        Args:
            stream: The streaming response object from the LLM
        
        Yields:
            str: Text chunks from the stream
        """
        config = self.providers_cfg[self.provider]
        
        for chunk in stream:
            text = self._extract_stream_text(chunk, config)
            if text:
                yield text
    
    # ============================================================================================== #
    # ======================================== SMART =============================================== #
    # ============================================================================================== #

    def smart_completion(self,
                         user_prompt,
                         system_prompt=None,
                         max_tokens=None,
                         temperature=None,
                         stream=False,
                         json_output=False):
        """
        Run completion using the smart model.
        
        Args:
            user_prompt: User's input text
            system_prompt: Optional system instructions
            max_tokens: Maximum tokens to generate
            temperature: Sampling temperature
            stream: If True, return streaming response
            json_output: If True, request JSON formatted output
            
        Returns:
            Generated text or stream object
        """
        if not max_tokens:
            max_tokens = self.max_tokens
        if not temperature:
            temperature = self.temperature

        return self._completion(
            client=self.smart_llm_client,
            model=self.smart_model,
            config=self.smart_config,
            user_prompt=user_prompt,
            system_prompt=system_prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            stream=stream,
            json_output=json_output
        )
    
    # ============================================================================================== #
    # ========================================= FAST =============================================== #
    # ============================================================================================== #

    def fast_completion(self,
                        user_prompt,
                        system_prompt=None,
                        max_tokens=None,
                        temperature=None,
                        stream=False,
                        json_output=False):
        """
        Run completion using the fast model.
        
        Args:
            user_prompt: User's input text
            system_prompt: Optional system instructions
            max_tokens: Maximum tokens to generate
            temperature: Sampling temperature
            stream: If True, return streaming response
            json_output: If True, request JSON formatted output
            
        Returns:
            Generated text or stream object
        """
        if not max_tokens:
            max_tokens = self.max_tokens
        if not temperature:
            temperature = self.temperature

        return self._completion(
            client=self.fast_llm_client,
            model=self.fast_model,
            config=self.fast_config,
            user_prompt=user_prompt,
            system_prompt=system_prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            stream=stream,
            json_output=json_output
        )
    
    # ============================================================================================== #
    # ======================================= REASONING ============================================ #
    # ============================================================================================== #

    def reasoning_completion(self,
                             user_prompt,
                             system_prompt=None,
                             max_tokens=None,
                             temperature=None,
                             stream=False,
                             json_output=False):
        """
        Run completion using the reasoning model.
        
        Args:
            user_prompt: User's input text
            system_prompt: Optional system instructions
            max_tokens: Maximum tokens to generate
            temperature: Sampling temperature
            stream: If True, return streaming response
            json_output: If True, request JSON formatted output
            
        Returns:
            Generated text or stream object
        """
        if not max_tokens:
            max_tokens = self.max_tokens
        if not temperature:
            temperature = self.temperature

        return self._completion(
            client=self.reasoning_llm_client,
            model=self.reasoning_model,
            config=self.reasoning_config,
            user_prompt=user_prompt,
            system_prompt=system_prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            stream=stream,
            json_output=json_output
        )
    
    # ============================================================================================== #
    # ==================================== UNIVERSAL METHOD ======================================== #
    # ============================================================================================== #

    def _completion(self,
                    client,
                    model,
                    config,
                    user_prompt,
                    system_prompt=None,
                    max_tokens=None,
                    temperature=None,
                    stream=False,
                    json_output=False):
        """
        Universal completion method that works with all providers.
        
        Args:
            client: Initialized LLM client
            model: Model name to use
            config: Provider configuration dictionary
            user_prompt: User's input text
            system_prompt: Optional system instructions
            max_tokens: Maximum tokens to generate
            temperature: Sampling temperature
            stream: If True, return streaming response
            json_output: If True, request JSON formatted output
            
        Returns:
            Generated text, parsed JSON dict, or stream object
            
        Raises:
            RuntimeError: If completion fails
            ValueError: If SDK is unsupported
        """
        try:
            sdk = config["sdk"]
            
            # Anthropic
            if sdk == "anthropic":
                kwargs = {
                    "model": model,
                    "messages": [{"role": "user", "content": user_prompt}],
                    "max_tokens": max_tokens,
                    "temperature": temperature,
                    "stream": stream
                }
                if system_prompt:
                    kwargs["system"] = system_prompt

                response = client.messages.create(**kwargs)
                
                if stream:
                    return response
                else:
                    text = self._extract_response_text(response, config)
                    return self.clean_json_output(text) if json_output else text
            
            # OpenAI or OpenAI-compatible
            elif sdk in ["openai", "groq"]:
                max_tokens_arg = "max_completion_tokens" if sdk == "openai" else "max_tokens"
                messages = []
                if system_prompt:
                    messages.append({"role": "system", "content": system_prompt})
                messages.append({"role": "user", "content": user_prompt})

                kwargs = {
                    "model": model,
                    "messages": messages,
                    max_tokens_arg: max_tokens,
                    "temperature": 1,
                    "stream": stream
                }
                
                if json_output:
                    kwargs["response_format"] = {"type": "json_object"}

                response = client.chat.completions.create(**kwargs)
                
                if stream:
                    return response
                else:
                    text = self._extract_response_text(response, config)
                    return self.clean_json_output(text) if json_output else text
            
            # Gemini
            elif sdk == "gemini":
                gen_config = types.GenerateContentConfig(
                    max_output_tokens=max_tokens,
                    temperature=temperature
                )
                if system_prompt:
                    gen_config.system_instruction = system_prompt
                if json_output:
                    gen_config.response_mime_type = 'application/json'

                if stream:
                    response = client.models.generate_content_stream(
                        model=model,
                        contents=user_prompt,
                        config=gen_config
                    )
                    return response
                else:
                    response = client.models.generate_content(
                        model=model,
                        contents=user_prompt,
                        config=gen_config
                    )
                    text = self._extract_response_text(response, config)
                    return self.clean_json_output(text) if json_output else text
            
            else:
                raise ValueError(f"Unsupported SDK: {sdk}")

        except Exception as e:
            raise RuntimeError(
                f"Completion failed for {sdk} provider with model {model}: {str(e)}"
            ) from e
        
    # ============================================================================================== #
    # ========================================== IMAGES ============================================ #
    # ============================================================================================== #

    def image_completion(self,
                         user_prompt,
                         image,
                         file_path=True,
                         smart_model=False,
                         system_prompt=None,
                         max_tokens=None,
                         temperature=None,
                         stream=False,
                         json_output=False):
        """
        Run completion with image input.
        
        Args:
            user_prompt: User's text prompt
            image: Image file path or base64 encoded data
            file_path: If True, image is a file path; if False, it's base64 data
            smart_model: If True, use smart model; otherwise use fast model
            system_prompt: Optional system instructions
            max_tokens: Maximum tokens to generate
            temperature: Sampling temperature
            stream: If True, return streaming response
            json_output: If True, request JSON formatted output
            
        Returns:
            Generated text, parsed JSON dict, or stream object
            
        Raises:
            NotImplementedError: If provider doesn't support vision
            RuntimeError: If image processing fails
        """
        if not max_tokens:
            max_tokens = self.max_tokens
        if not temperature:
            temperature = self.temperature

        if smart_model:
            client = self.smart_llm_client
            config = self.smart_config
            model = config.get("vision_model") or config["smart_model"]
        else:
            client = self.fast_llm_client
            config = self.fast_config
            model = config.get("vision_model") or config["fast_model"]

        if not config["supports_vision"]:
            raise NotImplementedError(
                f"Vision not supported for {config['sdk']} provider"
            )

        try:
            sdk = config["sdk"]
            
            # Anthropic vision
            if sdk == "anthropic":
                if file_path:
                    try:
                        with open(image, 'rb') as img_file:
                            image_data = base64.b64encode(img_file.read()).decode('utf-8')
                    except FileNotFoundError:
                        raise FileNotFoundError(f"Image file not found: {image}")
                    except Exception as e:
                        raise IOError(f"Failed to read image file: {str(e)}") from e
                else:
                    image_data = image
                
                media_type = "image/jpeg"
                if file_path:
                    if image.lower().endswith('.png'):
                        media_type = "image/png"
                    elif image.lower().endswith('.webp'):
                        media_type = "image/webp"
                    elif image.lower().endswith('.gif'):
                        media_type = "image/gif"

                messages = [{
                    "role": "user",
                    "content": [
                        {"type": "text", "text": user_prompt},
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": media_type,
                                "data": image_data
                            }
                        }
                    ]
                }]

                kwargs = {
                    "model": model,
                    "messages": messages,
                    "max_tokens": max_tokens,
                    "temperature": temperature,
                    "stream": stream
                }
                if system_prompt:
                    kwargs["system"] = system_prompt

                response = client.messages.create(**kwargs)
                
                if stream:
                    return response
                text = self._extract_response_text(response, config)
                return self.clean_json_output(text) if json_output else text
            
            # OpenAI/Groq vision
            elif sdk in ["openai", "groq"]:
                if file_path:
                    try:
                        with open(image, 'rb') as img_file:
                            image_data = base64.b64encode(img_file.read()).decode('utf-8')
                    except FileNotFoundError:
                        raise FileNotFoundError(f"Image file not found: {image}")
                    except Exception as e:
                        raise IOError(f"Failed to read image file: {str(e)}") from e
                else:
                    image_data = image

                messages = []
                if system_prompt:
                    messages.append({"role": "system", "content": system_prompt})

                messages.append({
                    "role": "user",
                    "content": [
                        {"type": "text", "text": user_prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}
                        }
                    ]
                })

                kwargs = {
                    "model": model,
                    "messages": messages,
                    "max_tokens": max_tokens,
                    "temperature": temperature,
                    "stream": stream
                }
                if json_output:
                    kwargs["response_format"] = {"type": "json_object"}

                response = client.chat.completions.create(**kwargs)
                
                if stream:
                    return response
                text = self._extract_response_text(response, config)
                return self.clean_json_output(text) if json_output else text
            
            # Gemini vision
            elif sdk == "gemini":
                try:
                    image_obj = Image.open(image) if file_path else image
                except FileNotFoundError:
                    raise FileNotFoundError(f"Image file not found: {image}")
                except Exception as e:
                    raise IOError(f"Failed to open image: {str(e)}") from e
                
                gen_config = types.GenerateContentConfig(
                    max_output_tokens=max_tokens,
                    temperature=temperature
                )
                if system_prompt:
                    gen_config.system_instruction = system_prompt
                if json_output:
                    gen_config.response_mime_type = 'application/json'

                contents = [user_prompt, image_obj]

                if stream:
                    response = client.models.generate_content_stream(
                        model=model,
                        contents=contents,
                        config=gen_config
                    )
                    return response
                else:
                    response = client.models.generate_content(
                        model=model,
                        contents=contents,
                        config=gen_config
                    )
                    text = self._extract_response_text(response, config)
                    return self.clean_json_output(text) if json_output else text
            
            else:
                raise NotImplementedError(f"Vision not supported for SDK: {sdk}")

        except Exception as e:
            if isinstance(e, (NotImplementedError, FileNotFoundError, IOError)):
                raise
            raise RuntimeError(
                f"Image completion failed for {sdk} provider: {str(e)}"
            ) from e
        
    # ============================================================================================== #
    # ========================================= DOCUMENTS ========================================== #
    # ============================================================================================== #

    def document_completion(
        self,
        user_prompt,
        document,
        file_path: bool = True,
        smart_model: bool = False,
        system_prompt: str | None = None,
        max_tokens: int | None = None,
        temperature: float | None = None,
        stream: bool = False,
        json_output: bool = False,
        split_into_pages: bool = False,
    ):
        """
        Run completion with document input (typically PDF).
        
        Args:
            user_prompt: User's text prompt
            document: Document file path or bytes
            file_path: If True, document is a file path
            smart_model: If True, use smart model; otherwise use fast model
            system_prompt: Optional system instructions
            max_tokens: Maximum tokens to generate
            temperature: Sampling temperature
            stream: If True, return streaming response
            json_output: If True, request JSON formatted output
            split_into_pages: If True, process PDF pages independently and return dict
            
        Returns:
            Generated text, parsed JSON dict, stream object, or dict of page results
            
        Raises:
            ValueError: If document input is invalid
            RuntimeError: If document processing fails
        """
        if not max_tokens:
            max_tokens = self.max_tokens
        if not temperature:
            temperature = self.temperature

        if smart_model:
            client = self.smart_llm_client
            config = self.smart_config
            model = config["smart_model"]
        else:
            client = self.fast_llm_client
            config = self.fast_config
            model = config["fast_model"]

        # Handle page splitting
        if split_into_pages and file_path and document.lower().endswith(".pdf"):
            try:
                with pikepdf.open(document, allow_overwriting_input=True) as pdf:
                    total_pages = len(pdf.pages)
                    temp_files = []
                    page_info = []

                    for page_num, page in enumerate(pdf.pages, start=1):
                        tmp_file = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
                        single = pikepdf.Pdf.new()
                        single.pages.append(page)
                        single.save(tmp_file.name)
                        tmp_file.close()

                        temp_files.append(tmp_file.name)
                        page_info.append({
                            'page_num': page_num,
                            'temp_path': tmp_file.name,
                            'prompt': f"Page {page_num} of {total_pages}:\n\n{user_prompt}"
                        })

                    def process_page(page_data):
                        max_retries = 3
                        base_delay = 1

                        for attempt in range(max_retries + 1):
                            try:
                                result = self._process_single_document(
                                    client=client,
                                    model=model,
                                    config=config,
                                    user_prompt=page_data['prompt'],
                                    document=page_data['temp_path'],
                                    file_path=True,
                                    system_prompt=system_prompt,
                                    max_tokens=max_tokens,
                                    temperature=temperature,
                                    stream=stream,
                                    json_output=json_output,
                                )
                                return page_data['page_num'], result

                            except Exception as e:
                                if attempt < max_retries:
                                    delay = base_delay * (2**attempt) + random.uniform(0, 1)
                                    logger.debug(
                                        f"Page {page_data['page_num']} processing failed, "
                                        f"retrying in {delay:.2f}s (attempt {attempt + 1}/{max_retries})"
                                    )
                                    time.sleep(delay)
                                    continue
                                else:
                                    return page_data['page_num'], f"Error: {str(e)}"

                        return page_data['page_num'], "Unexpected error"

                    try:
                        with ThreadPoolExecutor(max_workers=min(total_pages, 10)) as executor:
                            future_to_page = {
                                executor.submit(process_page, page_data): page_data['page_num']
                                for page_data in page_info
                            }

                            page_results = {}
                            for future in as_completed(future_to_page):
                                page_num, result = future.result()
                                page_results[page_num] = result

                                if stream:
                                    return result

                        all_results = []
                        for page_num in sorted(page_results.keys()):
                            result = page_results[page_num]
                            if json_output and isinstance(result, str):
                                try:
                                    result = self.clean_json_output(result)
                                except:
                                    pass
                            all_results.append({"page": page_num, "result": result})

                    finally:
                        for temp_path in temp_files:
                            try:
                                os.unlink(temp_path)
                            except OSError:
                                pass

                    combined = {}
                    for item in all_results:
                        combined[item["page"]] = item["result"]
                    return combined

            except FileNotFoundError:
                raise FileNotFoundError(f"PDF file not found: {document}")
            except Exception as e:
                raise RuntimeError(f"Failed to split PDF into pages: {str(e)}") from e

        # Process as single document
        return self._process_single_document(
            client=client,
            model=model,
            config=config,
            user_prompt=user_prompt,
            document=document,
            file_path=file_path,
            system_prompt=system_prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            stream=stream,
            json_output=json_output
        )

    def _process_single_document(
        self,
        client,
        model,
        config,
        user_prompt,
        document,
        *,
        file_path: bool = True,
        system_prompt: str | None = None,
        max_tokens: int | None = None,
        temperature: float | None = None,
        stream: bool = False,
        json_output: bool = False,
    ):
        """
        Upload the PDF when the provider supports a Files API; otherwise
        fall back to local text extraction.
        
        Args:
            client: Initialized LLM client
            model: Model name to use
            config: Provider configuration dictionary
            user_prompt: User's text prompt
            document: Document file path or bytes
            file_path: If True, document is a file path
            system_prompt: Optional system instructions
            max_tokens: Maximum tokens to generate
            temperature: Sampling temperature
            stream: If True, return streaming response
            json_output: If True, request JSON formatted output
            
        Returns:
            Generated text, parsed JSON dict, or stream object
            
        Raises:
            ValueError: If document input is invalid
            RuntimeError: If document processing fails
        """
        sdk = config["sdk"]
        supports_documents = bool(config.get("supports_documents", False))

        def _safe_name(name: str, default: str = "attachment.pdf") -> str:
            name = re.sub(r"[^\w.\-]", "_", name or default)
            if not name.lower().endswith(".pdf"):
                name += ".pdf"
            return name[:128] or default

        def _read_pdf_bytes():
            try:
                if file_path and isinstance(document, str):
                    with open(document, "rb") as f:
                        return f.read(), os.path.basename(document)
                if hasattr(document, "read"):
                    data = document.read()
                    return data, getattr(document, "name", "attachment.pdf")
                if isinstance(document, (bytes, bytearray)):
                    return bytes(document), "attachment.pdf"
                raise ValueError("Document must be a file path, file object, or bytes")
            except FileNotFoundError:
                raise FileNotFoundError(f"Document file not found: {document}")
            except Exception as e:
                raise IOError(f"Failed to read document: {str(e)}") from e

        def _with_retries(fn, *a, **kw):
            for retry in range(3):
                try:
                    return fn(*a, **kw)
                except Exception as exc:
                    if retry == 2:
                        raise
                    time.sleep(2 ** retry + random.random())

        def _fallback_local_extraction():
            try:
                data, _ = _read_pdf_bytes()
                reader = PyPDF2.PdfReader(io.BytesIO(data))
                text = "\n\n".join(page.extract_text() or "" for page in reader.pages)
                combined = f"Document content:\n\n{text}\n\nUser query: {user_prompt}"
                return self._completion(
                    client, model, config,
                    user_prompt=combined,
                    system_prompt=system_prompt,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    stream=stream,
                    json_output=json_output,
                )
            except Exception as e:
                raise RuntimeError(f"Local PDF text extraction failed: {str(e)}") from e

        try:
            # OpenAI
            if sdk in ["openai", "groq"] and supports_documents:
                data, name = _read_pdf_bytes()
                buf = io.BytesIO(data)
                buf.name = _safe_name(name)
                
                try:
                    file_obj = _with_retries(client.files.create,
                                            file=buf, purpose="assistants")
                except Exception as e:
                    raise RuntimeError(f"Failed to upload document to {sdk}: {str(e)}") from e
                
                file_id = file_obj.id

                content = [
                    {"type": "text", "text": user_prompt},
                    {"type": "file", "file": {"file_id": file_id}},
                ]
                messages = [{"role": "user", "content": content}]
                if system_prompt:
                    messages.insert(0, {"role": "system", "content": system_prompt})

                kwargs = dict(
                    model=model,
                    messages=messages,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    stream=stream,
                )
                if json_output:
                    kwargs["response_format"] = {"type": "json_object"}

                resp = client.chat.completions.create(**kwargs)
                if stream:
                    return resp
                text = self._extract_response_text(resp, config)
                return self.clean_json_output(text) if json_output else text

            # Anthropic
            elif sdk == "anthropic" and supports_documents:
                extra_headers = {
                    "anthropic-beta": "messages-2023-12-15,files-api-2025-04-14"
                }
                data, name = _read_pdf_bytes()
                buf = io.BytesIO(data)
                buf.name = _safe_name(name)
                
                try:
                    file_ref = _with_retries(client.beta.files.upload, file=buf)
                except Exception as e:
                    raise RuntimeError(f"Failed to upload document to Anthropic: {str(e)}") from e
                
                doc_block = {"type": "document",
                            "source": {"type": "file", "file_id": file_ref.id}}

                msg_kwargs = dict(
                    model=model,
                    messages=[{
                        "role": "user",
                        "content": [
                            {"type": "text", "text": user_prompt},
                            doc_block,
                        ]
                    }],
                    max_tokens=max_tokens,
                    temperature=temperature,
                    stream=stream,
                    extra_headers=extra_headers,
                )
                if system_prompt:
                    msg_kwargs["system"] = system_prompt

                resp = client.beta.messages.create(**msg_kwargs)
                if stream:
                    return resp
                text = self._extract_response_text(resp, config)
                return self.clean_json_output(text) if json_output else text
          
            # Gemini
            elif sdk == "gemini" and supports_documents:
                if file_path and isinstance(document, str):
                    file_path_to_upload = document
                else:
                    data, name = _read_pdf_bytes()
                    tmp_file = tempfile.NamedTemporaryFile(mode='wb', suffix='.pdf', delete=False)
                    tmp_file.write(data)
                    tmp_file.close()
                    file_path_to_upload = tmp_file.name
                
                try:
                    file_ref = _with_retries(
                        client.files.upload,
                        file=file_path_to_upload
                    )
                except Exception as e:
                    raise RuntimeError(f"Failed to upload document to Gemini: {str(e)}") from e
                finally:
                    if not (file_path and isinstance(document, str)):
                        try:
                            os.unlink(file_path_to_upload)
                        except:
                            pass
                
                # Wait for file processing
                max_wait = 30
                wait_interval = 1
                for _ in range(max_wait):
                    file_status = client.files.get(name=file_ref.name)
                    if hasattr(file_status, 'state') and file_status.state == 'ACTIVE':
                        break
                    elif hasattr(file_status, 'state') and file_status.state == 'FAILED':
                        raise RuntimeError(f"Gemini file processing failed: {file_ref.name}")
                    time.sleep(wait_interval)

                gen_cfg = types.GenerateContentConfig(
                    max_output_tokens=max_tokens,
                    temperature=temperature,
                    response_mime_type="application/json" if json_output else None,
                    system_instruction=system_prompt,
                )
                
                contents = [user_prompt, file_ref]
                
                call = client.models.generate_content_stream if stream else \
                    client.models.generate_content
                resp = call(
                    model=model,
                    contents=contents,
                    config=gen_cfg,
                )
                if stream:
                    return resp
                text = self._extract_response_text(resp, config)
                return self.clean_json_output(text) if json_output else text

            # Fallback
            else:
                return _fallback_local_extraction()

        except Exception as e:
            if isinstance(e, (ValueError, FileNotFoundError, IOError, RuntimeError)):
                raise
            raise RuntimeError(f"Document processing failed for {sdk}: {str(e)}") from e

    # ============================================================================================== #
    # ======================================== WEB SEARCH ========================================== #
    # ============================================================================================== #
        
    def web_search(
        self,
        query: str,
        system: str = None,
        scope: str = "fast",
        *,
        max_tokens: int | None = 10000,
        temperature: float | None = None,
        max_results: int = 20,   
        thinking_budget: int | None = 5000, 
        country_code: str = None,
        city: str = None
    ):
        """
        Run a real-time web search through the provider's native tool interface.

        Args:
            query: Search query text
            system: Optional system prompt
            scope: "smart", "fast", or "reasoning" (default: "fast")
            max_tokens: Maximum tokens to generate
            temperature: Sampling temperature
            max_results: Hint for providers that accept a results limit
            thinking_budget: Token budget for thinking (Anthropic only)
            country_code: Optional country code for location-based search
            city: Optional city name for location-based search
            
        Returns:
            Generated text response with web search results
            
        Raises:
            NotImplementedError: If provider doesn't support web search
            RuntimeError: If web search fails
        """
        if max_tokens is None:
            max_tokens = self.max_tokens
        if temperature is None:
            temperature = self.temperature

        client, model, config = self._select_scope(scope)
        sdk = config["sdk"]

        try:
            # OpenAI, DeepSeek
            if sdk == "openai" or (sdk == "openai" and config.get("base_url")):
                kwargs = {
                    "model": "gpt-4o-search-preview",
                    "messages": [{"role": "user", "content": query}],
                }
                
                if system:
                    kwargs["messages"].insert(0, {"role": "system", "content": system})
                
                if country_code or city:
                    web_search_options = {"user_location": {"type": "approximate", "approximate": {}}}
                    if country_code:
                        web_search_options["user_location"]["approximate"]["country"] = country_code
                    if city:
                        web_search_options["user_location"]["approximate"]["city"] = city
                    kwargs["web_search_options"] = web_search_options
                
                completion = client.chat.completions.create(**kwargs)
                return self._extract_response_text(completion, config)

            # Anthropic
            elif sdk == "anthropic":
                content_blocks = [{"type": "text", "text": query}]

                tool_config = {
                    "name": "web_search",
                    "type": "web_search_20250305",
                }
                
                if country_code or city:
                    user_location = {"type": "approximate"}
                    if city:
                        user_location["city"] = city
                    if country_code:
                        user_location["country"] = country_code
                    tool_config["user_location"] = user_location

                kwargs = {
                    "model": model,              
                    "max_tokens": max_tokens,
                    "temperature": 1,
                    "messages": [{"role": "user", "content": content_blocks}],
                    "tools": [tool_config],
                }

                if system:
                    kwargs["system"] = system
                if thinking_budget and thinking_budget > 0:
                    kwargs["thinking"] = {"type": "enabled", "budget_tokens": int(thinking_budget)}

                message = client.messages.create(**kwargs)

                parts = []
                for block in getattr(message, "content", []):
                    if getattr(block, "type", None) == "text" and hasattr(block, "text"):
                        parts.append(block.text)
                return "".join(parts)

            # Gemini
            elif sdk == "gemini":
                grounding_tool = types.Tool(
                    google_search=types.GoogleSearch()
                )
                
                gen_config = types.GenerateContentConfig(
                    tools=[grounding_tool],
                    system_instruction=system
                )
                
                resp = client.models.generate_content(
                    model=model,
                    contents=query,
                    config=gen_config
                )
                
                return self._extract_response_text(resp, config)
            
            # Groq
            elif sdk == "groq":
                kwargs = {
                    "model": "groq/compound",
                    "messages": [{"role": "user", "content": query}],
                }
                
                if system:
                    kwargs["messages"].insert(0, {"role": "system", "content": system})
                
                if country_code:
                    try:
                        import pycountry
                        country = pycountry.countries.get(alpha_2=country_code).name.lower()
                        kwargs["search_settings"] = {"country": country}
                    except Exception as e:
                        logger.warning(f"Failed to convert country code to name: {e}")
                
                completion = client.chat.completions.create(**kwargs)
                return self._extract_response_text(completion, config)

            else:
                raise NotImplementedError(f"Web search not supported for {sdk} provider")

        except NotImplementedError:
            raise
        except Exception as e:
            raise RuntimeError(f"Web search failed for {sdk} provider: {str(e)}") from e

    def clean_json_output(self, text):
        """
        Clean and parse JSON output from LLM response.

        Parsing is attempted in three steps, stopping at the first success:
        1. the whole text as JSON;
        2. the span from the first ``{``/``[`` to the last ``}``/``]``
           (handles code fences and surrounding prose);
        3. the first position from which a complete JSON object or array can
           be decoded (handles prose after the JSON that contains stray
           braces or brackets).

        Args:
            text: Raw text response from LLM. An already-parsed dict or list
                is returned unchanged.

        Returns:
            The parsed JSON value (usually a dict, but a list or scalar is
            possible), or an empty dict if the input is empty or nothing
            parses
        """
        if not text:
            return {}
        if isinstance(text, (dict, list)):
            return text
        if not text.strip():
            return {}

        try:
            return json.loads(text)
        except json.JSONDecodeError:
            pass

        # Everything below is deliberately best-effort: any failure while
        # hunting for embedded JSON results in the empty-dict fallback.
        try:
            starts = [i for i in (text.find('{'), text.find('[')) if i >= 0]
            ends = [i for i in (text.rfind('}'), text.rfind(']')) if i >= 0]
            if not starts or not ends:
                return {}

            start_idx, end_idx = min(starts), max(ends)
            if end_idx <= start_idx:
                return {}

            try:
                return json.loads(text[start_idx:end_idx + 1])
            except json.JSONDecodeError:
                pass

            # The outermost span was not valid JSON; try each opening
            # delimiter in order and return the first value that decodes.
            decoder = json.JSONDecoder()
            for idx in range(start_idx, end_idx):
                if text[idx] not in "{[":
                    continue
                try:
                    value, _ = decoder.raw_decode(text, idx)
                except json.JSONDecodeError:
                    continue
                return value
        except Exception:
            pass

        return {}
        
    # ============================================================================================== #
    # ======================================= AGENT TEAM =========================================== #
    # ============================================================================================== #

    def create_agent_sdk_agent(self, name, instructions, agent_type="smart", handoffs=None, store=True, **agent_kwargs):
        """
        Create an OpenAI Agents SDK agent using LiteLLM for multiple model support.

        Args:
            name: Name of the agent (required)
            instructions: System instructions for the agent (required)
            agent_type: "smart", "fast", or "reasoning" - determines which model to use
            handoffs: Optional list of delegation targets. Each item may be an
                Agent/handoff object, the name of an already stored agent, or
                a dict ``{"agent": <Agent or stored name>, "settings": {...}}``
                whose settings are forwarded to ``handoff()``. Names that are
                not stored are skipped with a warning.
            store: If True, stores agent in ``self.agents`` and as the
                attribute ``<name>_agent`` (default: True)
            **agent_kwargs: Additional Agent parameters (tools, model_settings, etc.)

        Returns:
            OpenAI Agents SDK Agent configured with LiteLLM

        Raises:
            ValueError: If API key is missing
            RuntimeError: If the LiteLLM model or the agent cannot be created

        Examples:
            # Create and store an agent
            ai_agent = AIAgent(provider="groq")
            ai_agent.create_agent_sdk_agent(
                name="assistant",
                instructions="You are a helpful assistant. Be concise and friendly."
            )

            # Create agent with handoffs
            ai_agent.create_agent_sdk_agent(
                name="triage",
                instructions="Route customer inquiries to the appropriate department.",
                handoffs=[ai_agent.billing_agent, ai_agent.technical_agent]
            )
        """
        if not hasattr(self, 'agents'):
            self.agents = {}

        # Only the model name and provider config are needed here; the native
        # client is unused because the agent talks to the model via LiteLLM.
        _, model_name, config = self._select_scope(agent_type)
        provider = self.provider

        api_key = os.environ.get(config["api_key_env"])
        if not api_key:
            raise ValueError(
                f"API key not found. Set {config['api_key_env']} environment variable"
            )

        litellm_model_mapping = {
            "anthropic": f"anthropic/{model_name}",
            "openai": model_name,
            "gemini": f"gemini/{model_name}",
            "groq": f"groq/{model_name}",
            "deepseek": f"deepseek/{model_name}",
        }

        litellm_model = litellm_model_mapping.get(provider, f"{provider}/{model_name}")

        model_kwargs = {
            "model": litellm_model,
            "api_key": api_key,
        }

        if provider == "deepseek" and "base_url" in config:
            model_kwargs["base_url"] = config["base_url"]

        try:
            model = LitellmModel(**model_kwargs)
        except Exception as e:
            raise RuntimeError(f"Failed to create LiteLLM model: {str(e)}") from e

        if "model_settings" not in agent_kwargs:
            agent_kwargs["model_settings"] = ModelSettings(include_usage=True)

        # Process handoffs
        processed_handoffs = []
        for h in handoffs or []:
            if isinstance(h, str):
                if h in self.agents:
                    processed_handoffs.append(self.agents[h])
                else:
                    logger.warning(f"Agent '{h}' not found in self.agents")
            elif isinstance(h, dict):
                target_agent = h.get("agent")
                if isinstance(target_agent, str):
                    if target_agent not in self.agents:
                        # Same policy as plain string handoffs: skip rather
                        # than pass a bare name into handoff().
                        logger.warning(f"Agent '{target_agent}' not found in self.agents")
                        continue
                    target_agent = self.agents[target_agent]
                elif target_agent is None:
                    logger.warning("Handoff dict is missing the 'agent' entry; skipping")
                    continue

                settings = h.get("settings", {})
                processed_handoffs.append(
                    handoff(agent=target_agent, **settings)
                )
            else:
                processed_handoffs.append(h)

        if processed_handoffs:
            # The helper takes only the prompt text and prepends the SDK's
            # recommended handoff preamble.
            instructions = prompt_with_handoff_instructions(instructions)

        if "tools" not in agent_kwargs or agent_kwargs["tools"] is None:
            agent_kwargs["tools"] = []

        try:
            agent = Agent(
                name=name,
                instructions=instructions,
                model=model,
                handoffs=processed_handoffs,
                **agent_kwargs
            )
        except Exception as e:
            raise RuntimeError(f"Failed to create agent '{name}': {str(e)}") from e

        if store:
            self.agents[name] = agent
            attr_name = f"{name.lower().replace(' ', '_').replace('-', '_')}_agent"
            setattr(self, attr_name, agent)

        if self.verbose:
            def _handoff_label(h):
                # Agents expose .name; handoff objects are probed for .agent
                # and .agent_name before giving up.
                if hasattr(h, 'agent'):
                    return h.agent.name
                return getattr(h, 'agent_name', None) or getattr(h, 'name', None) or 'Unknown'

            handoff_names = [_handoff_label(h) for h in processed_handoffs]
            logger.info(
                f"Created agent: name={name}, model={litellm_model}, type={agent_type}, "
                f"handoffs={handoff_names if handoff_names else 'none'}"
            )

        return agent

    def get_agent(self, name):
        """
        Look up an agent in the registry of stored agents.

        Args:
            name: Name the agent was stored under

        Returns:
            The stored Agent, or None when no registry exists yet or the
            name is not in it
        """
        try:
            registry = self.agents
        except AttributeError:
            # No agent has been stored on this instance so far.
            return None
        return registry.get(name)

    def list_agents(self):
        """
        Return the names of every stored agent.

        Returns:
            List of agent names in registry order; empty when no registry
            exists yet
        """
        try:
            registry = self.agents
        except AttributeError:
            # No agent has been stored on this instance so far.
            return []
        return [agent_name for agent_name in registry.keys()]

    def create_handoff_team(self, team_config):
        """
        Create a team of agents with predefined handoff relationships.

        All entries are validated before any agent is created, so an invalid
        entry cannot leave a partially registered team behind. Agents are
        then created without handoffs, and handoffs are attached in a second
        pass once every target exists.

        Args:
            team_config: List of agent configurations, each containing:
                - name: Agent name (required)
                - instructions: Agent instructions (required)
                - type: "smart", "fast", or "reasoning" (optional, default: "fast")
                - handoffs_to: List of agent names this agent can hand off to (optional)
                - handoff_settings_<target>: Dict of keyword arguments passed
                  to ``handoff()`` for that target (optional)
                - tools: List of tools for the agent (optional)

        Returns:
            Dict of agent name -> Agent object. This is ``self.agents``, so it
            also contains agents stored before this call.

        Raises:
            ValueError: If agent configuration is invalid
            RuntimeError: If team creation fails

        Example:
            ai_agent.create_handoff_team([
                {
                    "name": "triage",
                    "instructions": "Route inquiries appropriately.",
                    "type": "smart",
                    "handoffs_to": ["billing", "technical"]
                },
                {
                    "name": "billing",
                    "instructions": "Handle billing questions.",
                    "type": "fast"
                }
            ])
        """
        try:
            # Validation pass: reject the whole team before creating anything.
            for agent_config in team_config:
                if "name" not in agent_config or "instructions" not in agent_config:
                    raise ValueError(
                        "Each agent config must have 'name' and 'instructions' fields"
                    )

            # First pass: Create all agents without handoffs
            for agent_config in team_config:
                self.create_agent_sdk_agent(
                    name=agent_config["name"],
                    instructions=agent_config["instructions"],
                    agent_type=agent_config.get("type", "fast"),
                    tools=agent_config.get("tools"),
                    store=True
                )

            # Second pass: Update agents with handoffs
            for agent_config in team_config:
                if "handoffs_to" not in agent_config:
                    continue

                agent = self.agents[agent_config["name"]]

                handoffs = []
                for target_name in agent_config["handoffs_to"]:
                    if target_name not in self.agents:
                        logger.warning(f"Target agent '{target_name}' not found")
                        continue

                    target_agent = self.agents[target_name]
                    handoff_settings = agent_config.get(f"handoff_settings_{target_name}", {})

                    if handoff_settings:
                        handoffs.append(
                            handoff(agent=target_agent, **handoff_settings)
                        )
                    else:
                        handoffs.append(target_agent)

                if handoffs:
                    agent.handoffs = handoffs

            if self.verbose:
                logger.info(f"Created handoff team with {len(self.agents)} agents")

            return self.agents

        except ValueError:
            # Configuration errors are part of the documented contract and
            # must reach the caller as ValueError, not RuntimeError.
            raise
        except Exception as e:
            raise RuntimeError(f"Failed to create handoff team: {str(e)}") from e
    
    def run_agent(
        self,
        user_prompt: str,
        *,
        agent = None,
        agent_name: str | None = None,
        async_mode: bool = False,
        **runner_kwargs,
    ):
        """
        Execute a stored agent through openai-agents' Runner.

        Parameters:
            user_prompt: The user's input
            agent: Provide the Agent object directly (optional)
            agent_name: Or give the name you stored it under (optional)
            async_mode: If False, run synchronously; if True, return coroutine
            **runner_kwargs: Extra arguments forwarded to Runner.run / run_sync
        
        Returns:
            str (sync) or coroutine → str (async)
            
        Raises:
            ValueError: If neither agent nor agent_name is provided, or agent not found
            RuntimeError: If agent execution fails
        """
        if agent is None:
            if agent_name is None:
                raise ValueError("Specify either agent=<Agent> or agent_name='...'")
            if not hasattr(self, "agents") or agent_name not in self.agents:
                raise ValueError(f"Agent '{agent_name}' not found in self.agents")
            agent = self.agents[agent_name]

        runner = Runner()

        if not async_mode:
            try:
                raw_result = runner.run_sync(
                    starting_agent=agent,
                    input=user_prompt,
                    **runner_kwargs,
                )
                return self._extract_runner_text(raw_result)
            except Exception as e:
                raise RuntimeError(f"Agent execution failed: {str(e)}") from e

        async def _arun():
            try:
                raw_result = await runner.run(
                    starting_agent=agent,
                    input=user_prompt,
                    **runner_kwargs,
                )
                return self._extract_runner_text(raw_result)
            except Exception as e:
                raise RuntimeError(f"Async agent execution failed: {str(e)}") from e

        return _arun()

    def _extract_runner_text(self, result):
        """
        Best-effort extraction of the assistant's final text from Runner result.

        Lookup order:
        1. ``result`` itself when it is a string;
        2. a string ``final_output`` attribute (the attribute the Agents SDK
           run result is expected to carry);
        3. a string in one of ``response``, ``output``, ``result``, ``value``;
        4. for a list/tuple, the last item that is a string or carries a
           string ``content``/``text`` (dict key or attribute);
        5. ``str(final_output)`` when a non-string final output exists;
        6. ``str(result)``.

        Args:
            result: Result object from Runner

        Returns:
            Extracted text string
        """
        if isinstance(result, str):
            return result

        final_output = getattr(result, "final_output", None)
        if isinstance(final_output, str):
            return final_output

        for attr in ("response", "output", "result", "value"):
            maybe = getattr(result, attr, None)
            if isinstance(maybe, str):
                return maybe

        if isinstance(result, (list, tuple)):
            # Walk backwards so the most recent message wins.
            for msg in reversed(result):
                if isinstance(msg, str):
                    return msg
                if isinstance(msg, dict):
                    if "content" in msg and isinstance(msg["content"], str):
                        return msg["content"]
                    if "text" in msg and isinstance(msg["text"], str):
                        return msg["text"]
                for key in ("content", "text"):
                    maybe = getattr(msg, key, None)
                    if isinstance(maybe, str):
                        return maybe

        if final_output is not None:
            # Structured (non-string) final output: stringify the output
            # itself rather than the whole result object.
            return str(final_output)

        return str(result)