"""Simple dataset evaluation example with Gentrace."""

import os
import asyncio
from typing import Any, Dict

from dotenv import load_dotenv
from openai import AsyncOpenAI

from gentrace import TestInput, init, experiment, interaction, eval_dataset

# Load variables via python-dotenv first, so the environment lookups below
# can see anything it provides.
load_dotenv()

# Configure the Gentrace SDK. The API key has no fallback (None when unset);
# the base URL defaults to https://gentrace.ai/api.
init(
    api_key=os.environ.get("GENTRACE_API_KEY"),
    base_url=os.environ.get("GENTRACE_BASE_URL", "https://gentrace.ai/api"),
)

# Identifiers read from the environment; both fall back to an empty string.
PIPELINE_ID = os.environ.get("GENTRACE_PIPELINE_ID", "")
DATASET_ID = os.environ.get("GENTRACE_DATASET_ID", "")

# Shared async OpenAI client used by process_ai_request().
openai = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"))


async def process_ai_request(inputs: Dict[str, Any]) -> Dict[str, Any]:
    """Send the prompt found in ``inputs`` to OpenAI and package the reply.

    Args:
        inputs: Mapping that may carry a ``"prompt"`` entry. When the key is
            absent, ``"Hey, how are you?"`` is sent instead.

    Returns:
        A dict with two keys: ``"result"`` holds the message content of the
        first choice in the response, and ``"metadata"`` holds the model
        reported by the response together with its usage (dumped through
        ``model_dump()``, or ``None`` when the response has no usage).
    """
    # Single user-role message; fall back to the default greeting prompt.
    user_message = {"role": "user", "content": inputs.get("prompt", "Hey, how are you?")}

    completion = await openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[user_message],
    )

    # Only the first choice is used.
    reply_text = completion.choices[0].message.content

    # Usage may be missing on the response; report None in that case.
    usage = completion.usage
    if usage:
        usage_payload = usage.model_dump()
    else:
        usage_payload = None

    metadata = {"model": completion.model, "usage": usage_payload}
    return {"result": reply_text, "metadata": metadata}


@interaction(pipeline_id=PIPELINE_ID, name="Process AI Request")
async def traced_process_ai_request(inputs: Dict[str, Any]) -> Dict[str, Any]:
    """Run ``process_ai_request`` as the "Process AI Request" interaction.

    The body only delegates; the ``@interaction`` decorator supplies the
    tracing, keyed by ``PIPELINE_ID``.

    Args:
        inputs: Passed through unchanged to ``process_ai_request``.

    Returns:
        Whatever ``process_ai_request`` returns for ``inputs``.
    """
    outputs = await process_ai_request(inputs)
    return outputs


@experiment(pipeline_id=PIPELINE_ID)
async def dataset_evaluation() -> None:
    """Evaluate a small inline set of prompts through the traced interaction.

    Builds four ``TestInput`` cases, hands them to ``eval_dataset`` with
    ``traced_process_ai_request`` as the interaction, then prints a
    completion message.
    """
    # (case name, prompt) pairs, in the order they are submitted.
    named_prompts = (
        ("greeting", "Hello! How are you doing today?"),
        ("factual_question", "What is the capital of France?"),
        ("math_problem", "What is 25 * 4?"),
        ("creative_writing", "Write a haiku about artificial intelligence"),
    )
    test_cases = [
        TestInput(name=case_name, inputs={"prompt": prompt_text})
        for case_name, prompt_text in named_prompts
    ]

    await eval_dataset(
        data=test_cases,
        interaction=traced_process_ai_request,
        max_concurrency=30,
    )

    print("Dataset evaluation completed! Check your Gentrace dashboard for results.")


if __name__ == "__main__":
    # Script entry point: create the experiment coroutine and drive it to
    # completion on a fresh event loop.
    evaluation_run = dataset_evaluation()
    asyncio.run(evaluation_run)
