#!/usr/bin/env python3
"""
Test script to measure Cerebras API response time directly
"""

import os
import time
import json
from openai import OpenAI
from dotenv import load_dotenv

# Load settings via python-dotenv before the configuration below is read.
load_dotenv()

# Connection settings; each one can be overridden through the environment.
CEREBRAS_API_KEY = os.getenv('CEREBRAS_API_KEY', '')
CEREBRAS_BASE_URL = os.getenv('CEREBRAS_BASE_URL', 'https://api.cerebras.ai/v1')
CEREBRAS_MODEL = os.getenv('CEREBRAS_MODEL', 'gpt-oss-120b')

if not CEREBRAS_API_KEY:
    print('Error: CEREBRAS_API_KEY not set')
    # Raise SystemExit directly instead of calling the `exit` builtin: `exit`
    # is installed by the `site` module for interactive sessions and is not
    # available when site is disabled (e.g. `python -S`).
    raise SystemExit(1)

# OpenAI-compatible client pointed at the configured base URL.
client = OpenAI(
    api_key=CEREBRAS_API_KEY,
    base_url=CEREBRAS_BASE_URL,
)

def _ranking_request():
    """Return the request arguments shared by the two ranking tests.

    Both structured-output tests send the same prompt and the same JSON
    schema, so their timings differ only by the extra options passed
    alongside these arguments.
    """
    return {
        'messages': [
            {'role': 'system', 'content': 'You are a code ranking assistant.'},
            {'role': 'user', 'content': 'Rank these 3 code snippets by relevance to "authentication": [{"id": 0, "text": "function login() {}"}, {"id": 1, "text": "function logout() {}"}, {"id": 2, "text": "const x = 5;"}]'},
        ],
        'response_format': {
            'type': 'json_schema',
            'json_schema': {
                'name': 'ranking',
                'schema': {
                    'type': 'object',
                    'properties': {
                        'results': {
                            'type': 'array',
                            'items': {
                                'type': 'object',
                                'properties': {
                                    'id': {'type': 'integer'},
                                    'score': {'type': 'number'},
                                },
                                'required': ['id', 'score'],
                            },
                        },
                    },
                    'required': ['results'],
                },
            },
        },
    }


def _timed_completion(preview_chars=None, **request):
    """Send one chat completion and print its content, latency and token count.

    Args:
        preview_chars: When None, the full response content is printed.
            Otherwise only the first ``preview_chars`` characters are
            printed, followed by ``...``.
        **request: Extra keyword arguments forwarded to
            ``client.chat.completions.create`` (``messages``,
            ``response_format``, ``reasoning_effort``, ...). ``model`` and
            ``temperature`` are supplied here.

    Any exception raised by the request or while reading the response is
    caught and printed, so one failing test does not abort the others.
    """
    # perf_counter is monotonic, so the measurement cannot be skewed by
    # wall-clock adjustments the way time.time() can.
    started = time.perf_counter()
    try:
        response = client.chat.completions.create(
            model=CEREBRAS_MODEL,
            temperature=0.0,
            **request,
        )
        elapsed_ms = (time.perf_counter() - started) * 1000
        content = response.choices[0].message.content
        if preview_chars is None:
            print(f'Response: {content}')
        else:
            # content can be None; fall back to an empty preview so the
            # timing below is still reported instead of raising TypeError.
            preview = (content or '')[:preview_chars]
            print(f'Response: {preview}...')
        print(f'Time: {elapsed_ms:.2f}ms')
        print(f'Tokens: {response.usage.total_tokens if response.usage else "N/A"}')
    except Exception as e:
        print(f'Error: {e}')


def test_cerebras_speed():
    """Run the latency checks against the configured endpoint and print results.

    Three requests are issued in order: a plain completion, a JSON-schema
    constrained ranking request, and (only when the configured model is
    ``gpt-oss-120b``) the same ranking request with ``reasoning_effort='low'``.
    Each prints its response, elapsed time in milliseconds and total tokens,
    or the error if the request failed. Returns None.
    """
    print('Testing Cerebras API speed...')
    print(f'Model: {CEREBRAS_MODEL}')
    print(f'Base URL: {CEREBRAS_BASE_URL}')
    print('---')

    # Simple test
    print('\n1. Simple completion test:')
    _timed_completion(
        messages=[
            {'role': 'system', 'content': 'You are a helpful assistant.'},
            {'role': 'user', 'content': 'Say "Hello World"'},
        ],
    )

    # JSON schema test (similar to reranking)
    print('\n2. JSON schema test (like reranking):')
    _timed_completion(preview_chars=100, **_ranking_request())

    # Same ranking request with reasoning_effort; only attempted for gpt-oss-120b
    if CEREBRAS_MODEL == 'gpt-oss-120b':
        print('\n3. With reasoning_effort=low:')
        _timed_completion(
            preview_chars=100,
            reasoning_effort='low',
            **_ranking_request(),
        )

    print('\n---')
    print('Test complete!')

# Run the speed checks only when this file is executed as a script, not on import.
if "__main__" == __name__:
    test_cerebras_speed()
