"""
Test script to compare scrape_api_events (NHL API) with scrape_espn_events
"""

import sys
import os
import pandas as pd
import numpy as np

# Add the scraper directory to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))

# Import the new function
from scrape_nhl_api_events import scrape_api_events

# Import the original ESPN function
from TopDownHockey_Scraper.TopDownHockey_NHL_Scraper_optimized import scrape_espn_events, scrape_espn_ids_single_game

def test_comparison(game_id, game_date, home_team, away_team):
    """Compare NHL API scraper with ESPN scraper"""
    
    print(f"\n{'='*80}")
    print(f"Testing Game ID: {game_id}")
    print(f"Date: {game_date}, {away_team} @ {home_team}")
    print(f"{'='*80}\n")
    
    # Test NHL API scraper
    print("1. Testing NHL API scraper...")
    try:
        nhl_df = scrape_api_events(game_id, drop_description=False)
        print(f"   ✓ NHL API returned {len(nhl_df)} events")
        print(f"   Columns: {list(nhl_df.columns)}")
        if len(nhl_df) > 0:
            print(f"   Sample events:")
            print(nhl_df.head(10).to_string())
            print(f"\n   Event types: {nhl_df['event'].value_counts().to_dict()}")
    except Exception as e:
        print(f"   ✗ NHL API scraper failed: {e}")
        import traceback
        traceback.print_exc()
        return
    
    # Test ESPN scraper
    print("\n2. Testing ESPN scraper...")
    try:
        # Get ESPN game ID
        espn_ids = scrape_espn_ids_single_game(str(game_date), home_team, away_team)
        if len(espn_ids) == 0:
            print(f"   ✗ Could not find ESPN game ID")
            return
        espn_game_id = espn_ids.espn_id.iloc[0]
        print(f"   Found ESPN game ID: {espn_game_id}")
        
        espn_df = scrape_espn_events(int(espn_game_id), drop_description=False)
        print(f"   ✓ ESPN returned {len(espn_df)} events")
        print(f"   Columns: {list(espn_df.columns)}")
        if len(espn_df) > 0:
            print(f"   Sample events:")
            print(espn_df.head(10).to_string())
            print(f"\n   Event types: {espn_df['event'].value_counts().to_dict()}")
    except Exception as e:
        print(f"   ✗ ESPN scraper failed: {e}")
        import traceback
        traceback.print_exc()
        return
    
    # Compare results
    print("\n3. Comparing results...")
    print(f"   NHL API events: {len(nhl_df)}")
    print(f"   ESPN events: {len(espn_df)}")
    print(f"   Difference: {len(nhl_df) - len(espn_df)}")
    
    # Check column structure
    nhl_cols = set(nhl_df.columns)
    espn_cols = set(espn_df.columns)
    print(f"\n   Column comparison:")
    print(f"   NHL API columns: {nhl_cols}")
    print(f"   ESPN columns: {espn_cols}")
    print(f"   Missing in NHL: {espn_cols - nhl_cols}")
    print(f"   Extra in NHL: {nhl_cols - espn_cols}")
    
    # Compare event types
    if len(nhl_df) > 0 and len(espn_df) > 0:
        nhl_events = nhl_df['event'].value_counts()
        espn_events = espn_df['event'].value_counts()
        print(f"\n   Event type comparison:")
        all_events = set(nhl_events.index) | set(espn_events.index)
        for event in sorted(all_events):
            nhl_count = nhl_events.get(event, 0)
            espn_count = espn_events.get(event, 0)
            diff = nhl_count - espn_count
            marker = "✓" if diff == 0 else "⚠"
            print(f"   {marker} {event}: NHL={nhl_count}, ESPN={espn_count}, diff={diff}")
    
    # Try to merge and compare matching events
    print(f"\n4. Comparing matching events...")
    if len(nhl_df) > 0 and len(espn_df) > 0:
        # Merge on the key columns
        merge_cols = ['event_player_1', 'game_seconds', 'period', 'version', 'event']
        
        # Check for matching events
        nhl_df_merge = nhl_df[merge_cols + ['coords_x', 'coords_y']].copy()
        espn_df_merge = espn_df[merge_cols + ['coords_x', 'coords_y']].copy()
        
        merged = nhl_df_merge.merge(
            espn_df_merge, 
            on=merge_cols, 
            how='inner',
            suffixes=('_nhl', '_espn')
        )
        
        print(f"   Matching events (same player, time, period, version, event): {len(merged)}")
        
        if len(merged) > 0:
            # Compare coordinates
            coord_diff_x = (merged['coords_x_nhl'] - merged['coords_x_espn']).abs()
            coord_diff_y = (merged['coords_y_nhl'] - merged['coords_y_espn']).abs()
            
            print(f"   Coordinate differences:")
            print(f"   X coord - Mean diff: {coord_diff_x.mean():.2f}, Max diff: {coord_diff_x.max()}")
            print(f"   Y coord - Mean diff: {coord_diff_y.mean():.2f}, Max diff: {coord_diff_y.max()}")
            
            # Show events with large coordinate differences
            large_diff = (coord_diff_x > 5) | (coord_diff_y > 5)
            if large_diff.sum() > 0:
                print(f"\n   ⚠ Events with coordinate differences > 5:")
                print(merged[large_diff][merge_cols + ['coords_x_nhl', 'coords_x_espn', 'coords_y_nhl', 'coords_y_espn']].head(10).to_string())
        
        # Check for events only in NHL API
        nhl_only = nhl_df_merge.merge(
            espn_df_merge,
            on=merge_cols,
            how='left',
            indicator=True
        )
        nhl_only_count = (nhl_only['_merge'] == 'left_only').sum()
        print(f"\n   Events only in NHL API: {nhl_only_count}")
        if nhl_only_count > 0:
            print("   Sample NHL-only events:")
            print(nhl_only[nhl_only['_merge'] == 'left_only'][merge_cols].head(10).to_string())
        
        # Check for events only in ESPN
        espn_only = espn_df_merge.merge(
            nhl_df_merge,
            on=merge_cols,
            how='left',
            indicator=True
        )
        espn_only_count = (espn_only['_merge'] == 'left_only').sum()
        print(f"\n   Events only in ESPN: {espn_only_count}")
        if espn_only_count > 0:
            print("   Sample ESPN-only events:")
            print(espn_only[espn_only['_merge'] == 'left_only'][merge_cols].head(10).to_string())

if __name__ == "__main__":
    # Test with the game from the notebook
    test_game_id = 2025020331
    
    # We need to get the game date and teams - let's try to fetch from API
    import requests
    import json
    
    try:
        api_url = f"https://api-web.nhle.com/v1/gamecenter/{test_game_id}/play-by-play"
        response = requests.get(api_url, timeout=30)
        api_data = json.loads(response.content)
        
        # Extract game info from the API response
        game_date_str = api_data.get('gameDate', '')
        home_team = api_data.get('homeTeam', {}).get('abbrev', '')
        away_team = api_data.get('awayTeam', {}).get('abbrev', '')
        
        # Parse date (format: YYYY-MM-DD)
        from datetime import datetime
        if game_date_str:
            try:
                game_date = datetime.strptime(game_date_str, '%Y-%m-%d').date()
            except:
                # Try alternative format
                game_date = datetime.fromisoformat(game_date_str.split('T')[0]).date()
        else:
            game_date = None
        
        print(f"Game Info from API:")
        print(f"  Date: {game_date}")
        print(f"  Home: {home_team}")
        print(f"  Away: {away_team}")
        
        if game_date and home_team and away_team:
            test_comparison(test_game_id, game_date, home_team, away_team)
        else:
            print("Could not extract all required game info from API")
            print(f"  gameDate: {game_date_str}")
            print(f"  homeTeam: {api_data.get('homeTeam', {})}")
            print(f"  awayTeam: {api_data.get('awayTeam', {})}")
        
    except Exception as e:
        print(f"Failed to get game info: {e}")
        import traceback
        traceback.print_exc()
        
        # Fallback: try with a known game
        print("\nTrying with hardcoded values...")
        # You may need to provide these manually
        print("Please provide game_date, home_team, and away_team for game_id", test_game_id)

