"""Integration test: Real-world analysis of Oxen.ai blog"""

import sys
from pathlib import Path

# Add project root to path
sys.path.insert(0, str(Path(__file__).parent.parent))

from pantsonfire.factoid import FactoidExtractor, TimestampDetector, ContentTypeClassifier, FactoidDeduplicator
from pantsonfire.truth_registry import TruthRegistry, ConsensusEngine
from pantsonfire.model_manager import ModelManager
from pantsonfire.remediation import RemediationOrchestrator, ContentUpdater
from pantsonfire.config import Config
from pantsonfire.llm import LLMClient
from pantsonfire.extractors.external import ExternalExtractor
import os


def test_real_oxen_blog_analysis():
    """Run the factoid pipeline end to end against an Oxen.ai blog post.

    The blog post is compared with an Oxen.ai docs page used as the truth
    source. The steps are: extract content from both URLs, classify it,
    detect a publication timestamp, extract factoids, deduplicate them,
    load them into a ``TruthRegistry``, report URL and domain accuracy,
    list low-truth factoids and, when possible, generate remediation
    suggestions in dry-run mode.

    ``OPENROUTER_API_KEY`` is read from the environment. When it is missing,
    or when no low-truth factoids are found, the remediation step is skipped.

    Returns:
        bool: ``True`` when at least one factoid was extracted from the blog
        post; ``False`` when the blog content could not be extracted or no
        blog factoids were found.
    """
    separator = "=" * 80
    print("\n" + separator)
    print("🔥 INTEGRATION TEST: Oxen.ai Blog Analysis")
    print(separator)

    # Setup
    config = Config()
    config.openrouter_api_key = os.getenv('OPENROUTER_API_KEY')

    if not config.openrouter_api_key:
        print("\n⚠️  No OPENROUTER_API_KEY found - using pattern-based extraction only")

    llm_client = LLMClient(config)
    extractor_engine = FactoidExtractor(llm_client, config)
    classifier = ContentTypeClassifier()
    timestamp_detector = TimestampDetector()
    content_extractor = ExternalExtractor()

    # Test URLs
    test_url = "https://www.oxen.ai/entry/fine-tuning-a-with-oxen-ai"
    truth_url = "https://docs.oxen.ai/examples/fine-tuning/image_editing"

    print(f"\n📄 Analyzing: {test_url}")
    print(f"📚 Truth Source: {truth_url}")

    # Extract content
    print("\n🔍 Step 1: Extracting content...")
    blog_content = content_extractor.extract(test_url)
    truth_content = content_extractor.extract(truth_url)

    if not blog_content:
        print("❌ Failed to extract blog content")
        return False

    print(f"✅ Extracted {len(blog_content)} chars from blog")
    if truth_content:
        print(f"✅ Extracted {len(truth_content)} chars from truth source")
    else:
        # The pass/fail result depends only on the blog factoids, so a missing
        # truth source is reported but does not abort the run. Substituting an
        # empty string avoids calling len() on None and keeps the downstream
        # calls on the same path as an empty extraction.
        print("⚠️  Failed to extract truth source content - continuing with an empty truth source")
        truth_content = ""

    # Classify content
    print("\n🏷️  Step 2: Classifying content...")
    blog_type = classifier.classify(test_url, blog_content)
    truth_type = classifier.classify(truth_url, truth_content)
    blog_trust = classifier.get_trust_weight(blog_type)
    truth_trust = classifier.get_trust_weight(truth_type)

    print(f"  Blog: {blog_type} (trust={blog_trust})")
    print(f"  Truth: {truth_type} (trust={truth_trust})")

    # Detect timestamps
    print("\n📅 Step 3: Detecting timestamps...")
    blog_timestamp = timestamp_detector.detect_timestamp(blog_content)
    if blog_timestamp:
        staleness = timestamp_detector.calculate_staleness(blog_timestamp)
        print(f"  Blog published: {blog_timestamp.strftime('%Y-%m-%d')}")
        print(f"  Staleness score: {staleness:.2f} (1.0=fresh, 0.0=very stale)")
    else:
        print("  No timestamp detected")

    # Extract factoids
    print("\n🧠 Step 4: Extracting factoids...")
    blog_factoids = extractor_engine.extract_factoids(
        blog_content,
        test_url,
        blog_type,
        granularity="paragraph",
    )

    truth_factoids = extractor_engine.extract_factoids(
        truth_content,
        truth_url,
        truth_type,
        granularity="paragraph",
    )

    print(f"✅ Extracted {len(blog_factoids)} factoids from blog")
    print(f"✅ Extracted {len(truth_factoids)} factoids from truth source")

    # Show some examples
    print("\n📋 Example Blog Factoids:")
    for i, factoid in enumerate(blog_factoids[:5], 1):
        print(f"  {i}. [{factoid.category}] {factoid.claim_text[:80]}...")
        print(f"     Confidence: {factoid.confidence:.2f}, Trust: {factoid.trust_weight:.2f}")

    # Deduplicate
    print("\n🔄 Step 5: Deduplicating factoids...")
    deduplicator = FactoidDeduplicator()
    all_factoids = blog_factoids + truth_factoids
    unique_factoids = deduplicator.deduplicate(all_factoids)

    print(f"  Before: {len(all_factoids)} factoids")
    print(f"  After: {len(unique_factoids)} unique factoids")

    # Build truth registry
    print("\n📊 Step 6: Building truth registry...")
    registry = TruthRegistry(config, "oxen-blog-test")
    registry.add_factoids(unique_factoids)

    # Calculate accuracy
    print("\n🎯 Step 7: Calculating accuracy scores...")

    # Get accuracy for the blog URL
    blog_accuracy = registry.get_accuracy_by_url(test_url)
    print("\n  Blog URL Accuracy:")
    print(f"    URL: {blog_accuracy['url']}")
    print(f"    Score: {blog_accuracy['accuracy_score']:.1f}%")
    print(f"    Total Factoids: {blog_accuracy['total_factoids']}")
    print(f"    True: {blog_accuracy['true_count']}, False: {blog_accuracy['false_count']}, Uncertain: {blog_accuracy['uncertain_count']}")

    # Get domain-level accuracy
    domain_accuracy = registry.get_accuracy_by_domain("www.oxen.ai")
    print("\n  Domain Accuracy (www.oxen.ai):")
    print(f"    Score: {domain_accuracy['accuracy_score']:.1f}%")
    print(f"    Total URLs: {domain_accuracy['total_urls']}")
    print(f"    Total Factoids: {domain_accuracy['total_factoids']}")

    # Find low-truth factoids
    print("\n🔍 Step 8: Identifying problematic factoids...")
    low_truth = registry.query_factoids(max_truth_score=0.5)
    print(f"  Found {len(low_truth)} low-truth factoids (score < 0.5)")

    if low_truth:
        print("\n  ⚠️  Top Issues:")
        for i, factoid in enumerate(low_truth[:3], 1):
            print(f"    {i}. [{factoid.category}] {factoid.claim_text[:60]}...")
            print(f"       Truth Score: {factoid.truth_score:.2f}, Source: {factoid.source_url}")

    # Test remediation
    print("\n🔧 Step 9: Testing remediation modes...")

    if config.openrouter_api_key and low_truth:
        remediation = RemediationOrchestrator(config, llm_client, mode="suggestions")
        truth_map = {f.factoid_id: "See official docs for current information" for f in low_truth}

        # dry_run=True is passed so this call only asks for suggestions.
        results = remediation.remediate_factoids(low_truth[:2], truth_map, dry_run=True)
        print(f"  Generated {len(results['updates'])} content update suggestions")

        if results['updates']:
            print("\n  Example Update:")
            update = results['updates'][0]
            print(f"    URL: {update['url']}")
            print(f"    Current: {update['current_text'][:60]}...")
            print(f"    Suggested: {update['suggested_text'][:60]}...")
            print(f"    Priority: {update['priority']}")
    else:
        # Say why nothing follows the step header instead of staying silent.
        print("  Skipped (requires OPENROUTER_API_KEY and at least one low-truth factoid)")

    # Summary. The verdict is computed once so the printed result and the
    # return value cannot drift apart.
    passed = len(blog_factoids) > 0
    print("\n" + separator)
    print("✅ INTEGRATION TEST COMPLETE")
    print(separator)
    print(f"  Total Factoids Extracted: {len(all_factoids)}")
    print(f"  Unique Factoids: {len(unique_factoids)}")
    print(f"  Blog Accuracy: {blog_accuracy['accuracy_score']:.1f}%")
    print(f"  Low-Truth Factoids: {len(low_truth)}")
    print(f"  Test Result: {'PASS ✅' if passed else 'FAIL ❌'}")
    print(separator + "\n")

    return passed


def test_model_manager():
    """Exercise ``ModelManager`` task-to-model lookup and cost estimation.

    For each of three task names, prints the selected model with its tier
    and per-1k-token cost, provided the model has an entry in
    ``manager.MODELS``. Then prints the estimated total cost and the models
    used for three workload scenarios.

    Returns:
        bool: ``True`` once both sections have run without raising.
    """
    banner = "=" * 80
    print("\n" + banner)
    print("🤖 TEST: Model Manager")
    print(banner)

    manager = ModelManager(Config())

    # Model selection for different tasks
    print("\n📋 Model Selection by Task:")
    for task_name in ("factoid_extraction", "fact_verification", "conflict_resolution"):
        chosen = manager.get_model_for_task(task_name)
        spec = manager.MODELS.get(chosen)
        if not spec:
            # Nothing to report for a model without a registry entry.
            continue
        print(f"  {task_name} → {chosen} (tier={spec.tier.value}, cost=${spec.cost_per_1k_tokens:.5f}/1k)")

    # Cost estimation: (number of factoids, use search, complexity)
    print("\n💰 Cost Estimation:")
    scenarios = (
        (10, False, "simple"),
        (50, True, "medium"),
        (100, True, "complex"),
    )
    for count, with_search, level in scenarios:
        quote = manager.estimate_cost(count, with_search, level)
        print(f"  {count} factoids ({level}, search={with_search}): ${quote['total_cost']:.4f}")
        print(f"    Models: {quote['models_used']}")

    print("\n✅ Model Manager Test Complete")
    return True


def test_consensus_engine():
    """Exercise ``ConsensusEngine`` scoring on three agreeing factoids.

    Builds three ``Factoid`` objects carrying the same claim text from a
    docs page, a blog post and a GitHub README, each with a different truth
    score and publication age. Prints the overall consensus score followed
    by the four component scores.

    Returns:
        bool: ``True`` once the scores have been printed without raising.
    """
    banner = "=" * 80
    print("\n" + banner)
    print("🎯 TEST: Consensus Engine")
    print(banner)

    from datetime import datetime, timedelta
    from pantsonfire.factoid import Factoid

    engine = ConsensusEngine(Config())

    # One row per source:
    # (factoid id, source URL, domain, content type, truth score, age in days)
    shared_claim = "Python 3.9+ required"
    source_rows = (
        ("f1", "https://docs.oxen.ai/install", "docs.oxen.ai", "docs", 0.9, 30),
        ("f2", "https://oxen.ai/blog/install-guide", "oxen.ai", "blog", 0.85, 200),
        ("f3", "https://github.com/Oxen-AI/Oxen", "github.com", "readme", 0.88, 10),
    )
    factoids = [
        Factoid(
            factoid_id=fid,
            claim_text=shared_claim,
            source_url=url,
            domain=host,
            content_type=kind,
            truth_score=score,
            published_at=datetime.now() - timedelta(days=age_days),
        )
        for fid, url, host, kind, score, age_days in source_rows
    ]

    # Overall consensus
    overall = engine.calculate_consensus_score(factoids)

    print("\n📊 Consensus Analysis:")
    print(f"  Factoids analyzed: {len(factoids)}")
    print(f"  Consensus score: {overall:.3f}")
    print("  Breakdown:")
    # The component scores come from the engine's private helpers.
    print(f"    - Official docs: {engine._calculate_official_docs_score(factoids):.2f}")
    print(f"    - Web search: {engine._calculate_web_search_score(factoids):.2f}")
    print(f"    - GitHub: {engine._calculate_github_score(factoids):.2f}")
    print(f"    - Recency: {engine._calculate_recency_score(factoids):.2f}")

    print("\n✅ Consensus Engine Test Complete")
    return True


def _run_single_test(failure_label, test_func):
    """Run one test callable and turn any exception into a failed result.

    Args:
        failure_label: Text placed before "failed:" in the error message.
        test_func: Zero-argument callable; its return value is the result.

    Returns:
        Whatever ``test_func`` returns, or ``False`` if it raised.
    """
    import traceback

    try:
        return test_func()
    except Exception as exc:
        # Runner boundary: one failing test must not stop the others. The
        # traceback is always printed so the failure can be diagnosed.
        print(f"\n❌ {failure_label} failed: {exc}")
        traceback.print_exc()
        return False


def run_integration_tests():
    """Run all integration tests and print a pass/fail summary.

    Each test runs in isolation: an exception is reported with its traceback
    and recorded as a failure, and the remaining tests still run.

    Returns:
        bool: ``True`` when every test returned a truthy value, else ``False``.
    """
    separator = "=" * 80
    print("\n" + separator)
    print("🔬 PANTSONFIRE V2.0 - INTEGRATION TESTS")
    print(separator)

    # (summary name, label used in the failure message, test callable)
    suite = (
        ("Oxen Blog Analysis", "Oxen Blog Analysis", test_real_oxen_blog_analysis),
        ("Model Manager", "Model Manager test", test_model_manager),
        ("Consensus Engine", "Consensus Engine test", test_consensus_engine),
    )

    results = {}
    for test_name, failure_label, test_func in suite:
        results[test_name] = _run_single_test(failure_label, test_func)

    # Final summary
    print("\n" + separator)
    print("INTEGRATION TEST SUMMARY")
    print(separator)

    passed = sum(1 for outcome in results.values() if outcome)
    total = len(results)

    for test_name, outcome in results.items():
        status = "✅ PASS" if outcome else "❌ FAIL"
        print(f"{status}: {test_name}")

    print(f"\nTotal: {passed}/{total} tests passed")

    if passed == total:
        print("\n🎉 ALL INTEGRATION TESTS PASSED!")
    else:
        print(f"\n⚠️  {total - passed} test(s) failed")

    print(separator + "\n")

    return passed == total


if __name__ == "__main__":
    # Exit with status 0 when every integration test passed, 1 otherwise.
    sys.exit(0 if run_integration_tests() else 1)

