"""Tests for factoid extraction and truth registry"""

import sys
from pathlib import Path

# Add project root to path
sys.path.insert(0, str(Path(__file__).parent.parent))

from pantsonfire.factoid import FactoidExtractor, TimestampDetector, ContentTypeClassifier
from pantsonfire.truth_registry import TruthRegistry, ConsensusEngine
from pantsonfire.config import Config
from pantsonfire.llm import LLMClient


def test_content_type_classifier():
    """Check ContentTypeClassifier.classify against a table of known inputs.

    Each scenario is a ``(url, page text, expected label)`` triple. One
    result line is printed per scenario, followed by a pass tally.

    Returns:
        bool: True when every scenario produced its expected label.
    """
    print("\n🧪 Testing ContentTypeClassifier...")

    labeler = ContentTypeClassifier()

    scenarios = (
        ("https://oxen.ai/blog/fine-tuning-fridays", "Blog content here", "blog"),
        ("https://docs.oxen.ai/api/endpoints", "GET /api/v1/data", "api"),
        ("https://github.com/Oxen-AI/Oxen/blob/main/README.md", "# Oxen", "readme"),
    )

    hits = 0
    for address, body, wanted in scenarios:
        got = labeler.classify(address, body)
        # Evaluate the comparison once; it drives both the marker and the tally.
        if got == wanted:
            mark = "✅"
            hits += 1
        else:
            mark = "❌"
        print(f"  {mark} {address} → {got} (expected: {wanted})")

    total = len(scenarios)
    print(f"  Passed: {hits}/{total}")
    return hits == total


def test_timestamp_detector():
    """Check TimestampDetector.detect_timestamp on dated and undated text.

    The expectation for each sample is a short phrase: one containing
    "has" means a timestamp should be found (a non-None result), one
    containing "no" means the detector should return None.

    Returns:
        bool: True when every sample matched its expectation.
    """
    print("\n🧪 Testing TimestampDetector...")

    finder = TimestampDetector()

    samples = (
        ("Published: 2024-10-15", "has date"),
        ("Updated Q3 2024", "has date"),
        ("Some random content", "no date"),
    )

    hits = 0
    for text, expectation in samples:
        found = finder.detect_timestamp(text)
        # A detected timestamp must pair with a "has ..." expectation,
        # a missing one with a "no ..." expectation.
        if found is not None:
            agrees = "has" in expectation
        else:
            agrees = "no" in expectation

        mark = "✅" if agrees else "❌"
        print(f"  {mark} '{text[:50]}...' → {found}")
        if agrees:
            hits += 1

    total = len(samples)
    print(f"  Passed: {hits}/{total}")
    return hits == total


def test_factoid_extraction_pattern():
    """Exercise FactoidExtractor._pattern_based_extraction on sample text.

    Builds an extractor from a default Config and LLMClient, runs the
    pattern-based extraction over a three-sentence snippet, lists what
    was extracted, and looks for a claim mentioning "3.9" or
    "early access" (case-insensitive).

    Returns:
        bool: True when at least one of the two expected claims was found.
    """
    print("\n🧪 Testing Pattern-Based Factoid Extraction...")

    # Create minimal config
    settings = Config()
    extractor = FactoidExtractor(LLMClient(settings), settings)

    sample_text = """
    Oxen.ai requires Python 3.9+ for installation.
    The API endpoint is /api/v1/data.
    This feature is currently in early access.
    """

    extracted = extractor._pattern_based_extraction(
        sample_text, "https://test.com/blog", "blog"
    )

    print(f"  Extracted {len(extracted)} factoids:")

    # List every factoid and note the expected patterns in the same pass.
    saw_version = False
    saw_early_access = False
    for item in extracted:
        claim = item.claim_text
        print(f"    - {claim} ({item.category})")
        saw_version = saw_version or '3.9' in claim
        saw_early_access = saw_early_access or 'early access' in claim.lower()

    found_any = saw_version or saw_early_access
    mark = "✅" if found_any else "❌"
    print(f"  {mark} Found expected patterns: version={saw_version}, early_access={saw_early_access}")

    return found_any


def test_truth_registry():
    """Smoke-test TruthRegistry: add two factoids, then query domain accuracy.

    Creates a registry named "test-registry" from a default Config, adds
    two factoids for the domain "test.com" (one with a high truth score,
    one with a low one), and requests the accuracy summary for that domain.

    Returns:
        bool: True when the registry reports at least the two factoids that
        were just added; False otherwise. Errors raised by the registry
        (including a missing 'accuracy_score' key) propagate to the caller.
    """
    print("\n🧪 Testing TruthRegistry...")

    config = Config()
    registry = TruthRegistry(config, "test-registry")

    from pantsonfire.factoid import Factoid

    # Create test factoids
    factoid1 = Factoid(
        factoid_id="test1",
        claim_text="Python 3.9+ required",
        source_url="https://test.com/blog",
        domain="test.com",
        content_type="blog",
        truth_score=0.9,
        confidence=0.8,
    )

    factoid2 = Factoid(
        factoid_id="test2",
        claim_text="Early access available",
        source_url="https://test.com/blog2",
        domain="test.com",
        content_type="blog",
        truth_score=0.2,
        confidence=0.7,
    )

    registry.add_factoid(factoid1)
    registry.add_factoid(factoid2)

    # Test accuracy calculation
    accuracy = registry.get_accuracy_by_domain("test.com")

    stored = len(registry.factoids)
    print(f"  Added {stored} factoids")
    print(f"  Domain accuracy: {accuracy['accuracy_score']:.1f}%")

    # Report a real outcome instead of an unconditional pass.
    # NOTE(review): assumes registry.factoids keeps one entry per added
    # factoid (">=" tolerates entries that were already present) — confirm
    # against TruthRegistry.add_factoid.
    registry_ok = stored >= 2
    if registry_ok:
        print("  ✅ Truth registry functional")
    else:
        print(f"  ❌ Expected at least 2 factoids in registry, found {stored}")

    return registry_ok


def run_all_tests():
    """Run every test in this module and print a pass/fail summary.

    The tests run in a fixed order; each one's return value is treated
    as its pass/fail outcome.

    Returns:
        bool: True when all tests reported success.
    """
    rule = "=" * 80

    print(rule)
    print("Pantsonfire v2.0 - Unit Tests")
    print(rule)

    # Display label paired with the test function; order is execution order.
    suite = (
        ("Content Type Classifier", test_content_type_classifier),
        ("Timestamp Detector", test_timestamp_detector),
        ("Pattern-Based Extraction", test_factoid_extraction_pattern),
        ("Truth Registry", test_truth_registry),
    )

    outcomes = {}
    for label, check in suite:
        outcomes[label] = check()

    print("\n" + rule)
    print("Test Summary")
    print(rule)

    succeeded = len([flag for flag in outcomes.values() if flag])
    overall = len(outcomes)

    for label, flag in outcomes.items():
        if flag:
            verdict = "✅ PASS"
        else:
            verdict = "❌ FAIL"
        print(f"{verdict}: {label}")

    print(f"\nTotal: {succeeded}/{overall} tests passed")
    print(rule)

    return succeeded == overall


if __name__ == "__main__":
    # Exit status 0 when every test passed, 1 otherwise.
    sys.exit(int(not run_all_tests()))

