"""
CVE Data Sources
Module for fetching and managing CVE data from various sources
"""
import hashlib
import json
import time
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import requests

class CVEDataSource:
    """Base class for CVE data sources.

    Provides a small JSON-file cache for concrete sources. Each entry is
    stored as ``<cache_dir>/<cache_key>.json`` and holds the payload together
    with the (naive, local) time at which it was written.
    """

    def __init__(self, cache_dir: Optional[str] = None):
        """Set up the cache directory, creating it if necessary.

        Args:
            cache_dir: Directory for cache files. Defaults to
                ``~/.pysploit/cache`` when not given.
        """
        self.cache_dir = Path(cache_dir) if cache_dir else Path.home() / ".pysploit" / "cache"
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def fetch_cves(self, **kwargs) -> List[Dict]:
        """Fetch CVEs from data source; subclasses must override this."""
        raise NotImplementedError

    def get_cached_data(self, cache_key: str, max_age_hours: int = 24) -> Optional[Dict]:
        """Get cached data if available and not expired.

        Args:
            cache_key: Name of the cache entry (file name without ``.json``).
            max_age_hours: Entries older than this many hours are ignored.

        Returns:
            The payload previously passed to ``cache_data`` (whatever JSON
            value was stored), or ``None`` when the entry is missing,
            expired, unreadable or malformed.
        """
        cache_file = self.cache_dir / f"{cache_key}.json"

        try:
            with open(cache_file, 'r', encoding='utf-8') as f:
                entry = json.load(f)

            cached_time = datetime.fromisoformat(entry.get('timestamp', ''))
            if datetime.now() - cached_time > timedelta(hours=max_age_hours):
                return None

            return entry.get('data')
        except (OSError, ValueError, TypeError, AttributeError):
            # OSError: file missing/unreadable; ValueError: invalid JSON or
            # timestamp; TypeError/AttributeError: entry has an unexpected
            # shape. In every case the entry is treated as a cache miss.
            return None

    def cache_data(self, cache_key: str, data: Dict):
        """Cache data with timestamp.

        The entry is written to a temporary file and then renamed over the
        final name, so a failed or interrupted write never leaves a
        truncated cache file behind. Failures are reported as a warning and
        otherwise ignored (caching is best-effort).

        Args:
            cache_key: Name of the cache entry (file name without ``.json``).
            data: JSON-serialisable payload to store.
        """
        cache_file = self.cache_dir / f"{cache_key}.json"
        tmp_file = cache_file.with_name(cache_file.name + ".tmp")
        payload = {
            'timestamp': datetime.now().isoformat(),
            'data': data
        }

        try:
            with open(tmp_file, 'w', encoding='utf-8') as f:
                json.dump(payload, f, indent=2)
            tmp_file.replace(cache_file)
        except Exception as e:
            print(f"Warning: Could not cache data: {e}")
            try:
                tmp_file.unlink()
            except OSError:
                # Best-effort cleanup: the temp file may never have been created.
                pass

class NVDDataSource(CVEDataSource):
    """NVD (National Vulnerability Database) data source"""

    BASE_URL = "https://services.nvd.nist.gov/rest/json/cves/2.0"
    # Seconds to wait for a response before giving up on a request.
    REQUEST_TIMEOUT = 30
    # Timestamp format used for the date-range query parameters.
    DATE_FORMAT = '%Y-%m-%dT%H:%M:%S.000'
    # Metric lists to consult for the score, in order of preference.
    CVSS_METRIC_KEYS = ('cvssMetricV31', 'cvssMetricV30', 'cvssMetricV2')

    def __init__(self, api_key: Optional[str] = None, cache_dir: Optional[str] = None):
        """Create an HTTP session, attaching the API key header if provided.

        Args:
            api_key: Optional NVD API key, sent as the ``apiKey`` header.
            cache_dir: Cache directory passed through to the base class.
        """
        super().__init__(cache_dir)
        self.api_key = api_key
        self.session = requests.Session()
        if api_key:
            self.session.headers.update({'apiKey': api_key})

    def fetch_cves(self,
                   cve_id: Optional[str] = None,
                   keyword: Optional[str] = None,
                   published_start: Optional[datetime] = None,
                   published_end: Optional[datetime] = None,
                   modified_start: Optional[datetime] = None,
                   modified_end: Optional[datetime] = None,
                   cvss_v3_severity: Optional[str] = None,
                   results_per_page: int = 100) -> List[Dict]:
        """
        Fetch CVEs from NVD API

        All pages of the result set are retrieved. Complete result sets are
        cached; if a request fails part-way, the CVEs collected so far are
        returned but NOT cached, so a later call retries the query.

        Args:
            cve_id: Specific CVE ID to fetch
            keyword: Keyword to search for
            published_start: Start date for published CVEs
            published_end: End date for published CVEs
            modified_start: Start date for modified CVEs
            modified_end: End date for modified CVEs
            cvss_v3_severity: CVSS v3 severity (LOW, MEDIUM, HIGH, CRITICAL)
            results_per_page: Number of results per page (clamped to 1..2000)

        Returns:
            List of CVE dicts in the format produced by ``_process_cve_data``.
        """
        # Every parameter that influences the query must be part of the key,
        # otherwise different queries would share a cache entry.
        cache_params = {
            'cve_id': cve_id,
            'keyword': keyword,
            'published_start': published_start.isoformat() if published_start else None,
            'published_end': published_end.isoformat() if published_end else None,
            'modified_start': modified_start.isoformat() if modified_start else None,
            'modified_end': modified_end.isoformat() if modified_end else None,
            'cvss_v3_severity': cvss_v3_severity,
            'results_per_page': results_per_page
        }
        # The built-in hash() of a str is randomised per process, so it cannot
        # name a file that must be found again on the next run; use a digest.
        # default=str only guards against unexpected argument types.
        serialized = json.dumps(cache_params, sort_keys=True, default=str)
        cache_key = f"nvd_{hashlib.sha256(serialized.encode('utf-8')).hexdigest()}"

        # Check cache first; an empty list is a valid cached result.
        cached_data = self.get_cached_data(cache_key)
        if cached_data is not None:
            return cached_data

        params = {
            'resultsPerPage': max(1, min(results_per_page, 2000))
        }

        if cve_id:
            params['cveId'] = cve_id
        if keyword:
            params['keywordSearch'] = keyword
        if published_start:
            params['pubStartDate'] = published_start.strftime(self.DATE_FORMAT)
        if published_end:
            params['pubEndDate'] = published_end.strftime(self.DATE_FORMAT)
        if modified_start:
            params['modStartDate'] = modified_start.strftime(self.DATE_FORMAT)
        if modified_end:
            params['modEndDate'] = modified_end.strftime(self.DATE_FORMAT)
        if cvss_v3_severity:
            params['cvssV3Severity'] = cvss_v3_severity.upper()

        all_cves = []
        start_index = 0
        complete = False

        while True:
            params['startIndex'] = start_index

            try:
                response = self.session.get(
                    self.BASE_URL, params=params, timeout=self.REQUEST_TIMEOUT
                )
                response.raise_for_status()
                data = response.json()
            except (requests.exceptions.RequestException, ValueError) as e:
                # ValueError covers an undecodable body on requests versions
                # whose JSON error is not a RequestException.
                print(f"Error fetching CVE data: {e}")
                break

            vulnerabilities = data.get('vulnerabilities', [])
            if not vulnerabilities:
                complete = True
                break

            # Process CVEs
            all_cves.extend(
                self._process_cve_data(vuln.get('cve', {})) for vuln in vulnerabilities
            )

            # Check if we have more results
            start_index += len(vulnerabilities)
            if start_index >= data.get('totalResults', 0):
                complete = True
                break

            # Rate limiting - NVD allows 5 requests per 30 seconds without API key
            if not self.api_key:
                time.sleep(6)  # Be conservative

        # Cache only complete result sets so an error is not served for hours.
        if complete:
            self.cache_data(cache_key, all_cves)
        return all_cves

    def _process_cve_data(self, cve_data: Dict) -> Dict:
        """Process raw CVE data into standardized format.

        Args:
            cve_data: The ``cve`` object of one entry in the API response.

        Returns:
            Dict with keys ``id``, ``description``, ``published``,
            ``modified``, ``base_score``, ``severity``, ``references``,
            ``configurations`` and ``source``. ``base_score`` and
            ``severity`` are ``None`` when no CVSS metric is present.
        """
        # First English description, or '' when there is none.
        description = next(
            (d.get('value', '') for d in cve_data.get('descriptions', [])
             if d.get('lang') == 'en'),
            ''
        )

        # Take the score from the first available metric list (v3.1, then
        # v3.0, then v2), using the first entry of that list.
        metrics = cve_data.get('metrics', {})
        base_score = None
        severity = None
        for metric_key in self.CVSS_METRIC_KEYS:
            entries = metrics.get(metric_key)
            if not entries:
                continue
            entry = entries[0]
            cvss_data = entry.get('cvssData', {})
            base_score = cvss_data.get('baseScore')
            # v3.x carries baseSeverity inside cvssData; v2 carries it on the
            # metric entry itself.
            severity = cvss_data.get('baseSeverity') or entry.get('baseSeverity')
            break

        references = [
            {
                'url': ref.get('url', ''),
                'source': ref.get('source', ''),
                'tags': ref.get('tags', [])
            }
            for ref in cve_data.get('references', [])
        ]

        # Flatten the configuration tree, keeping only vulnerable CPE matches.
        configurations = [
            {
                'cpe23Uri': cpe_match.get('criteria', ''),
                'versionStartIncluding': cpe_match.get('versionStartIncluding'),
                'versionEndExcluding': cpe_match.get('versionEndExcluding')
            }
            for config in cve_data.get('configurations', [])
            for node in config.get('nodes', [])
            for cpe_match in node.get('cpeMatch', [])
            if cpe_match.get('vulnerable', False)
        ]

        return {
            'id': cve_data.get('id', ''),
            'description': description,
            'published': cve_data.get('published', ''),
            'modified': cve_data.get('lastModified', ''),
            'base_score': base_score,
            'severity': severity,
            'references': references,
            'configurations': configurations,
            'source': 'NVD'
        }

class ExploitDBDataSource(CVEDataSource):
    """Exploit Database data source"""

    BASE_URL = "https://www.exploit-db.com"

    def __init__(self, cache_dir: Optional[str] = None):
        """Create an HTTP session with a tool-identifying User-Agent.

        Args:
            cache_dir: Cache directory passed through to the base class.
        """
        super().__init__(cache_dir)
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'PySploit/1.0 (Security Research Tool)'
        })

    def fetch_exploits(self,
                      cve_id: Optional[str] = None,
                      platform: Optional[str] = None,
                      type_filter: Optional[str] = None) -> List[Dict]:
        """
        Fetch exploit data from Exploit-DB

        NOTE: retrieval is not implemented yet; this currently always
        produces an empty list (and caches it for a week).

        Args:
            cve_id: CVE ID to search for
            platform: Platform filter (windows, linux, etc.)
            type_filter: Exploit type filter (remote, local, etc.)

        Returns:
            List of exploit dicts (currently always empty).
        """
        # The built-in hash() of a str is randomised per process, so it cannot
        # name a file that must be found again on the next run; use a digest.
        cache_params = {'cve': cve_id, 'platform': platform, 'type': type_filter}
        serialized = json.dumps(cache_params, sort_keys=True, default=str)
        cache_key = f"edb_{hashlib.sha256(serialized.encode('utf-8')).hexdigest()}"

        # Check cache first; an empty list is a valid cached result.
        cached_data = self.get_cached_data(cache_key, max_age_hours=168)  # Cache for a week
        if cached_data is not None:
            return cached_data

        # Note: This is a simplified implementation
        # In practice, you might need to scrape the website or use their CSV data
        # This would require more sophisticated parsing
        #
        # Placeholder - would need actual scraping logic. For a specific CVE
        # the search URL would be f"{self.BASE_URL}/search?cve={cve_id}" and
        # the implementation would parse the search results.
        exploits = []

        # Cache results
        self.cache_data(cache_key, exploits)
        return exploits

class CVEDataManager:
    """Manager for multiple CVE data sources"""

    def __init__(self, nvd_api_key: Optional[str] = None, cache_dir: Optional[str] = None):
        """Create the NVD and Exploit-DB sources sharing one cache directory.

        Args:
            nvd_api_key: Optional API key forwarded to the NVD source.
            cache_dir: Optional cache directory forwarded to both sources.
        """
        self.nvd_source = NVDDataSource(api_key=nvd_api_key, cache_dir=cache_dir)
        self.edb_source = ExploitDBDataSource(cache_dir=cache_dir)

    def search_cves(self,
                   query: str,
                   include_exploits: bool = False,
                   severity_filter: Optional[str] = None,
                   date_range: Optional[Tuple[datetime, datetime]] = None) -> List[Dict]:
        """
        Search CVEs across multiple sources

        Args:
            query: Search query (CVE ID, keyword, etc.)
            include_exploits: Whether to include exploit data
            severity_filter: CVSS severity filter
            date_range: Date range tuple (start, end)

        Returns:
            List of CVE dicts from the NVD source; when ``include_exploits``
            is true each dict also gets an ``exploits`` key.
        """
        # Surrounding whitespace would otherwise make a CVE ID look like a keyword.
        query = query.strip()

        # Determine if query is a CVE ID
        if query.upper().startswith('CVE-'):
            cve_id, keyword = query.upper(), None
        else:
            cve_id, keyword = None, query

        # Prepare date parameters
        published_start, published_end = date_range if date_range else (None, None)

        # Fetch from NVD
        cves = self.nvd_source.fetch_cves(
            cve_id=cve_id,
            keyword=keyword,
            published_start=published_start,
            published_end=published_end,
            cvss_v3_severity=severity_filter
        )

        # Add exploit information if requested
        if include_exploits:
            for cve in cves:
                cve['exploits'] = self.edb_source.fetch_exploits(cve_id=cve['id'])

        return cves

    def get_recent_cves(self, days: int = 7, severity_filter: Optional[str] = None) -> List[Dict]:
        """Get CVEs published in the last N days.

        Args:
            days: Size of the look-back window, ending now.
            severity_filter: Optional CVSS severity filter.

        Returns:
            List of CVE dicts from the NVD source.
        """
        end_date = datetime.now()
        start_date = end_date - timedelta(days=days)

        return self.nvd_source.fetch_cves(
            published_start=start_date,
            published_end=end_date,
            cvss_v3_severity=severity_filter
        )

    def get_high_severity_cves(self, days: int = 30) -> List[Dict]:
        """Get high and critical severity CVEs from recent period.

        Args:
            days: Size of the look-back window, ending now.

        Returns:
            HIGH and CRITICAL CVEs combined, sorted by base score with the
            highest first; entries without a score sort last.
        """
        cves = []
        for severity in ('HIGH', 'CRITICAL'):
            cves.extend(self.get_recent_cves(days=days, severity_filter=severity))

        # 'base_score' may be present but None, and None cannot be compared
        # with a number, so treat a missing score as 0 for ordering.
        cves.sort(key=lambda cve: cve.get('base_score') or 0, reverse=True)

        return cves