"""Code analyzer service - analyzes infrastructure code and generates analysis documents.

Uses Claude Opus for analysis generation and integrates with compliance and cost tools.
"""

import asyncio
import difflib
import hashlib
import json
import logging
from asyncio import TimeoutError as AsyncTimeoutError
from datetime import datetime
from pathlib import Path
from typing import Any, Optional

try:
    from anthropic import AsyncAnthropic
    ANTHROPIC_AVAILABLE = True
except ImportError:
    ANTHROPIC_AVAILABLE = False
    AsyncAnthropic = None

from api.config import settings
from api.services.compliance_service import ComplianceService
from api.models.v1_requests import ComplianceRequirementsRequest
from wistx_mcp.tools import pricing
from wistx_mcp.tools.lib.retry_utils import with_retry
from wistx_mcp.tools.lib.metrics import track_tool_metrics
from data_pipelines.models.knowledge_article import (
    ContentType,
    Domain,
    KnowledgeArticle,
)

logger = logging.getLogger(__name__)


class CodeAnalyzer:
    """Analyzes infrastructure code and generates analysis documents.
    
    Performs component-level analysis using Claude Opus, integrating with
    compliance and cost tools for accurate analysis.
    """

    def __init__(self):
        """Initialize code analyzer with compliance and cost services.

        The LLM client is only created when the ``anthropic`` package is
        importable and ``settings.anthropic_api_key`` is set. Otherwise
        ``llm_client`` and ``model`` stay ``None``, a warning is logged, and
        the methods that check ``self.llm_client`` use their fallbacks.
        """
        self.compliance_service = ComplianceService()
        self.pricing_tool = pricing

        # Degraded defaults; overwritten below only when a client can be built.
        self.llm_client = None
        self.model = None

        if not ANTHROPIC_AVAILABLE:
            logger.warning(
                "anthropic package not installed. Code analysis will fail. "
                "Install with: pip install anthropic"
            )
        else:
            anthropic_api_key = getattr(settings, "anthropic_api_key", None)
            if not anthropic_api_key:
                logger.warning(
                    "ANTHROPIC_API_KEY not set. Code analysis will fail. "
                    "Set ANTHROPIC_API_KEY in .env file."
                )
            else:
                self.llm_client = AsyncAnthropic(api_key=anthropic_api_key)
                self.model = "claude-opus-4-1"
        self.temperature = 0.1
        self.max_tokens = 8000

        # ``self.model`` is None in degraded mode; guard before calling
        # ``.lower()`` so construction does not raise AttributeError.
        if self.model and "opus" in self.model.lower():
            self.llm_timeout_seconds = 120.0
        else:
            self.llm_timeout_seconds = 60.0
        self.max_file_size_mb = 10
        self.max_content_length = 100000
        self.max_component_content_length = 50000
        self.code_similarity_threshold = 0.7

    def _validate_component_info(self, component_info: dict[str, Any]) -> None:
        """Validate component info structure.
        
        Args:
            component_info: Component information dictionary
            
        Raises:
            ValueError: If validation fails
        """
        required_fields = ["name", "type", "start_line", "end_line"]
        for field in required_fields:
            if field not in component_info:
                raise ValueError(f"Missing required field in component_info: {field}")
        
        start_line = component_info["start_line"]
        end_line = component_info["end_line"]
        
        if not isinstance(start_line, int) or start_line < 1:
            raise ValueError(f"start_line must be positive integer, got: {start_line}")
        
        if not isinstance(end_line, int) or end_line < start_line:
            raise ValueError(
                f"end_line ({end_line}) must be >= start_line ({start_line})"
            )
        
        if not isinstance(component_info.get("name"), str) or not component_info["name"]:
            raise ValueError("component name must be non-empty string")
        
        if not isinstance(component_info.get("type"), str) or not component_info["type"]:
            raise ValueError("component type must be non-empty string")

    def _validate_repo_context(self, repo_context: dict[str, Any]) -> None:
        """Validate repository context.
        
        Args:
            repo_context: Repository context dictionary
            
        Raises:
            ValueError: If validation fails
        """
        required_fields = ["repo_url", "branch", "commit_sha", "resource_id", "user_id"]
        for field in required_fields:
            if field not in repo_context:
                raise ValueError(f"Missing required field in repo_context: {field}")
        
        if not repo_context["commit_sha"] or len(repo_context["commit_sha"]) < 7:
            raise ValueError("commit_sha must be valid SHA (at least 7 characters)")
        
        if not repo_context["branch"]:
            raise ValueError("branch must be non-empty string")

    def _detect_code_in_content(
        self,
        analysis_content: str,
        original_code: str,
    ) -> bool:
        """Detect if analysis contains original code.
        
        Args:
            analysis_content: Analysis markdown content
            original_code: Original component code
            
        Returns:
            True if code detected, False otherwise
        """
        if not original_code or len(original_code.strip()) < 20:
            return False
        
        analysis_lines = analysis_content.split("\n")
        original_lines = original_code.split("\n")
        
        code_blocks = []
        in_code_block = False
        current_block = []
        
        for line in analysis_lines:
            if line.strip().startswith("```"):
                if in_code_block:
                    if current_block:
                        code_blocks.append("\n".join(current_block))
                    current_block = []
                    in_code_block = False
                else:
                    in_code_block = True
            elif in_code_block:
                current_block.append(line)
        
        if current_block:
            code_blocks.append("\n".join(current_block))
        
        for code_block in code_blocks:
            if len(code_block.strip()) < 20:
                continue
            
            similarity = difflib.SequenceMatcher(
                None,
                code_block.lower(),
                original_code.lower(),
            ).ratio()
            
            if similarity > self.code_similarity_threshold:
                logger.warning(
                    "Code detected in analysis (similarity: %.2f, threshold: %.2f)",
                    similarity,
                    self.code_similarity_threshold,
                )
                return True
        
        return False

    def _sanitize_analysis_content(
        self,
        content: str,
        original_code: str,
        source_url: str,
    ) -> str:
        """Remove code snippets from analysis content.
        
        Args:
            content: Analysis markdown content
            original_code: Original component code
            source_url: Source URL for reference
            
        Returns:
            Sanitized content without code snippets
        """
        lines = content.split("\n")
        sanitized_lines = []
        in_code_block = False
        code_block_start = -1
        
        for i, line in enumerate(lines):
            if line.strip().startswith("```"):
                if in_code_block:
                    code_block = "\n".join(lines[code_block_start:i+1])
                    
                    similarity = difflib.SequenceMatcher(
                        None,
                        code_block.lower(),
                        original_code.lower(),
                    ).ratio()
                    
                    if similarity > self.code_similarity_threshold:
                        logger.info(
                            "Removing code block from analysis (similarity: %.2f)",
                            similarity,
                        )
                        sanitized_lines.append(
                            f"\n> **Note**: Code removed for security. "
                            f"View source at: {source_url}\n"
                        )
                    else:
                        sanitized_lines.extend(lines[code_block_start:i+1])
                    
                    in_code_block = False
                    code_block_start = -1
                else:
                    in_code_block = True
                    code_block_start = i
            elif not in_code_block:
                sanitized_lines.append(line)
        
        return "\n".join(sanitized_lines)

    def calculate_file_hash(self, file_content: str) -> str:
        """Calculate SHA-256 hash of file content.
        
        Args:
            file_content: File content string
            
        Returns:
            SHA-256 hash as hexadecimal string
        """
        return hashlib.sha256(file_content.encode("utf-8")).hexdigest()

    async def should_reanalyze_file(
        self,
        file_path: Path,
        file_content: str,
        resource_id: str,
    ) -> tuple[bool, str]:
        """Check if file needs re-analysis based on content hash.

        Looks up a stored ``repository-analysis`` article for this resource
        whose ``source_url`` ends in this file's name, and compares its
        ``source_hash`` with the hash of ``file_content``.

        Args:
            file_path: File path
            file_content: File content
            resource_id: Resource ID

        Returns:
            Tuple of (should_reanalyze, current_hash). ``should_reanalyze`` is
            True when no article is found or the stored hash differs.
        """
        import re

        current_hash = self.calculate_file_hash(file_content)

        from api.database.mongodb import mongodb_manager

        # The file name is data, not a pattern: escape it so "." and other
        # regex metacharacters match literally, and anchor it to a full path
        # segment so "main.tf" does not also match "domain.tf". The trailing
        # alternatives cover the "#L<start>-L<end>" fragment that
        # generate_source_urls appends, a query string, or end of string.
        # NOTE(review): files with the same name in different directories
        # still collide here; matching on the relative path would need the
        # repository root, which this method does not receive.
        name_pattern = rf"(?:^|/){re.escape(file_path.name)}(?:[#?]|$)"

        db = mongodb_manager.get_database()
        # NOTE(review): find_one is called without await inside a coroutine.
        # That is correct for a synchronous driver (but blocks the event
        # loop) and wrong for an async one - confirm which mongodb_manager
        # returns.
        existing = db.knowledge_articles.find_one(
            {
                "resource_id": resource_id,
                "source_url": {"$regex": name_pattern},
                "source_type": "repository-analysis",
            },
            {"source_hash": 1}
        )

        if not existing:
            return True, current_hash

        existing_hash = existing.get("source_hash")
        if existing_hash != current_hash:
            logger.info(
                "File changed: %s (hash: %s -> %s)",
                file_path,
                existing_hash[:8] if existing_hash else "none",
                current_hash[:8],
            )
            return True, current_hash

        logger.debug("File unchanged: %s (hash: %s)", file_path, current_hash[:8])
        return False, current_hash

    async def generate_source_urls(
        self,
        repo_url: str,
        branch: str,
        commit_sha: str,
        relative_path: Path,
        start_line: int,
        end_line: int,
    ) -> dict[str, str]:
        """Generate source URLs for component (dual URL strategy).
        
        Args:
            repo_url: Repository URL
            branch: Branch name
            commit_sha: Commit SHA (for snapshot URL)
            relative_path: Relative file path
            start_line: Start line number
            end_line: End line number
            
        Returns:
            Dictionary with snapshot_url, latest_url, and file_url
        """
        repo_url_clean = repo_url.rstrip("/").replace(".git", "")
        relative_path_str = str(relative_path).replace("\\", "/")
        
        snapshot_url = f"{repo_url_clean}/blob/{commit_sha}/{relative_path_str}#L{start_line}-L{end_line}"
        latest_url = f"{repo_url_clean}/blob/{branch}/{relative_path_str}#L{start_line}-L{end_line}"
        file_url = f"{repo_url_clean}/blob/{branch}/{relative_path_str}"
        
        return {
            "snapshot_url": snapshot_url,
            "latest_url": latest_url,
            "file_url": file_url,
        }

    @track_tool_metrics(tool_name="code_analyzer.extract_resources")
    async def extract_resources(
        self,
        file_path: Path,
        file_content: str,
    ) -> dict[str, Any]:
        """Extract resource types and specs from infrastructure code.
        
        Uses LLM to identify resources in various formats:
        - Terraform: aws_instance, aws_rds_instance, etc.
        - Kubernetes: Deployment, Service, ConfigMap, etc.
        - Docker: FROM, RUN commands (infer base images)
        - CloudFormation: AWS::EC2::Instance, etc.
        
        Args:
            file_path: File path
            file_content: File content
            
        Returns:
            Dictionary with resource_types, cloud_providers, services, resources, resource_specs
        """
        if not self.llm_client:
            logger.warning("LLM client not available, using fallback extraction")
            return self._fallback_extract_resources(file_path, file_content)
        
        prompt = f"""Extract infrastructure resources from this {file_path.suffix} file.

File: {file_path}
Content:
{file_content[:30000]}

Extract:
1. Resource types (e.g., "RDS", "EC2", "Kubernetes Deployment", "Docker container")
2. Resource specifications (instance types, sizes, configurations)
3. Cloud providers (AWS, GCP, Azure, etc.)
4. Services (S3, RDS, EKS, GKE, etc.)

Return JSON:
{{
    "resource_types": ["RDS", "EC2", "S3"],
    "cloud_providers": ["aws"],
    "services": ["rds", "ec2", "s3"],
    "resources": [
        {{
            "name": "web_server",
            "type": "EC2",
            "instance_type": "t3.medium",
            "cloud": "aws",
            "service": "ec2",
            "quantity": 2,
            "start_line": 15,
            "end_line": 25
        }}
    ],
    "resource_specs": [
        {{"cloud": "aws", "service": "ec2", "instance_type": "t3.medium", "quantity": 2}},
        {{"cloud": "aws", "service": "rds", "instance_type": "db.t3.medium", "quantity": 1}}
    ]
}}
"""
        
        try:
            response = await self.llm_client.messages.create(
                model=self.model,
                max_tokens=self.max_tokens,
                temperature=self.temperature,
                messages=[{"role": "user", "content": prompt}],
            )
            
            response_text = response.content[0].text if response.content else ""
            
            if not response_text:
                logger.warning("Empty response from LLM for resource extraction")
                return self._fallback_extract_resources(file_path, file_content)
            
            try:
                result = json.loads(response_text)
                return result
            except json.JSONDecodeError as e:
                logger.warning("Failed to parse LLM response as JSON: %s", e)
                return self._fallback_extract_resources(file_path, file_content)
                
        except Exception as e:
            logger.error("Error extracting resources with LLM: %s", e, exc_info=True)
            return self._fallback_extract_resources(file_path, file_content)

    def _fallback_extract_resources(
        self,
        file_path: Path,
        file_content: str,
    ) -> dict[str, Any]:
        """Fallback resource extraction using pattern matching.
        
        Args:
            file_path: File path
            file_content: File content
            
        Returns:
            Dictionary with extracted resources
        """
        resource_types = []
        cloud_providers = []
        services = []
        resources = []
        resource_specs = []
        
        content_lower = file_content.lower()
        
        if ".tf" in file_path.suffix.lower():
            if "aws_" in content_lower:
                cloud_providers.append("aws")
            if "google_" in content_lower or "gcp_" in content_lower:
                cloud_providers.append("gcp")
            if "azurerm_" in content_lower:
                cloud_providers.append("azure")
            
            terraform_resources = [
                ("aws_instance", "EC2", "ec2"),
                ("aws_rds_instance", "RDS", "rds"),
                ("aws_s3_bucket", "S3", "s3"),
                ("aws_eks_cluster", "EKS", "eks"),
                ("google_compute_instance", "GCE", "compute"),
                ("google_sql_database_instance", "Cloud SQL", "sql"),
            ]
            
            for tf_resource, resource_type, service in terraform_resources:
                if tf_resource in content_lower:
                    resource_types.append(resource_type)
                    if service not in services:
                        services.append(service)
        
        elif file_path.suffix.lower() in [".yaml", ".yml"]:
            if "apiVersion:" in content_lower and "kind:" in content_lower:
                resource_types.append("Kubernetes")
                if "kind: Deployment" in content_lower:
                    resource_types.append("Kubernetes Deployment")
                if "kind: Service" in content_lower:
                    resource_types.append("Kubernetes Service")
        
        elif file_path.name.lower() in ["dockerfile", "docker-compose.yml", "docker-compose.yaml"]:
            resource_types.append("Docker Container")
            if "FROM" in content_lower:
                services.append("docker")
        
        return {
            "resource_types": list(set(resource_types)),
            "cloud_providers": list(set(cloud_providers)),
            "services": list(set(services)),
            "resources": resources,
            "resource_specs": resource_specs,
        }

    async def get_compliance_for_resources(
        self,
        resource_types: list[str],
        standards: Optional[list[str]] = None,
    ) -> dict[str, Any]:
        """Get compliance requirements using existing compliance service.
        
        Args:
            resource_types: List of resource types
            standards: Optional list of compliance standards to check
            
        Returns:
            Dictionary with compliance controls and summary
        """
        if not resource_types:
            return {
                "controls": [],
                "summary": {"total": 0, "by_standard": {}, "by_severity": {}},
                "by_standard": {},
                "by_severity": {},
            }
        
        try:
            request = ComplianceRequirementsRequest(
                resource_types=resource_types,
                standards=standards,
                include_remediation=True,
                include_verification=True,
            )
            
            response = await self.compliance_service.get_compliance_requirements(request)
            
            return {
                "controls": [
                    {
                        "control_id": c.control_id,
                        "standard": c.standard,
                        "title": c.title,
                        "description": c.description,
                        "severity": c.severity,
                        "applies_to": c.applies_to or [],
                        "remediation": c.remediation.model_dump() if c.remediation else None,
                    }
                    for c in response.controls
                ],
                "summary": {
                    "total": response.summary.total,
                    "by_standard": response.summary.by_standard,
                    "by_severity": response.summary.by_severity,
                },
                "by_standard": response.summary.by_standard,
                "by_severity": response.summary.by_severity,
            }
        except Exception as e:
            logger.error("Error getting compliance requirements: %s", e, exc_info=True)
            return {
                "controls": [],
                "summary": {"total": 0, "by_standard": {}, "by_severity": {}},
                "by_standard": {},
                "by_severity": {},
                "error": str(e),
            }

    async def calculate_costs_for_resources(
        self,
        resources: list[dict[str, Any]],
        track_missing: bool = True,
    ) -> dict[str, Any]:
        """Calculate costs using existing pricing tool.
        
        Args:
            resources: List of resource specifications
            track_missing: Track resources without pricing data
            
        Returns:
            Dictionary with cost breakdown and optimizations
        """
        if not resources:
            return {
                "total_monthly": 0.0,
                "total_annual": 0.0,
                "breakdown": [],
                "optimizations": [],
                "missing_pricing_count": 0,
                "missing_resources": [],
            }
        
        try:
            result = await self.pricing_tool.calculate_infrastructure_cost(resources)
            
            missing_resources = []
            for resource in resources:
                found = any(
                    b.get("resource", "").startswith(f"{resource.get('cloud')}:{resource.get('service')}")
                    for b in result.get("breakdown", [])
                )
                if not found:
                    missing_resources.append(resource)
            
            if track_missing and missing_resources:
                await self._track_missing_pricing(missing_resources)
            
            return {
                "total_monthly": result.get("total_monthly", 0.0),
                "total_annual": result.get("total_annual", 0.0),
                "breakdown": result.get("breakdown", []),
                "optimizations": result.get("optimizations", []),
                "missing_pricing_count": len(missing_resources),
                "missing_resources": missing_resources,
            }
        except Exception as e:
            logger.error("Error calculating costs: %s", e, exc_info=True)
            return {
                "total_monthly": 0.0,
                "total_annual": 0.0,
                "breakdown": [],
                "optimizations": [],
                "missing_pricing_count": len(resources),
                "missing_resources": resources,
                "error": str(e),
            }

    async def _track_missing_pricing(
        self,
        missing_resources: list[dict[str, Any]],
    ) -> None:
        """Report resources that had no pricing data to the pricing tracker.

        Best-effort: a missing tracker module is logged at debug level and any
        other failure at warning level; nothing is raised to the caller.

        Args:
            missing_resources: Resource specs for which no price was found
        """
        try:
            from api.services.pricing_data_tracker import pricing_data_tracker

            for spec in missing_resources:
                # The timestamp is taken per resource, at the moment it is reported.
                tracking_context = {
                    "source": "repository_indexing",
                    "timestamp": datetime.utcnow().isoformat(),
                }
                await pricing_data_tracker.track_missing_pricing(
                    resource_spec=spec,
                    context=tracking_context,
                )
        except ImportError:
            logger.debug("PricingDataTracker not available, skipping missing data tracking")
        except Exception as exc:
            logger.warning("Error tracking missing pricing data: %s", exc)

    @track_tool_metrics(tool_name="code_analyzer.analyze_component")
    async def analyze_component(
        self,
        file_path: Path,
        file_content: str,
        component_info: dict[str, Any],
        repo_context: dict[str, Any],
    ) -> KnowledgeArticle:
        """Analyze a single infrastructure component and generate analysis document.

        Pipeline: validate inputs, extract the component's lines, extract its
        resources, gather compliance and cost data, check budgets, generate
        the analysis, record estimated spending, strip any source code the
        analysis echoed back, and assemble the article. Budget checks,
        spending records and contextual-description generation are
        best-effort: their failures are logged and do not abort the analysis.

        Args:
            file_path: File path
            file_content: Full file content
            component_info: Component information (name, type, start_line, end_line)
            repo_context: Repository context (repo_url, branch, commit_sha, resource_id, user_id)

        Returns:
            KnowledgeArticle with component analysis

        Raises:
            ValueError: If validation fails, the file is larger than
                ``self.max_file_size_mb``, or ``file_path`` is not located
                under ``repo_context["repo_path"]``
        """
        self._validate_component_info(component_info)
        self._validate_repo_context(repo_context)

        # Cheap, local checks come first so an unusable input is rejected
        # before any LLM or service call is paid for.
        file_size_mb = len(file_content.encode("utf-8")) / (1024 * 1024)
        if file_size_mb > self.max_file_size_mb:
            raise ValueError(
                f"File too large: {file_size_mb:.2f} MB (max: {self.max_file_size_mb} MB)"
            )

        relative_path = file_path.relative_to(Path(repo_context.get("repo_path", "")))

        # Hash the full content, before the truncation below, so the stored
        # source_hash matches what should_reanalyze_file computes for the
        # same file.
        file_hash = self.calculate_file_hash(file_content)

        component_content = self._extract_component_content(
            file_content,
            component_info["start_line"],
            component_info["end_line"],
        )

        extracted_resources = await self.extract_resources(
            file_path,
            component_content,
        )

        def _first_or_none(key: str) -> Any:
            """Return the first item stored under *key*, or None if absent/empty."""
            values = extracted_resources.get(key)
            return values[0] if values else None

        component_resource_types = [component_info.get("type", "Unknown")]
        if extracted_resources.get("resource_types"):
            component_resource_types.extend(extracted_resources["resource_types"])

        compliance_data = await self.get_compliance_for_resources(
            resource_types=component_resource_types,
            standards=repo_context.get("compliance_standards"),
        )

        cost_data = await self.calculate_costs_for_resources(
            resources=extracted_resources.get("resource_specs", []),
            track_missing=True,
        )

        budget_status = None
        if cost_data.get("total_monthly", 0) > 0:
            try:
                from api.services.budget_service import budget_service

                budget_status = await budget_service.check_budgets(
                    user_id=repo_context["user_id"],
                    estimated_cost=cost_data["total_monthly"],
                    scope={
                        "cloud_providers": extracted_resources.get("cloud_providers", []),
                        "project_id": repo_context.get("project_id"),
                    },
                )
            except Exception as e:
                logger.warning("Failed to check budgets: %s", e, exc_info=True)

        # Bound what is sent to the LLM. This only affects the prompt; the
        # hash above was taken from the untruncated content.
        if len(file_content) > self.max_content_length:
            logger.warning(
                "File content truncated: %s (%d chars > %d)",
                file_path,
                len(file_content),
                self.max_content_length,
            )
            file_content = file_content[:self.max_content_length]

        if len(component_content) > self.max_component_content_length:
            logger.warning(
                "Component content truncated: %s (%d chars > %d)",
                component_info.get("name"),
                len(component_content),
                self.max_component_content_length,
            )
            component_content = component_content[:self.max_component_content_length]

        analysis = await self._generate_component_analysis(
            file_path=file_path,
            file_content=file_content,
            component_content=component_content,
            component_info=component_info,
            repo_context=repo_context,
            extracted_resources=extracted_resources,
            compliance_data=compliance_data,
            cost_data=cost_data,
            budget_status=budget_status,
        )

        urls = await self.generate_source_urls(
            repo_url=repo_context["repo_url"],
            branch=repo_context["branch"],
            commit_sha=repo_context["commit_sha"],
            relative_path=relative_path,
            start_line=component_info["start_line"],
            end_line=component_info["end_line"],
        )

        if cost_data.get("total_monthly", 0) > 0:
            try:
                from api.services.budget_service import budget_service

                # _first_or_none tolerates empty lists; indexing [0] on an
                # empty list would raise and the spending would be lost.
                await budget_service.record_spending(
                    user_id=repo_context["user_id"],
                    amount_usd=cost_data["total_monthly"],
                    source_type="repository-analysis",
                    source_id=repo_context["resource_id"],
                    component_id=None,
                    cloud_provider=_first_or_none("cloud_providers"),
                    project_id=repo_context.get("project_id"),
                    service=_first_or_none("services"),
                    resource_type=_first_or_none("resource_types"),
                    resource_spec=_first_or_none("resource_specs"),
                )
            except Exception as e:
                logger.warning("Failed to record spending: %s", e, exc_info=True)

        analysis_markdown = analysis.get("markdown", "")

        if self._detect_code_in_content(analysis_markdown, component_content):
            logger.error(
                "Code detected in analysis for component %s, sanitizing",
                component_info.get("name"),
            )
            analysis_markdown = self._sanitize_analysis_content(
                analysis_markdown,
                component_content,
                urls["snapshot_url"],
            )
            analysis["markdown"] = analysis_markdown

        domain = self._detect_domain(file_path, component_content)
        content_type = ContentType.REFERENCE

        # The built-in hash() of a str is salted per process, so it would give
        # the same component a different id on every run. Derive the numeric
        # suffix from a stable digest instead.
        location_digest = hashlib.sha256(
            f"{file_path}{component_info['start_line']}".encode("utf-8")
        ).hexdigest()
        location_suffix = int(location_digest, 16) % 100000
        article_id = (
            f"repo_{repo_context['resource_id']}_{file_path.stem}_"
            f"{component_info.get('name', 'component')}_{location_suffix}"
        )

        article = KnowledgeArticle(
            article_id=article_id,
            domain=domain,
            subdomain="infrastructure-analysis",
            content_type=content_type,
            title=f"{component_info.get('name', 'Component')} - {file_path.name}",
            summary=analysis.get("summary", ""),
            content=analysis_markdown,
            source_url=urls["snapshot_url"],
            source_urls=urls,
            commit_sha=repo_context["commit_sha"],
            branch=repo_context["branch"],
            analyzed_at=datetime.utcnow(),
            source_hash=file_hash,
            user_id=repo_context["user_id"],
            visibility="user",
            source_type="repository-analysis",
            resource_id=repo_context["resource_id"],
            structured_data={
                "component_name": component_info.get("name"),
                "component_type": component_info.get("type"),
                "file_path": str(file_path),
                "start_line": component_info["start_line"],
                "end_line": component_info["end_line"],
                "resources": extracted_resources.get("resources", []),
                "budget_status": budget_status,
            },
            compliance_impact=compliance_data,
            cost_impact=cost_data,
            security_impact=analysis.get("security", {}),
            tags=[file_path.suffix[1:] if file_path.suffix else "code"],
            cloud_providers=extracted_resources.get("cloud_providers", []),
            services=extracted_resources.get("services", []),
        )

        try:
            from api.services.context_generator import context_generator

            contextual_description = await context_generator.generate_context(
                article=article,
                repo_context=repo_context,
            )

            article.contextual_description = contextual_description
            article.context_generated_at = datetime.utcnow()
            article.context_version = "1.0"
        except Exception as e:
            logger.warning(
                "Failed to generate contextual description for article %s: %s",
                article_id,
                e,
                exc_info=True,
            )

        return article

    def _extract_component_content(
        self,
        file_content: str,
        start_line: int,
        end_line: int,
    ) -> str:
        """Extract component content from file.
        
        Args:
            file_content: Full file content
            start_line: Start line number (1-indexed)
            end_line: End line number (1-indexed)
            
        Returns:
            Component content string
        """
        lines = file_content.split("\n")
        if start_line < 1:
            start_line = 1
        if end_line > len(lines):
            end_line = len(lines)
        
        return "\n".join(lines[start_line - 1 : end_line])

    async def _generate_component_analysis(
        self,
        file_path: Path,
        file_content: str,
        component_content: str,
        component_info: dict[str, Any],
        repo_context: dict[str, Any],
        extracted_resources: dict[str, Any],
        compliance_data: dict[str, Any],
        cost_data: dict[str, Any],
        budget_status: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Generate component analysis using Claude Opus.

        Builds a single prompt from the component text and the tool outputs,
        sends it to the configured LLM client, and parses the reply as JSON.
        Every failure path visible here (no client, empty reply, invalid JSON,
        timeout, any other exception) returns ``_fallback_analysis(...)``
        instead of raising.

        Args:
            file_path: File path (shown in the prompt and used to detect the file type)
            file_content: Full file content (not referenced in this method body)
            component_content: Component-specific content; only the first
                20000 characters are placed in the prompt
            component_info: Component information; ``start_line`` and
                ``end_line`` are indexed directly, so both keys must be present
            repo_context: Repository context (not referenced in this method body)
            extracted_resources: Extracted resources (JSON-serialised into the prompt)
            compliance_data: Compliance data from tool (JSON-serialised into the prompt)
            cost_data: Cost data from tool (JSON-serialised into the prompt)
            budget_status: Budget status (optional); when falsy the prompt
                says "No budgets configured"

        Returns:
            Dictionary with analysis (summary, markdown, security)

        Raises:
            KeyError: If ``component_info`` lacks ``start_line`` or
                ``end_line`` (the prompt is built outside the ``try`` block).
        """
        # No client configured: skip the LLM entirely.
        if not self.llm_client:
            logger.warning("LLM client not available, using fallback analysis")
            return self._fallback_analysis(component_info, compliance_data, cost_data)
        
        file_type = self._detect_file_type(file_path, component_content)
        
        # The prompt is built before the try block below, so errors raised while
        # formatting it (missing keys, non-serialisable values) propagate to the caller.
        prompt = f"""You are an expert infrastructure analyst. Analyze this infrastructure component and generate a comprehensive analysis document.

File: {file_path}
Component: {component_info.get('name', 'Unknown')}
Type: {component_info.get('type', 'Unknown')}
File Type: {file_type}
Lines: {component_info['start_line']}-{component_info['end_line']}

Component Content:
{component_content[:20000]}

Extracted Resources:
{json.dumps(extracted_resources, indent=2)}

Compliance Requirements (from WISTX compliance database):
{json.dumps(compliance_data, indent=2)}

Cost Analysis (from WISTX pricing database):
{json.dumps(cost_data, indent=2)}

CRITICAL SECURITY REQUIREMENTS:
- DO NOT include any code snippets from the component
- DO NOT copy code blocks from the component
- DO NOT include code examples that match the component code
- Only provide analysis, recommendations, and references
- Use source URLs (provided) instead of showing code
- If you need to reference code, use line numbers and descriptions only
- Violation of these requirements will result in content rejection

Generate a detailed analysis document covering:

1. **Component Overview**: Brief description of what this component does
2. **Configuration Analysis**: Key configuration details (describe, don't show code)
3. **Compliance Mapping**: 
   - Use compliance_data provided above (from WISTX compliance database)
   - Map compliance controls to this component
   - Compliance status (✅ compliant, ⚠️ partial, ❌ non-compliant)
4. **Cost Analysis**: 
   - Use cost_data provided above (from WISTX pricing database)
   - Monthly cost (from cost breakdown)
   - Cost optimization opportunities (from cost_data.optimizations)
   - Budget status: {json.dumps(budget_status, indent=2) if budget_status else "No budgets configured"}
5. **Security Posture**: Security strengths, concerns, recommendations
6. **Best Practices**: Practices followed, improvements needed

IMPORTANT: 
- Use the compliance_data provided (from WISTX compliance tool) - do not generate generic compliance info
- Use the cost_data provided (from WISTX pricing tool) - do not estimate costs manually
- Focus on analyzing the component structure, patterns, and relationships
- Map compliance controls to specific component configuration
- Reference cost breakdown from cost_data
- NEVER include code snippets - only analysis and descriptions

Return JSON:
{{
    "summary": "Brief 2-3 sentence summary",
    "markdown": "Full markdown analysis document (NO CODE SNIPPETS)",
    "security": {{
        "strengths": [],
        "concerns": [],
        "recommendations": []
    }}
}}
"""
        
        try:
            # Each invocation of call_llm is bounded by self.llm_timeout_seconds.
            async def call_llm():
                async with asyncio.timeout(self.llm_timeout_seconds):
                    return await self.llm_client.messages.create(
                        model=self.model,
                        max_tokens=self.max_tokens,
                        temperature=self.temperature,
                        messages=[{"role": "user", "content": prompt}],
                    )
            
            # with_retry is handed max_attempts=3 and 1s -> 10s backoff settings;
            # the exact retry semantics live in wistx_mcp.tools.lib.retry_utils.
            response = await with_retry(
                call_llm,
                max_attempts=3,
                initial_delay=1.0,
                max_delay=10.0,
                backoff_multiplier=2.0,
                retryable_exceptions=(TimeoutError, ConnectionError, RuntimeError),
            )
            
            # Only the first content item of the response is read.
            response_text = response.content[0].text if response.content else ""
            
            if not response_text:
                logger.warning("Empty response from LLM for component analysis")
                return self._fallback_analysis(component_info, compliance_data, cost_data)
            
            try:
                # NOTE(review): the parsed value is returned as-is; nothing here
                # checks that it is a dict carrying summary/markdown/security.
                result = json.loads(response_text)
                return result
            except json.JSONDecodeError as e:
                logger.warning("Failed to parse LLM response as JSON: %s", e)
                return self._fallback_analysis(component_info, compliance_data, cost_data)
                
        except AsyncTimeoutError:
            # Reached when a timeout escapes with_retry — presumably after its
            # attempts are exhausted; confirm against retry_utils.
            logger.error("LLM call timed out after %.1f seconds", self.llm_timeout_seconds)
            return self._fallback_analysis(component_info, compliance_data, cost_data)
        except Exception as e:
            # Deliberate best-effort boundary: any other failure degrades to the fallback.
            logger.error("Error generating component analysis: %s", e, exc_info=True)
            return self._fallback_analysis(component_info, compliance_data, cost_data)

    def _fallback_analysis(
        self,
        component_info: dict[str, Any],
        compliance_data: dict[str, Any],
        cost_data: dict[str, Any],
    ) -> dict[str, Any]:
        """Fallback analysis when LLM is unavailable.
        
        Args:
            component_info: Component information
            compliance_data: Compliance data
            cost_data: Cost data
            
        Returns:
            Basic analysis dictionary
        """
        summary = f"Infrastructure component: {component_info.get('name', 'Unknown')} ({component_info.get('type', 'Unknown')})"
        
        markdown = f"""# {component_info.get('name', 'Component')}

## Overview
{summary}

## Compliance
"""
        if compliance_data.get("controls"):
            markdown += f"- Found {len(compliance_data['controls'])} compliance controls\n"
        else:
            markdown += "- No compliance data available\n"
        
        markdown += "\n## Cost\n"
        if cost_data.get("total_monthly"):
            markdown += f"- Monthly Cost: ${cost_data['total_monthly']:.2f}\n"
        else:
            markdown += "- Cost data not available\n"
        
        return {
            "summary": summary,
            "markdown": markdown,
            "security": {
                "strengths": [],
                "concerns": [],
                "recommendations": [],
            },
        }

    def _detect_file_type(
        self,
        file_path: Path,
        content: str,
    ) -> str:
        """Detect file type from path and content.
        
        Args:
            file_path: File path
            content: File content
            
        Returns:
            File type string
        """
        suffix = file_path.suffix.lower()
        content_lower = content.lower()
        
        if suffix == ".tf" or suffix == ".tfvars" or suffix == ".hcl":
            return "Terraform"
        elif suffix in [".yaml", ".yml"]:
            if "apiVersion:" in content_lower and "kind:" in content_lower:
                return "Kubernetes"
            elif "AWSTemplateFormatVersion" in content_lower:
                return "CloudFormation"
            elif "serverless:" in content_lower:
                return "Serverless Framework"
            else:
                return "YAML Configuration"
        elif file_path.name.lower() == "dockerfile":
            return "Docker"
        elif file_path.name.lower() in ["docker-compose.yml", "docker-compose.yaml"]:
            return "Docker Compose"
        elif ".github/workflows" in str(file_path):
            return "GitHub Actions"
        elif file_path.name.lower() == ".gitlab-ci.yml":
            return "GitLab CI"
        elif file_path.name.lower() == "jenkinsfile":
            return "Jenkins"
        else:
            return "Infrastructure Configuration"

    def _detect_domain(
        self,
        file_path: Path,
        content: str,
    ) -> Domain:
        """Detect domain from file path and content.

        Rules are evaluated in order and the first match wins: Terraform
        paths/suffixes, Kubernetes paths or manifest-looking content,
        security/auth paths, cost/billing paths, then compliance keywords in
        the content. All comparisons are case-insensitive.

        Args:
            file_path: File path
            content: File content

        Returns:
            Domain enum (``Domain.DEVOPS`` when no rule matches)
        """
        path_str = str(file_path).lower()
        content_lower = content.lower()

        if "terraform" in path_str or file_path.suffix.lower() == ".tf":
            return Domain.INFRASTRUCTURE

        # The content is lower-cased, so the marker must be lower-case too
        # ("apiVersion:" could never match).
        looks_like_k8s_manifest = "apiversion:" in content_lower and "kind:" in content_lower
        if "kubernetes" in path_str or "k8s" in path_str or looks_like_k8s_manifest:
            return Domain.DEVOPS
        if "security" in path_str or "auth" in path_str:
            return Domain.SECURITY
        if "cost" in path_str or "billing" in path_str:
            return Domain.FINOPS
        if any(keyword in content_lower for keyword in ("compliance", "pci", "hipaa")):
            return Domain.COMPLIANCE

        return Domain.DEVOPS

    @track_tool_metrics(tool_name="code_analyzer.extract_components")
    async def extract_components_from_file(
        self,
        file_path: Path,
        file_content: str,
    ) -> list[dict[str, Any]]:
        """Extract components from infrastructure file.
        
        Uses LLM to identify individual components/resources within a file.
        
        Args:
            file_path: File path
            file_content: File content
            
        Returns:
            List of component dictionaries with name, type, start_line, end_line
        """
        if not self.llm_client:
            logger.warning("LLM client not available, using fallback component extraction")
            return self._fallback_extract_components(file_path, file_content)
        
        prompt = f"""Extract individual infrastructure components/resources from this {file_path.suffix} file.

File: {file_path}
Content:
{file_content[:50000]}

For each component/resource, identify:
1. Component name/identifier
2. Component type (e.g., "aws_instance", "Kubernetes Deployment", "Docker container")
3. Start line number
4. End line number

Return JSON:
{{
    "components": [
        {{
            "name": "web_server",
            "type": "aws_instance",
            "start_line": 15,
            "end_line": 25
        }},
        {{
            "name": "database",
            "type": "aws_rds_instance",
            "start_line": 30,
            "end_line": 45
        }}
    ]
}}
"""
        
        try:
            response = await self.llm_client.messages.create(
                model=self.model,
                max_tokens=self.max_tokens,
                temperature=self.temperature,
                messages=[{"role": "user", "content": prompt}],
            )
            
            response_text = response.content[0].text if response.content else ""
            
            if not response_text:
                logger.warning("Empty response from LLM for component extraction")
                return self._fallback_extract_components(file_path, file_content)
            
            try:
                result = json.loads(response_text)
                return result.get("components", [])
            except json.JSONDecodeError as e:
                logger.warning("Failed to parse LLM response as JSON: %s", e)
                return self._fallback_extract_components(file_path, file_content)
                
        except Exception as e:
            logger.error("Error extracting components with LLM: %s", e, exc_info=True)
            return self._fallback_extract_components(file_path, file_content)

    def _fallback_extract_components(
        self,
        file_path: Path,
        file_content: str,
    ) -> list[dict[str, Any]]:
        """Fallback component extraction using pattern matching.
        
        Args:
            file_path: File path
            file_content: File content
            
        Returns:
            List of component dictionaries
        """
        components = []
        lines = file_content.split("\n")
        
        if file_path.suffix.lower() == ".tf":
            current_component = None
            brace_count = 0
            
            for i, line in enumerate(lines, 1):
                if "resource" in line.lower() and '"' in line:
                    if current_component:
                        components.append({
                            "name": current_component.get("name", "resource"),
                            "type": current_component.get("type", "resource"),
                            "start_line": current_component["start_line"],
                            "end_line": i - 1,
                        })
                    
                    parts = line.split('"')
                    if len(parts) >= 3:
                        current_component = {
                            "name": parts[3] if len(parts) > 3 else "resource",
                            "type": parts[1] if len(parts) > 1 else "resource",
                            "start_line": i,
                        }
                        brace_count = line.count("{") - line.count("}")
                
                if current_component:
                    brace_count += line.count("{") - line.count("}")
                    if brace_count == 0 and "}" in line:
                        components.append({
                            "name": current_component["name"],
                            "type": current_component["type"],
                            "start_line": current_component["start_line"],
                            "end_line": i,
                        })
                        current_component = None
        
        elif file_path.suffix.lower() in [".yaml", ".yml"]:
            if "kind:" in file_content.lower():
                current_kind = None
                current_name = None
                start_line = None
                
                for i, line in enumerate(lines, 1):
                    if line.strip().startswith("kind:"):
                        if current_kind and current_name:
                            components.append({
                                "name": current_name,
                                "type": current_kind,
                                "start_line": start_line or i,
                                "end_line": i - 1,
                            })
                        current_kind = line.split(":")[-1].strip()
                        start_line = i
                    elif line.strip().startswith("name:") and current_kind:
                        current_name = line.split(":")[-1].strip()
                
                if current_kind and current_name:
                    components.append({
                        "name": current_name,
                        "type": current_kind,
                        "start_line": start_line or len(lines),
                        "end_line": len(lines),
                    })
        
        if not components:
            components.append({
                "name": file_path.stem,
                "type": "file",
                "start_line": 1,
                "end_line": len(lines),
            })
        
        return components


# Module-level CodeAnalyzer instance, constructed once when this module is imported.
code_analyzer = CodeAnalyzer()

