"""
ChalkML Git Engine - Version Control for Data
==============================================

Git-like version control system for data transformations.

Commands:
    chalkml init                    - Initialize data repository
    chalkml commit -m "message"     - Commit current state
    chalkml log                     - View commit history
    chalkml checkout <sha>          - Time travel to commit
    chalkml diff <sha1> <sha2>      - Compare two states
    chalkml branch <name>           - Create/switch branch
    chalkml stash <name>            - Stash current state
    chalkml stash pop <name>        - Restore stashed state
"""

import json
import hashlib
import shutil
from pathlib import Path
from datetime import datetime
from typing import Optional, Dict, List, Tuple, Any
import pandas as pd


class ChalkMLGit:
    """Git-like version control for CSV data files.

    All repository state lives under ``<workspace>/.chalkml``:

    * ``HEAD``                          - ``ref: refs/heads/<branch>``
    * ``refs/heads/<branch>``           - SHA of the branch tip (empty file
      until the first commit on that branch)
    * ``objects/<sha[:2]>/<sha[2:]>``   - commit metadata as JSON
    * ``objects/<sha>.csv``             - verbatim copy of the committed file
    * ``stash/<name>.json`` / ``.csv``  - stash metadata and stashed data
    * ``config.json``                   - repository configuration

    Every public command returns ``(ok, message)``. Any ``Exception`` raised
    while running a command is caught and reported as ``ok=False`` with a
    human-readable message.
    """

    # Repository text files are read and written as UTF-8 so their content
    # does not depend on the platform's locale encoding.
    _TEXT_ENCODING = "utf-8"
    _HEAD_REF_PREFIX = "ref: refs/heads/"
    _RULE_WIDTH = 60
    _HEX_DIGITS = frozenset("0123456789abcdefABCDEF")
    # Characters that would let a branch/stash name escape its directory or
    # corrupt the single-line HEAD file.
    _FORBIDDEN_NAME_CHARS = frozenset("/\\\x00\r\n")

    def __init__(self, workspace_path: Optional[str] = None):
        """Bind to a workspace directory (defaults to the current directory).

        Nothing is created on disk here; call :meth:`init` for that.
        """
        if workspace_path is None:
            workspace_path = Path.cwd()
        self.workspace_path = Path(workspace_path)
        self.chalk_dir = self.workspace_path / ".chalkml"
        self.objects_dir = self.chalk_dir / "objects"
        self.refs_dir = self.chalk_dir / "refs" / "heads"
        self.stash_dir = self.chalk_dir / "stash"
        self.head_file = self.chalk_dir / "HEAD"
        self.config_file = self.chalk_dir / "config.json"

    def init(self) -> Tuple[bool, str]:
        """Initialize ChalkML repository (like git init).

        Returns:
            ``(True, banner)`` on success; ``(False, reason)`` if ``.chalkml``
            already exists or the directory structure cannot be created.
        """
        try:
            if self.chalk_dir.exists():
                return False, "❌ ChalkML repository already initialized"

            # Create directory structure
            self.chalk_dir.mkdir(parents=True, exist_ok=True)
            self.objects_dir.mkdir(exist_ok=True)
            self.refs_dir.mkdir(parents=True, exist_ok=True)
            self.stash_dir.mkdir(exist_ok=True)

            # HEAD points at the main branch
            self.head_file.write_text(
                f"{self._HEAD_REF_PREFIX}main\n", encoding=self._TEXT_ENCODING
            )

            config = {
                "version": "1.0.0",
                "created": datetime.now().isoformat(),
                "user": {
                    "name": "ChalkML User",
                    "email": "user@chalkml.dev"
                }
            }
            self.config_file.write_text(
                json.dumps(config, indent=2), encoding=self._TEXT_ENCODING
            )

            # The main branch exists but has no commits yet (empty ref file)
            (self.refs_dir / "main").write_text("", encoding=self._TEXT_ENCODING)

            return True, f"""✅ Initialized ChalkML repository in {self.chalk_dir}

Git-for-Data commands available:
  chalkml commit -m "message"  - Save current state
  chalkml log                  - View history
  chalkml checkout <sha>       - Time travel
  chalkml stash <name>         - Stash changes
  chalkml branch <name>        - Create branch

Your data is now version controlled! 🚀
"""
        except Exception as e:
            return False, f"Error initializing repository: {str(e)}"

    def commit(self, message: str, file_path: str) -> Tuple[bool, str]:
        """Commit current data state (like git commit).

        Args:
            message: Commit message.
            file_path: CSV file to snapshot; it is parsed with
                ``pandas.read_csv`` to record row/column metadata.

        Returns:
            ``(True, summary)`` on success, ``(False, reason)`` otherwise.
        """
        try:
            if not self.chalk_dir.exists():
                return False, "❌ Not a ChalkML repository. Run 'chalkml init' first"

            file_path = Path(file_path)
            if not file_path.exists():
                return False, f"❌ File not found: {file_path}"

            current_branch = self._get_current_branch()
            # None for the first commit on a branch
            parent_sha = self._get_branch_head(current_branch)
            timestamp = datetime.now().isoformat()
            data_hash = self._compute_file_hash(file_path)

            df = pd.read_csv(file_path)
            metadata = {
                "rows": len(df),
                "columns": len(df.columns),
                "column_names": list(df.columns),
                "dtypes": {col: str(dtype) for col, dtype in df.dtypes.items()},
                "memory_kb": float(df.memory_usage(deep=True).sum()) / 1024
            }

            commit_obj = {
                "message": message,
                "timestamp": timestamp,
                "parent": parent_sha,
                "branch": current_branch,
                "file": str(file_path.name),
                "data_hash": data_hash,
                "metadata": metadata
            }

            # The commit id is the first 12 hex digits of the SHA-256 of the
            # canonical (key-sorted) JSON form of the commit object.
            commit_json = json.dumps(commit_obj, sort_keys=True)
            commit_sha = hashlib.sha256(commit_json.encode()).hexdigest()[:12]

            # Write order matters: data snapshot, then commit object, then the
            # branch ref. A failure part-way never leaves a commit object or a
            # ref that points at data which was not stored.
            shutil.copy2(file_path, self._snapshot_path(commit_sha))

            commit_dir = self.objects_dir / commit_sha[:2]
            commit_dir.mkdir(exist_ok=True)
            (commit_dir / commit_sha[2:]).write_text(
                json.dumps(commit_obj, indent=2), encoding=self._TEXT_ENCODING
            )

            (self.refs_dir / current_branch).write_text(
                commit_sha, encoding=self._TEXT_ENCODING
            )

            return True, f"""✅ Committed: {message}
   SHA: {commit_sha}
   Branch: {current_branch}
   File: {file_path.name}
   Rows: {metadata['rows']}, Cols: {metadata['columns']}
   Time: {timestamp}
"""
        except Exception as e:
            return False, f"Error committing: {str(e)}"

    def log(self, limit: int = 10) -> Tuple[bool, str]:
        """Show commit history (like git log).

        Walks parent links from the tip of the current branch.

        Args:
            limit: Maximum number of commits to print.

        Returns:
            ``(True, text)`` with the formatted history (or a hint when the
            branch has no commits); ``(False, reason)`` on failure.
        """
        try:
            if not self.chalk_dir.exists():
                return False, "❌ Not a ChalkML repository"

            current_branch = self._get_current_branch()
            current_sha = self._get_branch_head(current_branch)

            if not current_sha:
                return True, "No commits yet. Use 'chalkml commit -m \"message\" file.csv'"

            parts = [
                f"📋 Commit History ({current_branch} branch)\n",
                "=" * self._RULE_WIDTH + "\n\n",
            ]

            commits_shown = 0
            sha = current_sha
            while sha and commits_shown < limit:
                commit_obj = self._load_commit(sha)
                if not commit_obj:
                    # Broken chain: stop quietly with what was found so far
                    break

                parts.append(f"commit {sha}\n")
                parts.append(f"Date:   {commit_obj['timestamp']}\n")
                parts.append(f"Branch: {commit_obj['branch']}\n")
                parts.append(f"\n    {commit_obj['message']}\n")
                parts.append(f"\n    File: {commit_obj['file']}\n")
                parts.append(f"    Rows: {commit_obj['metadata']['rows']}, ")
                parts.append(f"Cols: {commit_obj['metadata']['columns']}\n")
                parts.append("\n" + "-" * self._RULE_WIDTH + "\n\n")

                sha = commit_obj.get('parent')
                commits_shown += 1

            # Older commits remain beyond the limit
            if commits_shown == limit and sha:
                parts.append(f"... (use 'chalkml log --limit {limit*2}' for more)\n")

            return True, "".join(parts)
        except Exception as e:
            return False, f"Error reading log: {str(e)}"

    def checkout(self, sha: str, output_path: str) -> Tuple[bool, str]:
        """Checkout specific commit (time travel).

        Copies the data snapshot of ``sha`` to ``output_path``. HEAD and the
        branch refs are not modified.

        Returns:
            ``(True, summary)`` on success; ``(False, reason)`` if the commit
            or its snapshot is missing, or the copy fails.
        """
        try:
            if not self.chalk_dir.exists():
                return False, "❌ Not a ChalkML repository"

            commit_obj = self._load_commit(sha)
            if not commit_obj:
                return False, f"❌ Commit not found: {sha}"

            data_snapshot = self._snapshot_path(sha)
            if not data_snapshot.exists():
                return False, f"❌ Data snapshot not found for commit {sha}"

            output_path = Path(output_path)
            shutil.copy2(data_snapshot, output_path)

            return True, f"""✅ Checked out commit {sha}
   Message: {commit_obj['message']}
   Date: {commit_obj['timestamp']}
   Output: {output_path}
   Rows: {commit_obj['metadata']['rows']}, Cols: {commit_obj['metadata']['columns']}
"""
        except Exception as e:
            return False, f"Error checking out: {str(e)}"

    def diff(self, sha1: str, sha2: str) -> Tuple[bool, str]:
        """Compare two commits (like git diff).

        Reports the row-count change, added/removed columns (sorted by name),
        and the mean of every column that is numeric in both snapshots.

        Returns:
            ``(True, report)`` on success; ``(False, reason)`` if a commit or
            its data snapshot is missing, or a snapshot cannot be parsed.
        """
        try:
            if not self.chalk_dir.exists():
                return False, "❌ Not a ChalkML repository"

            commit1 = self._load_commit(sha1)
            commit2 = self._load_commit(sha2)

            if not commit1:
                return False, f"❌ Commit not found: {sha1}"
            if not commit2:
                return False, f"❌ Commit not found: {sha2}"

            data1_path = self._snapshot_path(sha1)
            data2_path = self._snapshot_path(sha2)
            for sha, path in ((sha1, data1_path), (sha2, data2_path)):
                if not path.exists():
                    return False, f"❌ Data snapshot not found for commit {sha}"

            df1 = pd.read_csv(data1_path)
            df2 = pd.read_csv(data2_path)

            parts = [
                f"📊 Diff: {sha1} → {sha2}\n",
                "=" * self._RULE_WIDTH + "\n\n",
                f"Commit 1: {commit1['message']}\n",
                f"Commit 2: {commit2['message']}\n\n",
            ]

            # Row changes
            row_diff = len(df2) - len(df1)
            parts.append(f"Rows: {len(df1)} → {len(df2)} ")
            if row_diff > 0:
                parts.append(f"(+{row_diff})\n")
            elif row_diff < 0:
                parts.append(f"({row_diff})\n")
            else:
                parts.append("(no change)\n")

            # Column changes; sorted so the report is stable between runs
            # (set iteration order of strings is not).
            cols1 = set(df1.columns)
            cols2 = set(df2.columns)
            added_cols = sorted(cols2 - cols1)
            removed_cols = sorted(cols1 - cols2)

            if added_cols:
                parts.append(f"\n✅ Added columns: {', '.join(added_cols)}\n")
            if removed_cols:
                parts.append(f"❌ Removed columns: {', '.join(removed_cols)}\n")

            common_cols = cols1 & cols2
            if common_cols:
                parts.append("\n📈 Value changes in common columns:\n")
                for col in sorted(common_cols):
                    before, after = df1[col], df2[col]
                    # A mean is only comparable when the column holds numbers
                    # on both sides; anything else is skipped explicitly
                    # rather than by swallowing whatever .mean() raises.
                    if not (self._is_number_column(before)
                            and self._is_number_column(after)):
                        continue
                    parts.append(
                        f"  {col}: mean {before.mean():.2f} → {after.mean():.2f}\n"
                    )

            return True, "".join(parts)
        except Exception as e:
            return False, f"Error computing diff: {str(e)}"

    def branch(self, name: str, create: bool = True) -> Tuple[bool, str]:
        """Create or switch branch.

        Args:
            name: Branch name. Must be a single path component: non-empty, not
                ``.``/``..``, without path separators, NUL or line breaks, and
                without leading/trailing whitespace.
            create: When true, create ``name`` at the current branch tip and
                switch to it; when false, switch to the existing ``name``.

        Returns:
            ``(True, message)`` on success, ``(False, reason)`` otherwise.
        """
        try:
            if not self.chalk_dir.exists():
                return False, "❌ Not a ChalkML repository"

            # Reject names that would resolve outside refs/heads
            if not self._is_safe_name(name):
                return False, f"❌ Invalid branch name: '{name}'"

            branch_file = self.refs_dir / name
            head_text = f"{self._HEAD_REF_PREFIX}{name}\n"

            if create:
                if branch_file.exists():
                    return False, f"❌ Branch '{name}' already exists"

                # New branch starts at the tip of the current branch
                current_sha = self._get_branch_head(self._get_current_branch())
                branch_file.write_text(
                    current_sha or "", encoding=self._TEXT_ENCODING
                )
                self.head_file.write_text(head_text, encoding=self._TEXT_ENCODING)

                return True, f"✅ Created and switched to branch '{name}'"

            if not branch_file.exists():
                return False, f"❌ Branch '{name}' does not exist"

            self.head_file.write_text(head_text, encoding=self._TEXT_ENCODING)
            return True, f"✅ Switched to branch '{name}'"
        except Exception as e:
            return False, f"Error with branch: {str(e)}"

    def stash(self, name: str, file_path: str) -> Tuple[bool, str]:
        """Stash current state (like git stash).

        Copies ``file_path`` into the stash area under ``name``. An existing
        stash with the same name is overwritten.

        Args:
            name: Stash name; same restrictions as branch names.
            file_path: File to stash.

        Returns:
            ``(True, summary)`` on success, ``(False, reason)`` otherwise.
        """
        try:
            if not self.chalk_dir.exists():
                return False, "❌ Not a ChalkML repository"

            # Reject names that would resolve outside the stash directory
            if not self._is_safe_name(name):
                return False, f"❌ Invalid stash name: '{name}'"

            file_path = Path(file_path)
            if not file_path.exists():
                return False, f"❌ File not found: {file_path}"

            timestamp = datetime.now().isoformat()
            stash_obj = {
                "name": name,
                "timestamp": timestamp,
                "file": str(file_path.name),
                "branch": self._get_current_branch()
            }

            # Data first, metadata second: stash_list() only looks at the
            # .json files, so a listed stash always has its data in place.
            shutil.copy2(file_path, self.stash_dir / f"{name}.csv")
            (self.stash_dir / f"{name}.json").write_text(
                json.dumps(stash_obj, indent=2), encoding=self._TEXT_ENCODING
            )

            return True, (
                f"✅ Stashed: {name}\n"
                f"   File: {file_path.name}\n"
                f"   Time: {timestamp}\n"
                "   \n"
                f"Use 'chalkml stash pop {name}' to restore\n"
            )
        except Exception as e:
            return False, f"Error stashing: {str(e)}"

    def stash_pop(self, name: str, output_path: str) -> Tuple[bool, str]:
        """Restore stashed state.

        Copies the stashed data to ``output_path``. The stash entry itself is
        kept, so the same stash can be restored again.

        Returns:
            ``(True, summary)`` on success; ``(False, reason)`` if the stash
            (or its data file) does not exist or the copy fails.
        """
        try:
            if not self.chalk_dir.exists():
                return False, "❌ Not a ChalkML repository"

            if not self._is_safe_name(name):
                return False, f"❌ Invalid stash name: '{name}'"

            stash_file = self.stash_dir / f"{name}.json"
            stash_data = self.stash_dir / f"{name}.csv"

            if not stash_file.exists():
                return False, f"❌ Stash not found: {name}"
            if not stash_data.exists():
                return False, f"❌ Stash data not found: {name}"

            stash_obj = json.loads(stash_file.read_text(encoding=self._TEXT_ENCODING))

            output_path = Path(output_path)
            shutil.copy2(stash_data, output_path)

            return True, f"""✅ Restored stash: {name}
   File: {stash_obj['file']}
   Stashed: {stash_obj['timestamp']}
   Output: {output_path}
"""
        except Exception as e:
            return False, f"Error restoring stash: {str(e)}"

    def stash_list(self) -> Tuple[bool, str]:
        """List all stashes, ordered by metadata file path.

        Returns:
            ``(True, listing)`` (or a "no stashes" note); ``(False, reason)``
            if the repository is missing or a metadata file cannot be read.
        """
        try:
            if not self.chalk_dir.exists():
                return False, "❌ Not a ChalkML repository"

            stashes = sorted(self.stash_dir.glob("*.json"))
            if not stashes:
                return True, "No stashes saved"

            parts = ["📦 Stashed States\n", "=" * self._RULE_WIDTH + "\n\n"]
            for stash_file in stashes:
                stash_obj = json.loads(
                    stash_file.read_text(encoding=self._TEXT_ENCODING)
                )
                parts.append(f"  {stash_obj['name']}\n")
                parts.append(f"    File: {stash_obj['file']}\n")
                parts.append(f"    Time: {stash_obj['timestamp']}\n")
                parts.append(f"    Branch: {stash_obj['branch']}\n\n")

            return True, "".join(parts)
        except Exception as e:
            return False, f"Error listing stashes: {str(e)}"

    # Helper methods
    @classmethod
    def _is_safe_name(cls, name: str) -> bool:
        """Return True if ``name`` is usable as a single file-name component."""
        if not isinstance(name, str) or not name or name in (".", ".."):
            return False
        # HEAD is parsed with strip(), so surrounding whitespace would not
        # survive a round trip.
        if name != name.strip():
            return False
        return not any(ch in cls._FORBIDDEN_NAME_CHARS for ch in name)

    @classmethod
    def _is_valid_sha(cls, sha: str) -> bool:
        """Return True if ``sha`` is a hex string long enough for the
        ``<2 chars>/<rest>`` object layout."""
        return (
            isinstance(sha, str)
            and len(sha) > 2
            and all(ch in cls._HEX_DIGITS for ch in sha)
        )

    @staticmethod
    def _is_number_column(series) -> bool:
        """Return True for numeric, non-boolean pandas columns."""
        dtype_checks = pd.api.types
        return (
            dtype_checks.is_numeric_dtype(series)
            and not dtype_checks.is_bool_dtype(series)
        )

    def _snapshot_path(self, sha: str) -> Path:
        """Path of the data snapshot stored for commit ``sha``."""
        return self.objects_dir / f"{sha}.csv"

    def _get_current_branch(self) -> str:
        """Get current branch name.

        Falls back to ``"main"`` when HEAD does not contain a
        ``ref: refs/heads/...`` line.
        """
        head_content = self.head_file.read_text(encoding=self._TEXT_ENCODING).strip()
        if head_content.startswith(self._HEAD_REF_PREFIX):
            # Drop only the leading prefix, never later occurrences
            return head_content[len(self._HEAD_REF_PREFIX):]
        return "main"

    def _get_branch_head(self, branch: str) -> Optional[str]:
        """Get SHA of branch HEAD, or None if the branch file is missing or
        empty (no commits yet)."""
        branch_file = self.refs_dir / branch
        if branch_file.exists():
            sha = branch_file.read_text(encoding=self._TEXT_ENCODING).strip()
            return sha if sha else None
        return None

    def _load_commit(self, sha: str) -> Optional[Dict]:
        """Load commit object.

        Returns None when ``sha`` is not a plausible commit id (too short or
        not hexadecimal) or when no commit object is stored for it.
        """
        if not self._is_valid_sha(sha):
            return None
        commit_file = self.objects_dir / sha[:2] / sha[2:]
        # is_file(): the two-character fan-out directories must never be
        # mistaken for commit objects.
        if commit_file.is_file():
            return json.loads(commit_file.read_text(encoding=self._TEXT_ENCODING))
        return None

    def _compute_file_hash(self, file_path: Path) -> str:
        """Compute SHA-256 of file, truncated to the first 12 hex digits."""
        sha256 = hashlib.sha256()
        with open(file_path, 'rb') as f:
            for chunk in iter(lambda: f.read(65536), b''):
                sha256.update(chunk)
        return sha256.hexdigest()[:12]


def get_chalkml_git(workspace_path: Optional[str] = None) -> ChalkMLGit:
    """Build a ChalkMLGit bound to ``workspace_path``.

    A brand-new instance is constructed on every call; nothing is cached or
    shared between calls. ``None`` is passed through unchanged to the
    ChalkMLGit constructor.
    """
    repo = ChalkMLGit(workspace_path=workspace_path)
    return repo
