import os
from enum import Enum
from typing import Tuple, List, Union

# I created this to optimize file path flattening while providing an absolute collision-proof guarantee (not just a statistical one).
# My postfix trick keeps stems readable and only grows with the exact ambiguity you need to resolve (underscore runs + depth).
# Other comparable approaches give ground in either readability or length. Here are some examples:

# - Escape-and-join: keeps one string but grows with every special char.
# - Length-prefixed join (e.g., `len:name`…): reversible but far less readable.
# - Hashing: compact and (statistically) safe, but loses human readability and determinism across renames unless you keep a side index.

#TODO: possibly add an option to customize the join char (instead of hardcoding '_')

# Here are some possibilities for optimization to approach closer to SHA256 results: 
#       batch mode, precompute segments (for similar paths), table lookups


class FlatPathMode(str, Enum):
    """
    Determines where the postfix is placed in the flattened filename.
    """
    SUFFIX = "suffix"   # Standard: filename.ext_postfix (V1.0 behavior)
    PREEXT = "preext"   # RFC V1.1: filename_postfix.ext (Preserves extension)


class TdsFlatNameCodecV1:
    """
    Implements **tds-flatpath Specification V1.1**.

    This codec handles the 'V1' family of encodings, providing collision-proof
    flattening using underscore-run postfixes + depth separators.

    Supports two modes (defined in Spec V1.1):
      1. SUFFIX (Default): Corresponds to V1.0 behavior. Postfix appended after extension.
      2. PREEXT (Opt-in):  New in V1.1. Postfix inserted before extension to preserve file type.

    Encoding rules implemented here:
      - Segments are joined with '_' and followed by '_' + postfix.
      - The postfix lists, in order, every underscore run ('n' + uppercase hex
        length) and every directory boundary ('-') of the directories plus the
        extension-less name of the last segment.
      - A single segment that contains no underscore at all is emitted unchanged
        (no postfix). This is the only output without an underscore.
      - A single segment whose only underscores sit in its extension (SUFFIX mode
        only; PREEXT rejects such extensions) gets a postfix describing the whole
        segment. Emitting it unchanged, or with a bare '-', would be
        indistinguishable from other paths.
    """

    VALID_POSTFIX_CHARS = set("n-0123456789ABCDEF")

    # Digits accepted after an 'n' token; matches what format(run, "X") emits.
    _HEX_DIGITS = frozenset("0123456789ABCDEF")

    # ---------- FLATTEN ---------- #

    @staticmethod
    def _postfix_from_segments(segments: Tuple[str, ...]) -> str:
        """
        Construct the postfix string from a sequence of path segments.

        Each segment is scanned in order:
          - For every consecutive run of '_' characters, append 'n' + HEX(count), uppercase.
          - After each segment (except the last), append a hyphen '-' to mark a directory boundary.
          - If nothing was appended (no underscore run and no boundary, i.e. at most
            one segment without underscores), the postfix is the sentinel '-'.

        Args:
            segments: The path segments to describe, outermost directory first.

        Returns:
            The postfix string; never empty.
        """
        parts = []
        last_idx = len(segments) - 1
        for idx, seg in enumerate(segments):
            run = 0
            for char in seg:
                if char == "_":
                    run += 1
                elif run:
                    # A non-underscore character closes the current run.
                    parts.append(f"n{run:X}")
                    run = 0

            # A run that reaches the end of the segment is still open here.
            if run:
                parts.append(f"n{run:X}")

            if idx < last_idx:
                parts.append("-")
        return "".join(parts) if parts else "-"

    @classmethod
    def flat_name(cls,
                  path_array: Tuple[str, ...],
                  mode: Union[FlatPathMode, str] = FlatPathMode.SUFFIX,
                  validate: bool = True) -> str:
        """
        Encodes a Path Tuple into a flattened string according to Spec V1.1.

        Args:
            path_array: Tuple of strings representing the path (e.g., ('src', 'main.py')).
                        Must be a tuple to ensure immutability.
            mode:       'suffix' (default) or 'preext'.
                        - Suffix: <base><ext>_<postfix>
                        - PreExt: <base>_<postfix><ext>
            validate:   If True, checks for invalid types (lists), empty input,
                        empty segments and OS path separators (os.sep and os.altsep).

        Returns:
            The flattened filename string. A single segment without any
            underscore is returned unchanged.

        Raises:
            TypeError: If validate is True and input is not a tuple.
            ValueError: If validation fails, if mode is not a valid FlatPathMode
                        value, or if PREEXT mode is used with an extension
                        containing underscores.
        """
        if validate:
            if not isinstance(path_array, tuple):
                raise TypeError(f"Expected tuple[str, ...], got {type(path_array)}")
            # os.altsep is None on POSIX and '/' on Windows; both separators
            # would break the one-segment-per-element assumption.
            separators = tuple(sep for sep in (os.sep, os.altsep) if sep)
            if not path_array or any(
                (not isinstance(seg, str))
                or (seg == "")
                or any(sep in seg for sep in separators)
                for seg in path_array
            ):
                raise ValueError("Invalid path_array: non-empty strings only, no OS separators.")

        # Avoid Enum constructor overhead if already an Enum (common case)
        if not isinstance(mode, FlatPathMode):
            mode = FlatPathMode(mode)

        last = path_array[-1]
        # splitext keeps leading dots in the root, so dotfiles such as
        # '.gitignore' come back as (name='.gitignore', ext='') on their own.
        name, ext = os.path.splitext(last)

        # RFC Check: If PREEXT mode, extension cannot contain underscore.
        # This ambiguity would prevent us from knowing where the postfix ends and extension begins.
        if mode == FlatPathMode.PREEXT and "_" in ext:
            raise ValueError(f"Cannot use PREEXT mode: Extension '{ext}' contains underscores.")

        # The stem is every directory plus the extension-less name of the file.
        # tuple() keeps validate=False callers that pass a list working.
        stem_segments = tuple(path_array[:-1]) + (name,)
        postfix = cls._postfix_from_segments(stem_segments)

        if len(path_array) == 1 and postfix == "-":
            if "_" not in ext:
                # Nothing to disambiguate: the output has no underscore at all,
                # which is how the decoder recognises this case.
                return last
            # Only reachable in SUFFIX mode. The extension carries underscores,
            # so neither eliding the postfix nor emitting a bare '-' is safe:
            # ('file.t_x_-',) would otherwise collide with ('file.t', 'x').
            # Describe the runs of the whole segment instead.
            postfix = cls._postfix_from_segments((last,))

        flattened_base = "_".join(stem_segments)

        if mode == FlatPathMode.PREEXT:
            return f"{flattened_base}_{postfix}{ext}"
        return f"{flattened_base}{ext}_{postfix}"

    # ---------- UNFLATTEN ---------- #

    @classmethod
    def _split_components(cls, filename: str, mode: FlatPathMode) -> Tuple[str, str, str]:
        """
        Helper to split a filename into (base, ext, postfix) depending on the mode.

        - SUFFIX Mode: Splits at the last underscore. The extension is left
          attached to base and ext is always ''. A dot in the joined string may
          belong to a directory segment, so the extension cannot be located here.
        - PREEXT Mode: Splits extension first, then splits the stem at its last
          underscore. A candidate extension containing '_' cannot be real (the
          encoder rejects it), so in that case the whole filename is the stem.

        Returns:
            (base, ext, postfix). postfix is '' when there is no underscore to
            split on, or when nothing follows the last underscore.

        Raises:
            ValueError: If mode is not a known FlatPathMode.
        """
        if mode == FlatPathMode.SUFFIX:
            # Format: base.ext_postfix
            cut = filename.rfind("_")
            if cut == -1:
                # No postfix implied (single segment, no underscores)
                return filename, "", ""
            return filename[:cut], "", filename[cut + 1:]

        elif mode == FlatPathMode.PREEXT:
            # Format: base_postfix.ext
            stem, ext = os.path.splitext(filename)
            if "_" in ext:
                # The last dot sits inside the base (e.g. 'v1.2_README_-'):
                # the encoded file had no extension.
                stem, ext = filename, ""

            cut = stem.rfind("_")
            if cut == -1:
                # No postfix implied
                return stem, ext, ""
            return stem[:cut], ext, stem[cut + 1:]

        raise ValueError(f"Unknown mode: {mode}")

    @classmethod
    def postfix_to_counts(cls, postfix: str) -> List[int]:
        """
        Convert a postfix string into a list of integer tokens.

        - 'n<HEX>' becomes that integer (underscore run length, always >= 1).
        - '-' becomes 0 (directory separator).

        Example: 'n1-n1-n2' -> [1, 0, 1, 0, 2]

        Args:
            postfix: The postfix text. An empty string yields an empty list.

        Returns:
            The tokens in order of appearance.

        Raises:
            ValueError: If the postfix contains anything other than '-' and
                        'n' followed by uppercase hex digits without a leading
                        zero (the only form the encoder emits). Rejecting 'n0'
                        keeps 0 unambiguous as the directory-boundary token.
        """
        if not postfix:
            return []

        counts = []
        pos, end = 0, len(postfix)
        while pos < end:
            marker = postfix[pos]
            pos += 1
            if marker == "-":
                counts.append(0)
                continue
            if marker != "n":
                raise ValueError(
                    f"Malformed postfix {postfix!r}: unexpected character {marker!r}."
                )

            digits_start = pos
            while pos < end and postfix[pos] in cls._HEX_DIGITS:
                pos += 1
            digits = postfix[digits_start:pos]
            if not digits or digits[0] == "0":
                raise ValueError(
                    f"Malformed postfix {postfix!r}: 'n' must be followed by a "
                    "non-zero uppercase hex run length."
                )
            counts.append(int(digits, 16))
        return counts

    @classmethod
    def unflatten_to_path(cls, flattened_filename: str, mode: Union[FlatPathMode, str] = FlatPathMode.SUFFIX) -> Tuple[str, ...]:
        """
        Decodes a flattened filename back into its original Path Tuple.

        Args:
            flattened_filename: The string to decode.
            mode: The mode used during encoding ('suffix' or 'preext').

        Returns:
            A tuple of strings representing the original path structure.
            A filename without any underscore decodes to a one-element tuple.
            An explicit lone '-' postfix on an underscore-free base is accepted
            as the non-elided spelling of that same case.

        Raises:
            ValueError: If mode is invalid, the postfix is empty or malformed,
                        or the postfix does not match the underscores found in
                        the flattened name.
        """
        if not isinstance(mode, FlatPathMode):
            mode = FlatPathMode(mode)

        base, ext, postfix = cls._split_components(flattened_filename, mode)

        if not postfix:
            # Either there was no underscore at all (single segment, nothing
            # elided from base/ext) or the name ends in '_' with nothing after
            # it. The latter is not something the encoder produces.
            if base + ext != flattened_filename:
                raise ValueError("Malformed flattened string: empty postfix after final underscore.")
            return (flattened_filename,)

        counts = cls.postfix_to_counts(postfix)

        # Explicit sentinel: one segment, no underscores.
        if counts == [0] and "_" not in base:
            return (base + ext,)

        segments = []
        seg_start = 0   # index in base where the segment being rebuilt begins
        cursor = 0      # index in base up to which underscores are accounted for

        for count in counts:
            hit = base.find("_", cursor)
            if count == 0:
                # Directory boundary: the next underscore is the join character.
                if hit == -1:
                    raise ValueError("Malformed flattened string vs postfix: missing directory separator.")
                segments.append(base[seg_start:hit])
                seg_start = cursor = hit + 1
            else:
                # Underscore run belonging to the current segment. The bounds
                # check comes first so a huge count never allocates anything.
                run_end = hit + count
                if hit == -1 or run_end > len(base) or base.count("_", hit, run_end) != count:
                    raise ValueError("Malformed flattened string vs postfix: underscore run mismatch.")
                cursor = run_end

        # Whatever is left (including an extension kept inside base in SUFFIX
        # mode) completes the final segment; PREEXT re-attaches its extension.
        segments.append(base[seg_start:] + ext)
        return tuple(segments)