"""
Data manipulation module for alyios_windows.
Provides functions for reading, writing, and manipulating data files using pandas.
"""

import pandas as pd
import chardet
import shutil
from pathlib import Path
from typing import Optional, Union, List, Dict, Any


def detect_encoding(file_path: str) -> str:
    """
    Auto-detect the encoding of a text file.

    The whole file is read in binary mode and handed to ``chardet.detect``.
    When chardet cannot determine an encoding (it reports ``None``, which
    happens for example with an empty file), ``'utf-8'`` is returned so that
    callers always receive a usable encoding name.

    Args:
        file_path: Path to the file

    Returns:
        Detected encoding name (e.g., 'utf-8', 'latin-1'), or 'utf-8' when
        no encoding could be detected
    """
    with open(file_path, 'rb') as f:
        raw_bytes = f.read()

    result = chardet.detect(raw_bytes)

    # chardet reports {'encoding': None, ...} when it has no guess; never
    # leak that None to callers, who pass the value straight to open().
    return result.get('encoding') or 'utf-8'


def detect_csv_delimiter(file_path: str, encoding: Optional[str] = None) -> str:
    """
    Auto-detect the delimiter of a CSV file.

    Only the first line of the file is inspected. Each candidate delimiter
    (comma, semicolon, tab, pipe) is counted, ignoring occurrences inside
    double-quoted fields so that quoted text such as ``"Smith, John"`` does
    not skew the result. If nothing is found outside quotes (for example
    because of a stray quote character), the raw counts over the whole line
    are used instead. Ties, including the case where no candidate occurs at
    all, resolve to the earliest candidate in the list, i.e. the comma.

    Args:
        file_path: Path to the CSV file
        encoding: File encoding (auto-detected if None)

    Returns:
        Detected delimiter character
    """
    if encoding is None:
        encoding = detect_encoding(file_path)

    # Read the first line only; it is the sole input to the detection.
    with open(file_path, 'r', encoding=encoding) as f:
        first_line = f.readline()

    # Common delimiters to check, in tie-breaking priority order
    delimiters = [',', ';', '\t', '|']

    # Count candidates that appear outside double-quoted sections. A doubled
    # quote ("") inside a quoted field toggles the state twice, so escaped
    # quotes are handled without special casing.
    delimiter_counts = dict.fromkeys(delimiters, 0)
    in_quotes = False
    for char in first_line:
        if char == '"':
            in_quotes = not in_quotes
        elif not in_quotes and char in delimiter_counts:
            delimiter_counts[char] += 1

    # An unbalanced quote can hide every delimiter; fall back to raw counts.
    if not any(delimiter_counts.values()):
        delimiter_counts = {d: first_line.count(d) for d in delimiters}

    # Return the delimiter with the highest count
    return max(delimiter_counts, key=delimiter_counts.get)


def read_csv(
    file_path: str,
    delimiter: Optional[str] = None,
    encoding: Optional[str] = None,
    **kwargs
) -> pd.DataFrame:
    """
    Read a CSV file with auto-detection of delimiter and encoding.

    Args:
        file_path: Path to the CSV file
        delimiter: Column delimiter (auto-detected if None)
        encoding: File encoding (auto-detected if None)
        **kwargs: Additional arguments to pass to pd.read_csv. A ``sep``
            entry is accepted as an alias for ``delimiter`` when
            ``delimiter`` itself is not given.

    Returns:
        DataFrame containing the CSV data

    Raises:
        ValueError: Raised by pandas when both ``delimiter`` and ``sep``
            are supplied explicitly.
    """
    # pandas rejects calls that specify both `sep` and `delimiter`. Without
    # this, a caller passing only `sep=` would trigger auto-detection and
    # then fail because both names end up in the pandas call.
    if delimiter is None and 'sep' in kwargs:
        delimiter = kwargs.pop('sep')

    if encoding is None:
        encoding = detect_encoding(file_path)

    if delimiter is None:
        delimiter = detect_csv_delimiter(file_path, encoding)

    return pd.read_csv(file_path, delimiter=delimiter, encoding=encoding, **kwargs)


def read_excel(
    file_path: str,
    sheet_name: Union[str, int, None] = 0,
    **kwargs
) -> pd.DataFrame:
    """
    Read an Excel file.

    Args:
        file_path: Path to the Excel file
        sheet_name: Sheet name or index (0 for first sheet). Passing None
            makes pandas read every sheet and return a dict of DataFrames
            keyed by sheet name instead of a single DataFrame.
        **kwargs: Additional arguments to pass to pd.read_excel. ``engine``
            defaults to 'openpyxl' but may be overridden here.

    Returns:
        DataFrame containing the Excel data
    """
    # Use openpyxl by default, but let the caller choose another engine;
    # hard-coding the keyword alongside **kwargs made any explicit
    # ``engine=`` fail with a duplicate-keyword TypeError.
    kwargs.setdefault('engine', 'openpyxl')
    return pd.read_excel(file_path, sheet_name=sheet_name, **kwargs)


def save_csv(
    df: pd.DataFrame,
    file_path: str,
    delimiter: str = ',',
    encoding: str = 'utf-8',
    index: bool = False,
    **kwargs
) -> None:
    """
    Write a DataFrame out as a delimited text file.

    Args:
        df: DataFrame to write
        file_path: Destination path of the CSV file
        delimiter: Character placed between columns (forwarded as ``sep``)
        encoding: Text encoding of the output file
        index: True to include the row index as a column
        **kwargs: Extra keyword arguments forwarded to df.to_csv
    """
    write_csv = df.to_csv
    write_csv(
        file_path,
        sep=delimiter,
        encoding=encoding,
        index=index,
        **kwargs
    )


def save_excel(
    df: pd.DataFrame,
    file_path: str,
    sheet_name: str = 'Sheet1',
    index: bool = False,
    **kwargs
) -> None:
    """
    Save a DataFrame to an Excel file.

    Args:
        df: DataFrame to save
        file_path: Output file path
        sheet_name: Name of the sheet
        index: Whether to write row indices
        **kwargs: Additional arguments to pass to df.to_excel. ``engine``
            defaults to 'openpyxl' but may be overridden here.
    """
    # Use openpyxl by default, but let the caller choose another writer;
    # hard-coding the keyword alongside **kwargs made any explicit
    # ``engine=`` fail with a duplicate-keyword TypeError.
    kwargs.setdefault('engine', 'openpyxl')
    df.to_excel(file_path, sheet_name=sheet_name, index=index, **kwargs)


def join_data(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    on: Union[str, List[str]],
    how: str = 'inner'
) -> pd.DataFrame:
    """
    Merge two DataFrames on shared key column(s), like a SQL join.

    Args:
        df1: Left-hand DataFrame
        df2: Right-hand DataFrame
        on: Name, or list of names, of the key column(s) present in both
        how: Join type ('inner', 'outer', 'left', 'right')

    Returns:
        New DataFrame holding the merged rows
    """
    merged = pd.merge(left=df1, right=df2, how=how, on=on)
    return merged


def append_data(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    ignore_index: bool = True
) -> pd.DataFrame:
    """
    Stack the rows of a second DataFrame underneath the first.

    Args:
        df1: DataFrame whose rows come first
        df2: DataFrame whose rows are placed after those of df1
        ignore_index: If True the result gets a fresh 0..n-1 index;
            if False the original index labels are kept

    Returns:
        New DataFrame containing the rows of both inputs
    """
    frames = [df1, df2]
    stacked = pd.concat(objs=frames, ignore_index=ignore_index)
    return stacked


def copy_file(src: str, dst: str) -> None:
    """
    Duplicate a file at a new location using ``shutil.copy2``.

    Args:
        src: Path of the file to copy
        dst: Path the copy is written to
    """
    # The path returned by copy2 is intentionally discarded.
    shutil.copy2(src=src, dst=dst)
    return None


def loop_columns(df: pd.DataFrame) -> List[str]:
    """
    Produce the column labels of a DataFrame as a plain list.

    Args:
        df: DataFrame whose columns are wanted

    Returns:
        Column labels in their DataFrame order
    """
    column_index = df.columns
    return column_index.tolist()


def loop_rows(df: pd.DataFrame) -> pd.DataFrame:
    """
    Hand back the DataFrame unchanged so the caller can iterate its rows.

    Args:
        df: DataFrame to iterate over

    Returns:
        The very same DataFrame object (no copy is made); iterate it with
        iterrows() or itertuples()
    """
    iterable_frame = df
    return iterable_frame


def filter_data(
    df: pd.DataFrame,
    column: str,
    operator: str,
    value: Any
) -> pd.DataFrame:
    """
    Keep only the rows whose value in one column satisfies a condition.

    Args:
        df: DataFrame to filter
        column: Name of the column the condition is evaluated on
        operator: One of '==', '!=', '>', '<', '>=', '<=', 'contains'.
            For 'contains' the column is converted to strings and searched
            for ``str(value)`` with pandas' ``str.contains``, which treats
            the pattern as a regular expression by default.
        value: Right-hand operand of the comparison

    Returns:
        DataFrame with the matching rows

    Raises:
        ValueError: If ``operator`` is not one of the supported symbols
    """
    # Ordered (symbol, mask builder) pairs; each builder turns the selected
    # column into a boolean mask.
    comparisons = (
        ('==', lambda series: series == value),
        ('!=', lambda series: series != value),
        ('>', lambda series: series > value),
        ('<', lambda series: series < value),
        ('>=', lambda series: series >= value),
        ('<=', lambda series: series <= value),
        ('contains', lambda series: series.astype(str).str.contains(str(value))),
    )

    for symbol, build_mask in comparisons:
        if operator == symbol:
            return df[build_mask(df[column])]

    raise ValueError(f"Unknown operator: {operator}")


def select_columns(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
    """
    Project a DataFrame onto a chosen set of columns.

    Args:
        df: DataFrame to take the columns from
        columns: Names of the columns to keep, in the desired order

    Returns:
        DataFrame containing only the requested columns
    """
    projection = df[columns]
    return projection


def get_value(df: pd.DataFrame, row: int, column: str) -> Any:
    """
    Look up a single cell of a DataFrame.

    The lookup goes through ``df.loc``, so ``row`` is matched against the
    index labels of the DataFrame rather than treated as a position.

    Args:
        df: DataFrame to read from
        row: Index label of the row
        column: Name of the column

    Returns:
        Value stored at the given row and column
    """
    cell = df.loc[row, column]
    return cell


def set_value(df: pd.DataFrame, row: int, column: str, value: Any) -> pd.DataFrame:
    """
    Overwrite a single cell of a DataFrame in place.

    The assignment goes through ``df.loc``, so ``row`` is matched against
    the index labels of the DataFrame rather than treated as a position.

    Args:
        df: DataFrame to modify
        row: Index label of the row
        column: Name of the column
        value: New value for the cell

    Returns:
        The same DataFrame object that was passed in, after modification
    """
    target = (row, column)
    df.loc[target] = value
    return df


# Module-level registry of named DataFrames, kept in memory for use across
# actions. Written by load_dataframe(), read by get_dataframe() and emptied
# by clear_dataframes().
_dataframes: Dict[str, pd.DataFrame] = {}


def load_dataframe(name: str, df: pd.DataFrame) -> None:
    """
    Register a DataFrame in the in-memory store under a name.

    An entry already stored under the same name is replaced.

    Args:
        name: Key under which the DataFrame can be retrieved later
        df: DataFrame to register (stored by reference, not copied)
    """
    _dataframes[name] = df
    return None


def get_dataframe(name: str) -> pd.DataFrame:
    """
    Look up a previously registered DataFrame.

    Args:
        name: Key the DataFrame was registered under

    Returns:
        The DataFrame stored under that name

    Raises:
        KeyError: If nothing is stored under ``name``; the message lists
            the names that are currently available
    """
    if name in _dataframes:
        return _dataframes[name]

    raise KeyError(f"DataFrame '{name}' not found. Available: {list(_dataframes.keys())}")


def clear_dataframes() -> None:
    """
    Remove every entry from the in-memory DataFrame store.

    The store is emptied in place rather than replaced with a new dict.
    """
    _dataframes.clear()
    return None
