# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/02_mlst.ipynb.

# %% auto 0
__all__ = ['process_mlst_data', 'check_if_mlst_empty', 'process_mlst_data_from_cli']

# %% ../nbs/02_mlst.ipynb 2
# The export directive in the source notebook is what places this code in the generated module.

# standard libs
import os
import re

# Common to template
# Add to the requirements in settings.ini; the package name is python-dotenv. For conda builds, ensure `conda config --add channels conda-forge`.
import dotenv  # for loading config from .env files, https://pypi.org/project/python-dotenv/
import envyaml  # Allows loading env vars into a YAML file, https://github.com/thesimj/envyaml
import fastcore  # To add functionality related to nbdev development, https://github.com/fastai/fastcore/
from fastcore import (
    test,
)
from fastcore.script import (
    call_parse,
)  # for @call_parse, https://fastcore.fast.ai/script
import json  # for nicely printing json and yaml
from fastcore import test
from . import core

# %% ../nbs/02_mlst.ipynb 6
def process_mlst_data(
    input_path: str,
    output_path: str = "./output.tsv",
    add_header: str = None,
    replace_header: str = None,
    filter_columns: str = None,
    remove_sampleid: bool = False,
    combine_alleles: bool = False,
):
    """
    Reformat an MLST result table (TSV) and write the result to `output_path`.

    The input is loaded once as a TSV. If `check_if_mlst_empty` reports the table as empty, an empty file is written to `output_path` and no further processing happens.
    Otherwise the optional steps run in this fixed order: combine alleles, add header, replace header, filter columns, remove SampleID. The table is then exported as TSV.

    Arguments:
        input_path (str): Path to the input file.
        output_path (str): Path to the output file (default: './output.tsv').
        add_header (str): Header names separated by ", " (comma followed by one space). The current column labels become the first data row and these names become the new header. Unless `combine_alleles` is set, the fourth name gets the suffix "1" and one numbered name ("<name>2", "<name>3", ...) is appended for every column after the fourth (default: None).
        replace_header (str): Forwarded to the table's `rename_header` method (default: None).
        filter_columns (str): Forwarded to the table's `filter_columns` method (default: None).
        remove_sampleid (bool): Drop the first column, but only when it is labelled "SampleID" (default: False).
        combine_alleles (bool): Join the labels of the fourth and all later columns with "," into the fourth column's label, then drop every column after the fourth (default: False).

    Raises:
        FileNotFoundError: If `input_path` does not exist.
    """

    if not os.path.exists(input_path):
        raise FileNotFoundError(f"The input file {input_path} does not exist.")

    # Load the file once; the same table serves both the emptiness check and
    # the processing below (the check only reads the column labels).
    df = core.DataFrame()
    df.import_data(input_path, file_type="tsv")

    if check_if_mlst_empty(df):
        # Nothing to reformat: still create (or truncate) the output file so
        # that it exists afterwards, then stop.
        with open(output_path, "w") as f:
            f.write("")
        return

    if combine_alleles:
        # Only the column *labels* are merged: the fourth label becomes the
        # comma-separated list of the fourth and all later labels, and the
        # columns after the fourth are discarded.
        labels = list(df.df.columns)
        labels[3] = ",".join(labels[3:])
        df.df.columns = labels
        df.df = df.df.iloc[:, :4]

    if add_header:
        # NOTE: names must be separated by exactly ", "; a bare "," does not split.
        header_list = add_header.split(", ")
        if not combine_alleles:
            # One header name per allele column: "<name>1" for the fourth
            # column, "<name>2", "<name>3", ... for the columns after it.
            allele_name = header_list[3]
            allele_count = len(df.df.columns) - 3
            header_list[3] = allele_name + "1"
            header_list.extend(
                allele_name + str(number) for number in range(2, allele_count + 1)
            )
        # Demote the current labels to a data row: insert them at index -1,
        # shift every index up by one and sort, which puts that row first.
        # assumes the table carries a default integer index — TODO confirm
        df.df.loc[-1] = df.df.columns
        df.df.index = df.df.index + 1
        df.df = df.df.sort_index()
        df.df.columns = header_list

    if replace_header:
        df.rename_header(replace_header)

    if filter_columns:
        df.filter_columns(filter_columns)

    # The first column is dropped only when it is actually labelled "SampleID".
    if remove_sampleid and df.df.columns[0] == "SampleID":
        df.df = df.df.iloc[:, 1:]

    df.export_data(output_path, file_type="tsv")


def check_if_mlst_empty(df):
    """
    Report whether an imported MLST table counts as empty.

    Arguments:
        df: Object exposing the loaded table as `df.df`; only `df.df.columns` is read.

    Returns:
        bool: True when the second column label is exactly "-", otherwise False.
    """
    # presumably "-" is the placeholder written when no MLST scheme was
    # found for the sample — confirm against the tool that produces the input
    second_label = df.df.columns[1]
    return bool(second_label == "-")


@call_parse
def process_mlst_data_from_cli(
    input_path: str,  # Path to the input file
    output_path: str = "./output.tsv",  # Path to the output file
    add_header: str = None,  # Header to add if the input file has none
    replace_header: str = None,  # Header to replace the existing header
    filter_columns: str = None,  # Columns to filter from the header
    remove_sampleid: bool = False,  # Whether to remove the SampleID column
    combine_alleles: bool = False,  # Whether to combine allele columns into one
):
    """Command-line entry point that forwards every argument unchanged to `process_mlst_data`."""
    # The signature mirrors `process_mlst_data` one-to-one, so each value is
    # handed over under the same name.
    process_mlst_data(
        input_path=input_path,
        output_path=output_path,
        add_header=add_header,
        replace_header=replace_header,
        filter_columns=filter_columns,
        remove_sampleid=remove_sampleid,
        combine_alleles=combine_alleles,
    )
