import argparse
import logging
import subprocess
import sys
import warnings
from os.path import join as pjoin

import pandas as pd
import pysam
from ugbio_core.logger import logger

# Module-level side effect: install an "ignore" filter so that all Python warnings
# are suppressed from the moment this module is imported or run.
warnings.filterwarnings("ignore")


def add_cnmops_vcf_header(sample_name: str, fasta_index_file: str) -> pysam.VariantHeader:
    """
    Build the VCF header used for CNV calls.

    The header contains, in this order: the ``fileformat`` and ``source`` meta lines,
    the sample column, the ``GENOOX_VCF_TYPE`` line, one ``contig`` line per row of the
    FASTA index, and the ALT / FILTER / INFO / FORMAT definitions.

    Args:
        sample_name (str): Name of the single sample column added to the header.
        fasta_index_file (str): Path to a tab-separated file with no header row whose
            first two columns are the contig id and the contig length (.fai file).

    Returns:
        pysam.VariantHeader: The populated header.
    """
    vcf_header = pysam.VariantHeader()

    # Meta-information lines
    for meta_key, meta_value in (("fileformat", "VCFv4.2"), ("source", "ULTIMA_CNV")):
        vcf_header.add_meta(meta_key, value=meta_value)

    # Sample column
    vcf_header.add_sample(sample_name)

    vcf_header.add_line("##GENOOX_VCF_TYPE=ULTIMA_CNV")

    # One contig line per reference sequence; only the first two index columns are read
    contig_table = pd.read_csv(fasta_index_file, sep="\t", header=None, usecols=[0, 1])
    contig_table.columns = ["chr", "length"]
    for _, contig in contig_table.iterrows():
        chr_id, length = contig["chr"], contig["length"]
        vcf_header.add_line(f"##contig=<ID={chr_id},length={length}>")

    # Fixed definition lines, added in exactly the order listed here.
    # NOTE(review): the ALT IDs are written with angle brackets (ID=<CNV>); confirm that
    # downstream consumers expect this form rather than the bare ID=CNV form.
    definition_lines = (
        # ALT
        '##ALT=<ID=<CNV>,Description="Copy number variant region">',
        '##ALT=<ID=<DEL>,Description="Deletion relative to the reference">',
        '##ALT=<ID=<DUP>,Description="Region of elevated copy number relative to the reference">',
        # FILTER
        '##FILTER=<ID=PASS,Description="high confidence CNV call">',
        '##FILTER=<ID=UG-CNV-LCR,Description="CNV calls overlpping (>50% overlap) with UG-CNV-LCR">',
        '##FILTER=<ID=LEN,Description="CNV calls with length less then 10Kb">',
        # INFO
        (
            '##INFO=<ID=CONFIDENCE,Number=1,Type=String,Description="Confidence level for CNV call.'
            + 'can be one of: LOW,MEDIUM,HIGH">'
        ),
        '##INFO=<ID=CopyNumber,Number=1,Type=Float,Description="copy number of CNV call">',
        '##INFO=<ID=RoundedCopyNumber,Number=1,Type=Integer,Description="rounded copy number of CNV call">',
        '##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="CNV length">',
        '##INFO=<ID=SVTYPE,Number=1,Type=String,Description="CNV type. can be DUP or DEL">',
        # FORMAT
        '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
    )
    for definition in definition_lines:
        vcf_header.add_line(definition)

    return vcf_header


# Copy numbers strictly below this value are reported as <DEL>; every other value
# (including exactly this value) is reported as <DUP>.
_DEL_COPY_NUMBER_THRESHOLD = 2


def _parse_cnv_info(info: str) -> tuple:
    """
    Split the BED annotation field into a copy number and a list of filter names.

    The copy number is the text before the first ``|`` with ``CN`` removed, parsed as a
    float. Filter names are the second ``|``-separated field of every ``;``-separated
    item that has one (e.g. ``CN1|LEN;CN1|UG-CNV-LCR`` -> ``(1.0, ["LEN", "UG-CNV-LCR"])``,
    ``CN3`` -> ``(3.0, [])``).

    Args:
        info (str): Annotation field of one BED row.

    Returns:
        tuple: ``(copy_number, filters)`` where ``copy_number`` is a float and ``filters``
            is a list of str.

    Raises:
        ValueError: If the copy-number part cannot be parsed as a float.
    """
    cn = float(info.split("|")[0].replace("CN", ""))
    filters = [fields[1] for fields in (item.split("|") for item in info.split(";")) if len(fields) > 1]
    return cn, filters


def write_cnmops_vcf(outfile: str, header: pysam.VariantHeader, cnv_annotated_bed_file: str, sample_name: str) -> None:
    """
    Write CNV calls from a cn.mops BED file to a VCF file.

    The BED file is parsed before the output is opened, so an unreadable input does not
    leave an output file behind. An empty BED file (no CNV calls) is not an error: a
    warning is logged and the output is written without any records.

    Args:
        outfile (str): Path to the output VCF file.
        header (pysam.VariantHeader): VCF header with reference genome information.
        cnv_annotated_bed_file (str): Path to the input BED file containing CNV calls generated by cn.mops and
            annotated with UG-CNV-LCR. Must be tab-separated, without a header row, with exactly four
            columns: chr, start, end, info.
        sample_name (str): Name of the sample.

    Raises:
        ValueError: If the BED file does not have exactly four columns, or a copy number cannot be parsed.
    """
    bed_columns = ["chr", "start", "end", "info"]
    try:
        df_cnvs = pd.read_csv(cnv_annotated_bed_file, sep="\t", header=None)
    except pd.errors.EmptyDataError:
        # pandas refuses to parse a file with no rows; treat it as "no CNV calls".
        logger.warning("No CNV calls found in %s; writing a VCF without records", cnv_annotated_bed_file)
        df_cnvs = pd.DataFrame(columns=bed_columns)
    else:
        # Assigning (rather than passing names= to read_csv) fails loudly on a wrong column count.
        df_cnvs.columns = bed_columns

    with pysam.VariantFile(outfile, mode="w", header=header) as vcf_out:
        for _, row in df_cnvs.iterrows():
            start = row["start"]
            end = row["end"]
            cn, filters = _parse_cnv_info(row["info"])
            sv_type = "DEL" if cn < _DEL_COPY_NUMBER_THRESHOLD else "DUP"

            # The order of the assignments below is kept as in the original implementation:
            # position and stop first, then alleles.
            record = vcf_out.new_record()
            # str() guards against purely numeric contig names that pandas parses as integers.
            record.contig = str(row["chr"])
            record.start = start
            record.stop = end
            record.ref = "N"
            record.alts = (f"<{sv_type}>",)

            # NOTE(review): filtered calls are tagged "FAIL" although the CONFIDENCE header description
            # in add_cnmops_vcf_header lists only LOW,MEDIUM,HIGH - confirm which is intended.
            if filters:
                for filter_name in filters:
                    record.filter.add(filter_name)
                confidence = "FAIL"
            else:
                record.filter.add("PASS")
                confidence = "HIGH"

            record.info["CONFIDENCE"] = confidence
            record.info["CopyNumber"] = cn
            record.info["RoundedCopyNumber"] = round(cn)
            record.info["SVLEN"] = int(end) - int(start)
            record.info["SVTYPE"] = sv_type
            # END position is automatically generated for multi-base variants

            # Genotype: one copy -> 0/1, zero copies -> 1/1, any other copy number -> ./1
            if cn == 1:
                gt = (0, 1)
            elif cn == 0:
                gt = (1, 1)
            else:
                gt = (None, 1)
            record.samples[sample_name]["GT"] = gt

            # Write the record to the VCF file
            vcf_out.write(record)


def run(argv):
    """
    converts CNV calls in bed format to vcf.

    input arguments (parsed from argv[1:]):
    --cnv_annotated_bed_file: input bed file holding CNV calls.
    --fasta_index_file: (.fai file) tab delimited file holding reference genome chr ids with their lengths.
    --out_directory: output directory (optional; when omitted the output path is just the file name)
    --sample_name: sample name
    --verbosity: name of a logging level (ERROR, WARNING, INFO, DEBUG); defaults to INFO
    output files:
    vcf file: <sample_name>.cnv.vcf.gz
        shows called CNVs in zipped vcf format.
    vcf index file: <sample_name>.cnv.vcf.gz.tbi
        vcf corresponding index file.

    Exits the process with status 1 if ``bcftools index`` returns a non-zero exit code.
    """
    parser = argparse.ArgumentParser(
        prog="cnv_results_to_vcf.py", description="converts CNV calls in bed format to vcf."
    )

    parser.add_argument("--cnv_annotated_bed_file", help="input bed file holding CNV calls", required=True, type=str)
    parser.add_argument(
        "--fasta_index_file",
        help="tab delimeted file holding reference genome chr ids with their lengths. (.fai file)",
        required=True,
        type=str,
    )
    parser.add_argument("--out_directory", help="output directory", required=False, type=str)
    parser.add_argument("--sample_name", help="sample name", required=True, type=str)
    parser.add_argument("--verbosity", help="Verbosity: ERROR, WARNING, INFO, DEBUG", required=False, default="INFO")

    args = parser.parse_args(argv[1:])
    logger.setLevel(getattr(logging, args.verbosity))
    sample_name = args.sample_name
    header = add_cnmops_vcf_header(sample_name, args.fasta_index_file)

    # Build the output path; a missing/empty --out_directory leaves just the file name
    out_directory = args.out_directory or ""
    outfile = pjoin(out_directory, sample_name + ".cnv.vcf.gz")

    write_cnmops_vcf(outfile, header, args.cnv_annotated_bed_file, sample_name)

    # Create the tabix (.tbi) index next to the VCF
    cmd = ["bcftools", "index", "-t", outfile]
    try:
        subprocess.check_call(cmd)
    except subprocess.CalledProcessError as e:
        # Report through the logger, like the rest of this function, instead of printing to stdout
        logger.error("bcftools index command failed with exit code: %s", e.returncode)
        sys.exit(1)  # Exit with error status
    logger.info(f"output file: {outfile}")
    logger.info(f"output file index: {outfile}.tbi")


def main() -> None:
    """Command-line entry point: pass the process arguments (``sys.argv``) to ``run``."""
    run(sys.argv)


# Run the converter only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
