#!/usr/bin/env python

"""
Copyright (c) 2015 Kwangbom Choi, The Jackson Laboratory
This software was developed by Kwangbom "KB" Choi in Gary Churchill's Lab.
This is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This software is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this software. If not, see <http://www.gnu.org/licenses/>.
"""

import os
import sys
import getopt
from emase.AlignmentMatrixFactory import AlignmentMatrixFactory as AMF


# Text shown for -h/--help. Every option accepted by the getopt spec in
# main() ("ha:i:o:s:t:T:") is listed here, together with its default.
help_message = '''
Usage:
    bam-to-emase -a <aln_file> -i <lid_file> -s <haplotypes> -o <out_file>
                 [-t <dtype>] [-T <dtype>]

Input:
    -a <aln_file>   : Sam/Bam file
    -i <lid_file>   : Text file that lists all the locus IDs
    -s <haplotypes> : Haplotype names listed using comma(,)
    -o <out_file>   : PyTables file that stores the data
                      (default: alignments.transcriptome.h5)

Parameters:
    -t <dtype>      : Index dtype of the output matrix (default: uint32)
    -T <dtype>      : Data dtype of the output matrix (default: uint8)
    -h, --help      : shows this help message
'''


class Usage(Exception):
    """Raised to abort option handling and report a message to the user.

    The text (or the underlying getopt error) is kept on ``msg`` so the
    handler in ``main`` can print it.
    """

    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg


def get_names(idfile):
    """Return the unique IDs listed in *idfile*, in first-seen order.

    Each line is stripped of trailing whitespace and split on tabs; only the
    first column is used as the ID. Repeated IDs are kept once, at the
    position of their first occurrence.

    Note: a blank line contributes the empty string as an ID.

    Args:
        idfile: Path to a text file with one ID per line (first
            tab-separated column).

    Returns:
        A list of ID strings whose list index is the ID's numeric index.
    """
    seen = set()
    names = []
    with open(idfile) as fh:
        for curline in fh:
            # Only the first tab-separated column holds the ID.
            name = curline.rstrip().split("\t")[0]
            if name not in seen:
                seen.add(name)
                names.append(name)
    return names


def main(argv=None):
    """Command-line entry point for bam-to-emase.

    Parses the options, reads the locus IDs from the ``-i`` file with
    ``get_names`` and then drives the alignment matrix factory through its
    ``prepare`` / ``produce`` / ``cleanup`` steps to write the output file.

    Args:
        argv: Full argument vector, program name at index 0. Defaults to
            ``sys.argv``.

    Returns:
        2 when help is requested, an option cannot be parsed, or a required
        option (``-a`` or ``-i``) is missing. Otherwise ``None``, which
        ``sys.exit`` treats as success.
    """
    if argv is None:
        argv = sys.argv
    try:
        try:
            # Positional arguments are accepted by getopt but not used.
            opts, _ = getopt.getopt(argv[1:], "ha:i:o:s:t:T:", ["help"])
        except getopt.error as msg:
            raise Usage(msg)

        # Default values of vars
        alnfile = None
        lidfile = None
        outfile = 'alignments.transcriptome.h5'
        haplotypes = tuple()
        index_dtype = 'uint32'
        data_dtype = 'uint8'

        # option processing (change this later with optparse)
        for option, value in opts:
            if option in ("-h", "--help"):
                raise Usage(help_message)
            elif option == "-a":
                alnfile = value
            elif option == "-i":
                lidfile = value
            elif option == "-o":
                outfile = value
            elif option == "-s":
                haplotypes = tuple(value.split(','))
            elif option == "-t":
                index_dtype = value
            elif option == "-T":
                data_dtype = value

        # Check if the required options are given.
        # Fewer than two haplotypes is only a warning; processing continues.
        if len(haplotypes) < 2:
            sys.stderr.write("[Warning] %d haplotype/strain is given.\n" % len(haplotypes))
        if alnfile is None:
            sys.stderr.write("[Error] No alignment file is given.\n")
            return 2
        if lidfile is None:
            sys.stderr.write("[Error] No locus ID file is given.\n")
            return 2

        #
        # Main body
        #

        loci = get_names(lidfile)

        alignmat_factory = AMF(alnfile)
        # The directory of the output file is handed to prepare() as outdir;
        # it is '' when outfile has no directory component.
        alignmat_factory.prepare(haplotypes, loci, outdir=os.path.dirname(outfile))
        alignmat_factory.produce(outfile, index_dtype=index_dtype, data_dtype=data_dtype)
        alignmat_factory.cleanup()

        #
        # End of main body
        #

    except Usage as err:
        # Prefix the message with the script's base name.
        sys.stderr.write(os.path.basename(sys.argv[0]) + ": " + str(err.msg) + "\n")
        return 2


# Run as a script: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)

