#!/usr/bin/env python

"""
Copyright (c) 2015 Kwangbom Choi, The Jackson Laboratory
This software was developed by Kwangbom "KB" Choi in Gary Churchill's Lab.
This is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This software is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this software. If not, see <http://www.gnu.org/licenses/>.
"""

import sys
import getopt
from emase.EMfactory import EMfactory
from emase.AlignmentPropertyMatrix import AlignmentPropertyMatrix as APM


# Usage text for the command-line interface. main() raises it inside a Usage
# exception when -h/--help is given or when the required -i option is missing,
# so it ends up printed on stderr. The doubled backslash on the first usage
# line renders as a single shell line-continuation backslash.
help_message = '''
Usage:
    run-emase -i <h5_file> -g <grp_file> -L <len_file> -M <multiread_model> -o <outbase> \\
              -p <pseudocount> -r <read_length> -m <max_iters> -t <tolerance>
Input:
    -i <h5_file>         : Alignments stored in a PyTables HDF5 format
    -g <grp_file>        : Gene-to-transcript map (ENSMUSGxxx followed by a list of ENSMUSTyyy's)
    -L <len_file>        : File that contains transcript lengths
    -M <multiread_model> : Multiread model ID
                           1: Gene->Isoform->Allele,
                           2: Gene->Allele->Isoform,
                           3: Gene->(Isoform*Allele), (default model)
                           4: Gene*Isoform*Allele
    -o <outbase>         : EMASE outputs the result to <folder/basename> (default: './emase')
    -p <pseudocount>     : Pseudocount for allele specificity (default: 0.0)
    -r <read_length>     : Read length (default: 100)
    -m <max_iters>       : The number of maximum iterations for EM (default: 999)
    -t <tolerance>       : Tolerance for the termination of EM. (default: 0.0001)
Parameters:
    -h, --help : shows this help message
    -c         : reports the alignment counts (Consider using another script 'count-alignments' instead.)
    -w         : reports the posterior probability for each read
'''


class Usage(Exception):
    """Signal a command-line usage problem.

    The text to show the user is kept in ``msg``; it may be a plain string
    (e.g. the help text) or the error object raised by ``getopt``.
    """

    def __init__(self, msg):
        # Initialise the base class as well so that str(err) and err.args
        # carry the message; without this they are empty on Python 2.
        Exception.__init__(self, msg)
        self.msg = msg


def main(argv=None):
    """Parse the command line, run EMASE and write the result files.

    Args:
        argv: Full argument vector including the program name in position 0.
            Defaults to ``sys.argv`` when None.

    Returns:
        2 when the command line is unusable (unknown option, malformed
        numeric value, missing ``-i``) or when help was requested; the
        message is written to stderr in that case. None after a normal run.

    Output files (all prefixed with ``<outbase>``):
        ``.isoforms.tpm`` and ``.isoforms.effective_read_counts`` always;
        ``.genes.tpm`` and ``.genes.effective_read_counts`` when ``-g`` is given;
        ``.posterior.h5`` when ``-w`` is given;
        ``.isoforms.alignment_counts`` (plus ``.genes.alignment_counts`` with
        ``-g``) when ``-c`` is given.
    """
    if argv is None:
        argv = sys.argv
    try:
        try:
            # Positional arguments are not used by this script.
            opts, _ = getopt.getopt(argv[1:], "hi:g:L:M:o:p:r:m:t:cw", ["help"])
        except getopt.error as msg:  # 'as' form is valid on Python 2.6+ and 3
            raise Usage(msg)

        # Default values of vars
        h5file = None
        grpfile = None
        lenfile = None
        multiread_model = 3
        outbase = './emase'
        read_length = 100
        pseudocount = 0.0
        max_iters = 999
        tolerance = 0.0001
        report_gene_counts = False
        report_alignment_counts = False
        report_posterior = False

        # option processing (change this later with optparse)
        for option, value in opts:
            try:
                if option in ("-h", "--help"):
                    raise Usage(help_message)
                elif option == "-i":
                    h5file = value
                elif option == "-g":
                    grpfile = value
                    # Supplying a gene-to-transcript map switches on the
                    # gene-level reports.
                    report_gene_counts = True
                elif option == "-L":
                    lenfile = value
                elif option == "-M":
                    multiread_model = int(value)
                elif option == "-o":
                    outbase = value
                elif option == "-p":
                    pseudocount = float(value)
                elif option == "-r":
                    read_length = int(value)
                elif option == "-m":
                    max_iters = int(value)
                elif option == "-t":
                    tolerance = float(value)
                elif option == "-c":
                    report_alignment_counts = True
                elif option == "-w":
                    report_posterior = True
            except ValueError:
                # A malformed number is a usage error, not a crash with a
                # traceback.
                raise Usage("invalid value %r for option %s" % (value, option))

        # Check if the required options are given
        if h5file is None:  # If alignment file is not given
            raise Usage(help_message)

        #
        # Main body
        #

        em_factory = EMfactory(APM(h5file=h5file, grpfile=grpfile))
        em_factory.prepare(pseudocount=pseudocount, lenfile=lenfile, read_length=read_length)
        em_factory.run(model=multiread_model, tol=tolerance, max_iters=max_iters, verbose=True)
        em_factory.report_depths(filename="%s.isoforms.tpm" % outbase, tpm=True)
        em_factory.report_effective_read_counts(filename="%s.isoforms.effective_read_counts" % outbase)
        if report_posterior:
            em_factory.export_posterior_probability(filename="%s.posterior.h5" % outbase)
        if report_gene_counts:
            em_factory.report_depths(filename="%s.genes.tpm" % outbase, tpm=True, grp_wise=True)
            em_factory.report_effective_read_counts(filename="%s.genes.effective_read_counts" % outbase, grp_wise=True)
        if report_alignment_counts:
            # The alignments are loaded a second time rather than reusing the
            # matrix handed to EMfactory.
            # NOTE(review): presumably because EMfactory may alter its copy - confirm.
            alnmat = APM(h5file=h5file, grpfile=grpfile)
            alnmat.report_alignment_counts(filename="%s.isoforms.alignment_counts" % outbase)
            if report_gene_counts:
                # NOTE(review): _bundle_inline looks like it collapses the
                # matrix to gene level in place - confirm against APM.
                alnmat._bundle_inline(reset=True)
                alnmat.report_alignment_counts(filename="%s.genes.alignment_counts" % outbase)

        #
        # End of main body
        #

    except Usage as err:
        # sys.stderr.write replaces the Python-2-only 'print >>' statement;
        # the emitted text (including the trailing newline) is the same.
        sys.stderr.write(sys.argv[0].split("/")[-1] + ": " + str(err.msg) + "\n")
        return 2


if __name__ == "__main__":
    # Hand main()'s result to the interpreter as the process exit status.
    exit_status = main()
    sys.exit(exit_status)
