#!/usr/bin/env python3

"""
script for calculating gc skew

Chris Brown
ctb@berkeley.edu
"""

# python modules
import os
import sys
import argparse

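# ctb modules: the parent of this script's directory is added to sys.path so that the iRep package can be imported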
sys.path.append((os.path.dirname(os.path.abspath(__file__)).rsplit('/', 1)[0]))
from iRep import gc_skew as gc_skew

def build_parser():
    """Create the command-line parser for the GC skew script.

    Options:
        -f         one or more fasta files (required)
        -l         minimum contig length; left as None when omitted so the
                   caller can derive it from the window length
        -w         window length (default 1000)
        -s         slide length (default 10)
        --single   combine multi-fasta sequences into a single genome
        --no-plot  do not plot; print the GC skew table to stdout instead

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser(
        description='# calculate gc skew and find Ori and Ter of replication')
    parser.add_argument(
        '-f', nargs='*', action='store', required=True,
        help='fasta(s)')
    parser.add_argument(
        '-l', default=None, type=int,
        help='minimum contig length (default = 10 x window)')
    parser.add_argument(
        '-w', default=1000, type=int,
        help='window length (default = 1000)')
    parser.add_argument(
        '-s', default=10, type=int,
        help='slide length (default = 10)')
    parser.add_argument(
        '--single', action='store_true',
        help='combine multi-fasta sequences into single genome')
    # store_false: args.no_plot is True by default (plots are generated) and
    # becomes False only when --no-plot is given.
    parser.add_argument(
        '--no-plot', action='store_false',
        help='do not generate plots, print GC Skew to stdout')
    return parser


def format_ori_ter(ori, ter):
    """Return printable strings for the origin and terminus positions.

    The "not found" case is detected with an identity check (``ori is False``)
    rather than ``ori == False``: in Python ``0 == False`` is true, so an
    equality test would report a legitimate position 0 as 'n/a'.

    NOTE(review): assumes gc_skew.gc_skew() signals "not found" with the
    built-in False (not e.g. a NumPy boolean) -- confirm against iRep.gc_skew.

    Args:
        ori: origin position, or False when none was found.
        ter: terminus position (only formatted when ori is not False).

    Returns:
        tuple[str, str]: ('n/a', 'n/a') when ori is False, otherwise both
        positions formatted with thousands separators.
    """
    if ori is False:
        return 'n/a', 'n/a'
    return '{:,}'.format(ori), '{:,}'.format(ter)


def main(argv=None):
    """Compute GC skew for every sequence in the given fasta file(s).

    For each sequence yielded by gc_skew.parse_genomes(), sequences shorter
    than the minimum length are reported on stderr and skipped; for the others
    the origin and terminus are reported on stderr. When --no-plot is given,
    a tab-separated table (name, position, GC skew, cumulative GC skew) is
    also written to stdout.

    Args:
        argv: list of command-line arguments; None (the default) makes
            argparse read sys.argv[1:].
    """
    args = build_parser().parse_args(argv)
    fastas = gc_skew.open_files(args.f)
    single = args.single
    plot_skew = args.no_plot
    window, slide = args.w, args.s
    # minimum contig length defaults to ten windows when -l is not supplied
    min_len = 10 * window if args.l is None else args.l
    for name, length, seq in gc_skew.parse_genomes(fastas, single):
        if length < min_len:
            print('%s: Too Short' % (name), file=sys.stderr)
            continue
        ori, ter, skew, c_skew = gc_skew.gc_skew(
            name, length, seq, window, slide, plot_skew)
        ori, ter = format_ori_ter(ori, ter)
        print('%s -> Origin: %s Terminus: %s'
              % (name, ori, ter), file=sys.stderr)
        if plot_skew is False:
            print('\t'.join(['# Name', 'Position', 'GC Skew', 'Cumulative GC Skew']))
            # skew[0] supplies the positions; skew[1] and c_skew[1] are
            # indexed in parallel to fill the two value columns
            for i, pos in enumerate(skew[0]):
                row = [name, pos, skew[1][i], c_skew[1][i]]
                print('\t'.join(str(field) for field in row))


if __name__ == '__main__':
    main()
