#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2011, A. Murat Eren
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Please read the COPYING file.

import sys

import IlluminaUtils.lib.fastalib as u
import IlluminaUtils.utils.helperfunctions as h
import IlluminaUtils.utils.terminal as terminal

# Shared terminal helpers: `run` reports the final output path and `progress`
# displays the live status while reads are being filtered in main().
run = terminal.Run()
progress = terminal.Progress()

def main(input_file_path, output_file_path, max_mismatches):
    """Write reads with at most `max_mismatches` mismatches to a new FASTA file.

    Each entry's header must contain the tag 'mismatches:' followed by an
    integer; everything after the tag (stripped of surrounding whitespace) is
    parsed as that entry's mismatch count.

    Parameters:
        input_file_path: path of the FASTA file to read.
        output_file_path: path of the FASTA file the passing reads go to.
        max_mismatches: integer threshold; entries whose mismatch count is
            greater than this value are skipped.

    Raises:
        h.ConfigError: if a header has no 'mismatches:' tag or the text that
            follows the tag is not an integer.
    """
    reads = u.SequenceSource(input_file_path)
    output = u.FastaOutput(output_file_path)

    num_passed = 0

    progress.new('Filtering reads (max %d mismatches)' % max_mismatches)
    try:
        while next(reads):
            if reads.pos % 100 == 0 or reads.pos == 1:
                progress.update('%d with at most %d mismatches among %d processed'
                                % (num_passed, max_mismatches, reads.pos))

            try:
                num_mismatches = int(reads.id.split('mismatches:')[1].strip())
            except (IndexError, ValueError):
                # IndexError: the tag is missing from the header;
                # ValueError: the text after the tag is not an integer.
                raise h.ConfigError("Unexpected header format (no 'mismatches:' in the header).")

            if num_mismatches > max_mismatches:
                continue

            num_passed += 1
            output.store(reads, split = False)
    finally:
        # End the progress display and close the output exactly once, after
        # the whole file has been processed or when a malformed header aborts
        # the run.
        progress.end()
        output.close()

    run.info('Filtered output', output_file_path)


if __name__ == '__main__':
    # Command-line entry point: parse the arguments, derive the default output
    # path when none is given, and hand everything over to main().
    import argparse

    parser = argparse.ArgumentParser(description='Filter reads from a merged file based on maximum mismatches at the overlapped region')
    parser.add_argument('input', metavar = 'FILE_PATH', help = 'FASTA file to be filtered')
    parser.add_argument('-o', '--output', type=str, default = None, metavar = 'FILE_PATH',
                        help = 'Where filtered reads will be written (default: [input]-MAX-MISMATCH-[-m])')
    parser.add_argument('-m', '--max-mismatches', type=int, metavar = 'INT', default=3,
                        help = 'Maximum number of mismatches allowed in the overlapped region '
                               '(negative values are treated as 0)')

    args = parser.parse_args()

    input_file_path = args.input
    # A negative threshold is clamped to 0 rather than rejected.
    max_mismatches = 0 if args.max_mismatches < 0 else args.max_mismatches
    # The default output name is built from the input path and the effective
    # (clamped) threshold.
    output_file_path = args.output if args.output else '%s-MAX-MISMATCH-%d' % (args.input, max_mismatches)

    sys.exit(main(input_file_path, output_file_path, max_mismatches))
