#!/usr/bin/env Rscript
#
# Visualizes the mismatch distribution table generated by the merge-illumina-pairs script.
#

suppressPackageStartupMessages(library(reshape))
suppressPackageStartupMessages(library(gtools))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(optparse))


# command line options
option_list <- list(
	make_option(c("-o", "--output_file_prefix"), default = "unknown",
		help = "Output file prefix for visualization files [default \"%default\"]"),
	make_option("--title", default = "Distribution of the Number of Mismatches at the Overlapped Region",
		help = "Title for the output figure [default '%default']")
)

parser <- OptionParser(usage = "visualize-mismatch-distribution [options] input_file", option_list = option_list,
	description = "An R program to visualize the mismatch breakdown output of merge-illumina-pairs script")

# positional_arguments = TRUE makes parse_args return a list with
# $options (named option values) and $args (the remaining positionals)
arguments <- parse_args(parser, positional_arguments = TRUE)
options <- arguments$options

# exactly one positional argument (the input table) is required
if (length(arguments$args) != 1) {
	cat("Incorrect number of required positional arguments\n\n")
	print_help(parser)
	stop("expected exactly one input file", call. = FALSE)
}
input_file <- arguments$args

# check if the input file is accessible: it has to exist and be readable
if (!file.exists(input_file)) {
	stop(sprintf("Specified file '%s' does not exist", input_file))
}
if (file.access(input_file, mode = 4) != 0) {
	stop(sprintf("Specified file '%s' is not readable", input_file))
}

# load data frame (tab separated, first line is the header).
df <- read.csv(input_file, header = TRUE, sep = "\t")

# the melt below keys on this column, so fail early with a clear message
if (!"num_mismatch" %in% colnames(df)) {
	stop(sprintf("Input file '%s' has no 'num_mismatch' column", input_file))
}

# one entry per row of the wide table; captured before the melt because the
# long form repeats every num_mismatch value once per bin
row.names <- df$num_mismatch
col.names <- colnames(df)

# wide -> long: every column other than num_mismatch becomes a level of
# 'bins', with its cell contents in 'value'
df <- melt(df, id.vars = "num_mismatch", variable_name = "bins")

# Draws the mismatch distribution on the currently active graphics device.
#
# Reads two script-level globals: `df` (long-form data frame with the columns
# num_mismatch, bins and value) and `options` (parsed command line options;
# only options$title is used). One line with point markers is drawn per level
# of `bins`. Takes no arguments; the plot is printed as a side effect.
P <- function() {
	# NA values are ignored when looking for the right edge of the x axis
	max_mismatch <- max(df$num_mismatch, na.rm = TRUE)

	p <- ggplot(df, aes(x = num_mismatch, y = value, colour = bins, group = bins))
	p <- p + geom_line()
	p <- p + geom_point()
	p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 7))
	p <- p + theme(axis.text.y = element_text(size = 7))
	p <- p + theme(legend.position = 'bottom')
	p <- p + labs(x = 'Number of Mismatches', y = 'Number of Pairs')
	p <- p + ggtitle(options$title)
	# xlim has to be a (lower, upper) pair: pad one unit on each side of the
	# observed range instead of passing the whole num_mismatch column
	p <- p + coord_cartesian(xlim = c(-1, max_mismatch + 1))
	p <- p + scale_x_continuous(breaks = seq(0, max_mismatch, 2))
	p <- p + theme(plot.title = element_text(hjust = 0, vjust = 1))

	print(p)
}


# determine the appropriate width for visualization: count the distinct
# positions on the x axis directly from the plotted data frame. Up to 40
# positions fit the default 10 inch canvas; beyond that allow a quarter inch
# per position.
num_x_items <- length(unique(df$num_mismatch))
pdf_w <- if (num_x_items < 40) 10 else num_x_items / 4
# the PNG is sized in pixels, 100 per inch of PDF width
png_w <- pdf_w * 100


# gen PDF
pdf_output <- paste0(options$output_file_prefix, ".pdf")
pdf(pdf_output, width = pdf_w, height = 4)
P()
# close the device so the file is finalized before the next one is opened
invisible(dev.off())
sprintf("Lines PDF: '%s'", pdf_output)

# gen PNG
png_output <- paste0(options$output_file_prefix, ".png")
# png() accepts a single backend name; prefer cairo when this R build has it
# and otherwise fall back to the platform default
png_type <- if (capabilities("cairo")[[1]]) "cairo" else getOption("bitmapType")
png(png_output, width = png_w, height = 400, units = "px", pointsize = 12, bg = "transparent", type = png_type)
P()
invisible(dev.off())
sprintf("Lines PNG: '%s'", png_output)
