#!/usr/bin/env bash
# cols: apply a command to a subset of the columns and merge back with the remaining columns.
#
# Assumes that the input data is comma-delimited and that it has a header.
# Depends on csvcut, which is part of csvkit: http://csvkit.readthedocs.org
# 
# Example usage 1: reverse sort column 'a'
# $ printf 'a,b\n1,2\n3,4\n5,6\n' | cols -c a body sort -nr
#
# Example usage 2: apply PCA (using tapkee) to all numerical features (-C selects all but the specified columns) of the Iris data set:
# $ < iris.csv cols -C species body tapkee --method pca | header -r x,y,species
# 
# See also: header and body (https://github.com/jeroenjanssens/command-line-tools-for-data-science)
#
# Author: http://jeroenjanssens.com

# Usage: cols ARG COLUMNS COMMAND [ARGUMENT...]
#   ARG       csvcut column flag: -c (the listed columns) or -C (all but the
#             listed columns)
#   COLUMNS   column specification handed to csvcut
#   COMMAND   command (plus arguments) that receives the selected columns on
#             stdin; its stdout becomes the leading column(s) of the result
# Reads CSV from stdin and writes the merged CSV to stdout.

if [ "$#" -lt 3 ]; then
    printf 'Usage: %s (-c|-C) COLUMNS COMMAND [ARGUMENT...]\n' "${0##*/}" >&2
    exit 2
fi

ARG="$1"
# Swap c <-> C so that ARG_INV selects exactly the columns ARG leaves out.
ARG_INV="$(tr cC Cc <<< "${ARG}")"
shift
# Deliberately not called COLUMNS: bash uses that name for the terminal width
# and may overwrite it when the checkwinsize option is enabled.
COLS_SPEC="$1"
shift
# The remaining words are joined into a single string that is eval'ed below,
# so a whole pipeline can also be passed as one quoted argument.
EXPR="$*"

# Temporary copy of the complete input; both csvcut invocations read from it.
OTHER_COLUMNS="$(mktemp "${TMPDIR:-/tmp}/cols-XXXXXXXX")" || {
    printf '%s: cannot create temporary file\n' "${0##*/}" >&2
    exit 1
}

# Remove the temporary file on every exit path.
finish() {
    rm -f -- "$OTHER_COLUMNS"
}
trap finish EXIT

# Buffer all of stdin before starting the pipeline. Reading the file while it
# is still being written (tee + process substitution) could hand the second
# csvcut a truncated copy of the input.
cat > "$OTHER_COLUMNS" || exit 1

# 1. Cut out the selected columns.
# 2. Run the caller-supplied command on them. eval is intentional here: the
#    command string comes from this script's own command line. It is quoted so
#    that it reaches eval without prior word splitting or glob expansion.
# 3. Paste the command's output next to the untouched remaining columns.
csvcut "$ARG" "$COLS_SPEC" "$OTHER_COLUMNS" |
    eval "$EXPR" |
    paste -d, - <(csvcut "$ARG_INV" "$COLS_SPEC" "$OTHER_COLUMNS")
