#!/bin/bash
# det-interGenes.sh
# main script in charge of determining the intergenic regions
# this part of the pipeline requires the table generated by the ORF reads/counts
#
#
# HOW TO USE THIS SCRIPT:
# arg1: final combined table generated by the ORF part of the RACS pipeline
# arg2: reference genome file (gff3)
# arg3: name of the file where to save the InterGenic Regions
# arg4/5: name of the output file from the ORF part for the first sample / its tag (reference) name
# arg6/7: ...
# ...
#
# eg.
# det-interGenes.sh combinedTABLES_MED1-MED2 dataset/T_thermophila_June2014.sorted.gff3 interGENs_MED1-MED2.csv \
# Fillingham_1_MED1_INPUT_S25_L007_R1_001 INPUTm1 \
# Fillingham_2_MED2_INPUT_S26_L007_R1_001 INPUTm2 \
# Fillingham_3_MED1_IP_S27_L007_R1_001 IPm1 \
# Fillingham_4_MED2_IP_S28_L007_R1_001 IPm2 \
# ...
#
#################################################
# Setting preamble, detecting scripts location
# Directory this script was invoked from. "$0" is quoted (and guarded with
# "--") so that paths containing spaces, glob characters or a leading dash
# are passed to dirname as a single operand.
scriptsDIR=$(dirname -- "$0")
#################################################
# load auxiliary fns for integrity checks and message/error handling
# (presumably the source of errMsg, checkIntegrityPipeline and checkTools
# used further down -- confirm against auxs/auxFns.sh)
. "$scriptsDIR/../auxs/auxFns.sh" --source-only
#### CHECKS #####################################
### CHECK arguments
# Report through errMsg when the script is invoked without any argument.
# NOTE(review): only the zero-argument case is detected here, although the
# script later reads positional parameters $1 through ${11} -- confirm
# whether a stricter count check is wanted.
if (( $# == 0 )); then
    errMsg "No arguments were supplied!"
fi
#
### INTEGRITY CHECK
# check RACS scripts
checkIntegrityPipeline \
    intergenic/det-interGenes.sh \
    intergenic/interGeneRegions.R \
    intergenic/utils_RACS-IGR.R \
    intergenic/interGenes.sh
# check external tools needed
checkTools samtools Rscript
#################################################
# Positional parameters -> named variables
# (the trailing comment on each line is the example value from the header)
combTABLE="$1"            # combinedTABLES_MED1-MED2
refFILE="$2"              # dataset/T_thermophila_June2014.sorted.gff3
interGENregions="$3"      # interGENs_MED1-MED2.csv -- output from Rscript
#
# Four (ORF output file, tag) pairs, one pair per sample
INPUTfile1="$4"           # Fillingham_1_MED1_INPUT_S25_L007_R1_001
tag1="$5"                 # INPUTm1
INPUTfile2="$6"           # Fillingham_2_MED2_INPUT_S26_L007_R1_001
tag2="$7"                 # INPUTm2
INPUTfile3="$8"           # Fillingham_3_MED1_IP_S27_L007_R1_001
tag3="$9"                 # IPm1
INPUTfile4="${10}"        # Fillingham_4_MED2_IP_S28_L007_R1_001
tag4="${11}"              # IPm2
#
# Name of the final combined table: <arg3>_INPUTs-IPs<YYYYmmdd-HHMMSS>.csv,
# time-stamped with the moment this line is evaluated.
combinedFinalIGenicTable="${interGENregions}_INPUTs-IPs$(date '+%Y%m%d-%H%M%S').csv"
# ================================================
# Processing steps and run-time statistics.
#
# Why a helper: bash's `time` keyword prints its report on the *shell's*
# stderr, so `t=$(time cmd)` stores cmd's stdout in t, not the timing.
# timedRun captures only the timing report and leaves the command's own
# stdout/stderr connected to wherever the caller pointed them.

#######################################
# Run a command and store its elapsed wall-clock time.
# Arguments: $1   - name of the variable that receives the elapsed time
#                   (seconds, as printed by `time` with TIMEFORMAT=%R)
#            $2.. - command to run, followed by its arguments
# Outputs:   the command's stdout/stderr are passed through untouched;
#            a warning goes to stderr when the command exits non-zero
# Returns:   the exit status of the command
#######################################
timedRun() {
    local _tr_var=$1
    shift
    local TIMEFORMAT='%R'      # report real (wall-clock) seconds only
    local _tr_elapsed _tr_rc
    {
        # fds 3/4 are copies of the caller's stdout/stderr: the command writes
        # there, so the substitution only sees the report printed by `time`.
        _tr_elapsed=$( { time "$@" 1>&3 2>&4; } 2>&1 )
        _tr_rc=$?
    } 3>&1 4>&2
    printf -v "$_tr_var" '%s' "$_tr_elapsed"
    if (( _tr_rc != 0 )); then
        # The run is not aborted (the script has always run every step);
        # the failure is only made visible.
        printf 'WARNING: step "%s" exited with status %d\n' "$*" "$_tr_rc" >&2
    fi
    return "$_tr_rc"
}

# step #0: look into combTABLE and generate list of intergenic regions
timedRun t0 Rscript "$scriptsDIR/interGeneRegions.R" "$combTABLE" "$refFILE" "$interGENregions"

# process each of the samples in comparison to the intergenic regions just generated in 'step #0'
# e.g. interGenes.sh INPUTm1 Fillingham_1_MED1_INPUT_S25_L007_R1_001 interGENs_MED1-MED2.csv
# All expansions are quoted so that names with spaces stay single arguments.
timedRun t1 "$scriptsDIR/interGenes.sh" "$tag1" "$INPUTfile1" "$interGENregions"
timedRun t2 "$scriptsDIR/interGenes.sh" "$tag2" "$INPUTfile2" "$interGENregions"
timedRun t3 "$scriptsDIR/interGenes.sh" "$tag3" "$INPUTfile3" "$interGENregions"
timedRun t4 "$scriptsDIR/interGenes.sh" "$tag4" "$INPUTfile4" "$interGENregions"

# combine tables with INPUT/IP reads
# Column order is deliberate: sample 1, sample 3, sample 2, sample 4
# (INPUT/IP of the first pair, then INPUT/IP of the second pair in the
# header example).
timedRun t5 paste "$interGENregions" \
    "interGENs-$INPUTfile1" "interGENs-$INPUTfile3" \
    "interGENs-$INPUTfile2" "interGENs-$INPUTfile4" \
    > "$combinedFinalIGenicTable"

# statistics on running times
echo "Partial Times: $t0, $t1, $t2, $t3, $t4"
# Quoted: bare parentheses in an echo argument are a bash syntax error.
echo "Total time: $t0+($t1+$t2+$t3+$t4)+$t5"