Last commit for core/intergenic/det-interGenes-ORIG.sh: e917e8e74cde86ed76637133ae3cce20ead94163

final touches

Marcelo Ponce [2019-03-11 19:44:24]
final touches
#!/bin/bash

# det-interGenes.sh
# main script in charge of determining the intergenic regions
# this part of the pipeline requires the table generated by the ORF reads/counts
#
#
# HOW TO USE THIS SCRIPT:
#	arg1: final combined table generated by the ORF from the RACS pipeline
#	arg2: reference genome file (gff3)
#	arg3: name of the file where to save the InterGenic Regions
#	arg4/5:	name of the output file from ORF part/tag.(ref.) name
#	arg6/7: ...
#	...
#
# eg.
#    det-interGenes.sh  combinedTABLES_MED1-MED2  dataset/T_thermophila_June2014.sorted.gff3  interGENs_MED1-MED2.csv
#				Fillingham_1_MED1_INPUT_S25_L007_R1_001  INPUTm1	\
#				Fillingham_2_MED2_INPUT_S26_L007_R1_001  INPUTm2	\
#				Fillingham_3_MED1_IP_S27_L007_R1_001  IPm1	\
#				Fillingham_4_MED2_IP_S28_L007_R1_001  IPm2	\
#				...
#

#################################################
# Setting preamble, detecting scripts location
# ("$0" is quoted so that an installation path containing blanks is kept whole)
scriptsDIR=$(dirname "$0")

#################################################
# load auxiliary fns for integrity checks and message/error handling
# (errMsg, checkIntegrityPipeline and checkTools used below come from this file)
. "$scriptsDIR/../auxs/auxFns.sh" --source-only

#### CHECKS #####################################
### CHECK arguments
# NOTE(review): only a call without any argument is rejected here, although the
# rest of the script reads 11 positional arguments -- confirm whether a stricter
# check is wanted.
if [[ $# -eq 0 ]]; then
	errMsg "No arguments were supplied!";
fi
#

### INTEGRITY CHECK
# check RACS scripts
checkIntegrityPipeline  intergenic/det-interGenes.sh  intergenic/interGeneRegions.R  intergenic/utils_RACS-IGR.R  intergenic/interGenes.sh
# check external tools needed
checkTools samtools Rscript
#################################################

# Positional parameters (see "HOW TO USE THIS SCRIPT" in the header)

# -- inputs common to all the samples
combTABLE="${1}"		# e.g. combinedTABLES_MED1-MED2
refFILE="${2}"			# e.g. dataset/T_thermophila_June2014.sorted.gff3
interGENregions="${3}"		# e.g. interGENs_MED1-MED2.csv  -- output from Rscript

# -- four samples, each given as: output file name from the ORF part, then its tag
INPUTfile1="${4}"		# e.g. Fillingham_1_MED1_INPUT_S25_L007_R1_001
tag1="${5}"			# e.g. INPUTm1

INPUTfile2="${6}"		# e.g. Fillingham_2_MED2_INPUT_S26_L007_R1_001
tag2="${7}"			# e.g. INPUTm2

INPUTfile3="${8}"		# e.g. Fillingham_3_MED1_IP_S27_L007_R1_001
tag3="${9}"			# e.g. IPm1

INPUTfile4="${10}"		# e.g. Fillingham_4_MED2_IP_S28_L007_R1_001
tag4="${11}"			# e.g. IPm2

# -- name of the final combined table:
#    <interGENregions>_INPUTs-IPs<YYYYmmdd-HHMMSS>.csv
combinedFinalIGenicTable="${interGENregions}_INPUTs-IPs$(date '+%Y%m%d-%H%M%S').csv"
# ================================================

# timedRun VARNAME command [args...]
#   Runs "command args..." and stores its elapsed wall-clock time, in seconds,
#   in the variable named VARNAME.  The command's own stdout and stderr are
#   passed through to the caller's stdout and stderr, and its exit status is
#   returned.
#   Why a helper: `time` is a shell keyword that reports on the shell's stderr,
#   so a plain  t=$(time cmd)  captures the stdout of cmd instead of the timing.
timedRun() {
	local _tVar=$1
	shift
	local TIMEFORMAT='%R'	# make `time` print only the elapsed (real) seconds
	local _tElapsed _tStatus
	# fds 3 and 4 keep the caller's stdout/stderr for the command, so that the
	# only thing left on the captured stream is the report written by `time`
	{ _tElapsed=$( { time "$@" 1>&3 2>&4 3>&- 4>&-; } 2>&1 ); } 3>&1 4>&2
	_tStatus=$?
	# some locales print a decimal comma; normalize it so the value can be summed
	printf -v "$_tVar" '%s' "${_tElapsed/,/.}"
	return "$_tStatus"
}

# step #0: look into combTABLE and generate list of intergenic regions
timedRun t0  Rscript  "$scriptsDIR/interGeneRegions.R"  "$combTABLE"  "$refFILE"  "$interGENregions"

# process each of the samples in comparison to the intergenic regions just generated in 'step #0'
timedRun t1  "$scriptsDIR/interGenes.sh"  "$tag1"  "$INPUTfile1"  "$interGENregions"
timedRun t2  "$scriptsDIR/interGenes.sh"  "$tag2"  "$INPUTfile2"  "$interGENregions"
timedRun t3  "$scriptsDIR/interGenes.sh"  "$tag3"  "$INPUTfile3"  "$interGENregions"
timedRun t4  "$scriptsDIR/interGenes.sh"  "$tag4"  "$INPUTfile4"  "$interGENregions"

# combine tables with INPUT/IP reads
# (columns are pasted as: regions, sample 1, sample 3, sample 2, sample 4;
#  the redirection applies to the stdout of paste, which timedRun passes through)
timedRun t5  paste  "$interGENregions"  "interGENs-$INPUTfile1"  "interGENs-$INPUTfile3"  "interGENs-$INPUTfile2"  "interGENs-$INPUTfile4"  >  "$combinedFinalIGenicTable"

# statistics on running times (seconds); a step that could not be timed counts as 0
totalTime=$(LC_ALL=C awk -v a="${t0:-0}" -v b="${t1:-0}" -v c="${t2:-0}" -v d="${t3:-0}" -v e="${t4:-0}" -v f="${t5:-0}" 'BEGIN { printf "%.3f", a + b + c + d + e + f }')
echo "Partial Times: $t0, $t1, $t2, $t3, $t4, $t5"
# the parentheses have to be quoted: unquoted they are a shell syntax error
echo "Total time: $t0+($t1+$t2+$t3+$t4)+$t5 = $totalTime"

ViewGit