Last commit for core/intergenic/det-interGenes.sh: 2d666199e293821d799f5caa28356d21f3c6f2a4

adding a whole timer for the IGR main script

Marcelo Ponce [2019-07-24 22:54:04]
adding a whole timer for the IGR main script
#!/bin/bash

# det-interGenes.sh	---	RACS IGR pipeline
# main script in charge of determining the intergenic regions
# this part of the pipeline requires the table generated by the ORF reads/counts
#
#
# HOW TO USE THIS SCRIPT:
#	arg1: final combined table generated by the ORF from the RACS pipeline
#	arg2: reference genome file (gff3)
#	arg3: name of the file where to save the InterGenic Regions
#	arg4: text file containing the name of the output file from ORF part/tag.(ref.) name; eg.
#       	alnGreenblatt_1_SSCH1-1_ChIP_S81_R1_001.fastq.gz-sorted.bam
#		alnGreenblatt_1_SSCH1-1_ChInput_S82_R1_001.fastq.gz-sorted.bam
#
#		The aln*.fastq.gz-sorted.bam files should have been generated with the ORF part of the RACS pipeline!
#
# eg.
#    PATHtoRACSrepo/core/intergenic/det-interGenes.sh  combinedTABLES_MED1-MED2  dataset/T_thermophila_June2014.sorted.gff3  interGENs_MED1-MED2.csv  samples.file
#

#################################################

# Setting preamble, detecting scripts location
# Absolute path of the directory that holds this script. The expansions are
# quoted so that an installation path containing spaces still resolves.
scriptsDIR=$( cd "$(dirname "$0")" && pwd )

#################################################
# load auxiliary fns for integrity checks and message/error handling
# The helpers are looked up relative to this script, in ../auxs/auxFns.sh
auxFnsFile="$scriptsDIR/../auxs/auxFns.sh"
if [[ -f "$auxFnsFile" ]]; then
	. "$auxFnsFile" --source-only;
else
	# The helper file could not be loaded, so the problem is reported directly:
	# the message goes to stderr and the script stops with a non-zero status
	# (a bare 'exit' would return the status of the echo, i.e. success).
	echo "Error auxiliary file: " "$auxFnsFile" "NOT found!" >&2
	exit 1
fi

# Show the RACS welcome/credit banner
# (welcome is not defined in this file; presumably it comes from auxFns.sh)
welcome


#### CHECKS #####################################
### INTEGRITY CHECKs
# RACS scripts that the IGR routines rely on
igrScripts=(
	det-interGenes.sh
	interGeneRegions.R
	utils_RACS-IGR.R
	interGenes.sh
)
checkIntegrityPipeline "${igrScripts[@]}"

# external programs required by this part of the pipeline
externalTools=(samtools Rscript)
checkTools "${externalTools[@]}"

################################################

### CHECK arguments
# The script takes exactly four positional arguments (see the
# "HOW TO USE THIS SCRIPT" notes at the top of the file).
if [[ $# -eq 0 ]]; then
	errMsg "No arguments were supplied!";
fi
#
case $# in
	4)
		echo $# "Arguments received:"
		# "$@" is quoted so that every argument is shown exactly as it was
		# given; unquoted, arguments would be re-split on whitespace and any
		# glob character in them expanded against the current directory.
		echo "$@"
		;;
	*)
		# any other number of arguments: hand over to the usage helper
		usage
		;;
esac

###########

# getting command line arguments
# MANDATORY arguments
combTABLE=$1    	#combinedTABLES_MED1-MED2
refFILE=$2		#dataset/T_thermophila_June2014.sorted.gff3
interGENregions=$3      #interGENs_MED1-MED2.csv  -- output from Rscript
SAMPLES=$4	# input.samples: text file containing the name of the sample files to process, eg.
		#	alnGreenblatt_1_SSCH1-1_ChIP_S81_R1_001.fastq.gz-sorted.bam
		#
		#	alnFillingham_1_MED1_INPUT_S25_L007_R1_001
		#	alnFillingham_2_MED2_INPUT_S26_L007_R1_001
		#	alnFillingham_3_MED1_IP_S27_L007_R1_001
		#	alnFillingham_4_MED2_IP_S28_L007_R1_001
#

# check input arguments, ie. that the files indicated actually exist!
# Only the three input files are checked; interGENregions is an output name.
# The expansions are quoted so that each path reaches checkFile as exactly one
# argument, even when it contains spaces or glob characters or is empty.
checkFile "$combTABLE" "$refFILE" "$SAMPLES"


# Name of the final combined table: the IGR file name followed by
# "_INPUTs-IPs", a YYYYMMDD-HHMMSS time stamp and the ".csv" extension.
runStamp=$(date '+%Y%m%d-%H%M%S')
combinedFinalIGenicTable="${interGENregions}_INPUTs-IPs${runStamp}.csv"
# ================================================

# Timing: 'time' is a shell keyword whose report is written to the shell's
# stderr, so it cannot be captured with $(...). Capturing it only stored the
# timed command's stdout (hiding that output from the user) and left nothing
# numeric to add up. The per-step 'time' reports are kept, and the overall
# elapsed time is taken from bash's SECONDS counter (whole seconds) instead.
tStart=$SECONDS

# step #0: look into combTABLE and generate list of intergenic regions
time Rscript  "$scriptsDIR/interGeneRegions.R"   "$combTABLE"  "$refFILE"  "$interGENregions"

# Sample names are the whitespace-separated words of the SAMPLES file;
# $files is deliberately left unquoted below so that it splits into names.
files=$(cat "$SAMPLES")
echo "TARGETS: " $files
for file in $files; do
	echo 'processing sample file:' "$file"
	# process each of the samples in comparison to the intergenic regions just generated in 'step #0'
	# arguments: <sample name>-<time stamp>, the sample name, the IGR file
	# NOTE(review): the meaning of the first argument is defined by interGenes.sh -- confirm there
	time "$scriptsDIR/interGenes.sh"  "$file-$(date '+%Y%m%d-%H%M%S')"  "$file"  "$interGENregions"
done


# combine tables with INPUT/IP reads
#t5=$(time paste interGENs_MED1-MED2.csv interGENs-Fillingham_1_MED1_INPUT_S25_L007_R1_001 interGENs-Fillingham_3_MED1_IP_S27_L007_R1_001 interGENs-Fillingham_2_MED2_INPUT_S26_L007_R1_001 interGENs-Fillingham_4_MED2_IP_S28_L007_R1_001 > interGENs_MED1-MED2_INPUTs-IPs.csv )
###>>>>>>
# One "interGENs-<name>.csv" table is pasted per line of the SAMPLES file,
# <name> being the first field of that line. The command substitution is left
# unquoted on purpose: it has to split into one file name per sample.
time paste  "$interGENregions"  $(awk '{print "interGENs-"$1".csv"}' "$SAMPLES")  >  "$combinedFinalIGenicTable"

# statistics on running times
# elapsed wall-clock seconds from the start of step #0 to the end of the paste
Ttot=$((SECONDS - tStart))
echo "Total time: $Ttot"

###########################################################################
ViewGit