Last commit for hpc/IGR-jobs_parallel.gnu: 92a95de25353773c5d9e6f8a6768b5ddc2db1d24

Version 1.0, MArch 2019

Marcelo Ponce [2019-03-11 19:20:23]
Version 1.0, MArch 2019
#!/bin/bash
#
## example of a sequence of RACS IGR jobs using GNU-parallel

#####
errMsg() { msg "$*"; helpMsg;  exit 111;}
####

# this script requires GNU-PARALLEL to work, so let's first
# be sure that it installed in the system...
GNUPARALLEL=$(which parallel)
[ -z $GNUPARALLEL ] && errMsg "This script uses GNU-PARALLEL to launch in parallel a series of InterGenic Region processes" || echo "Using $GNUPARALLEL"

# set location of RACS
RACSloc=$SCRATCH"/REPO/core/"
interGenic=$RACSloc"/intergenic/det-interGenes.sh"

# select reference GFF3 file
REFgff3=$SCRATCH"/negCtrl/refs/T_thermophila_June2014.gff3"

# location of ORF results used as input for the IGR script
ORFdirs="ORF_RACS_results-"	#20181212-141459"	#"ORF_RACS_results-"


# function to explore directories with RACS ORF results
processFiles() {
	for i in $ORFdirs*; do
		cd $i; #pwd; #echo FINAL.table.*1;
		# generate list of sample files per directory
		ls -1 *fastq.gz-sorted.bam > sample.input
		### check the actual alingment and final.table files are there...
		alnFiles=$(cat sample.input | grep -o -P '(?<=aln).*(?=.fastq.gz-sorted.bam)' | sed 's/ /-/')
		IGRtable="FINAL.table."$alnFiles
		#echo "expected files to process..." $alnFiles $IGRtable
		###
		execute=$(for IGRtable in FINAL.table*1; do	\
			echo	$interGenic  $IGRtable $REFgff3 tableIGR--$IGRtable sample.input ;
			done)
		echo "echo $i &&  cd $i &&      pwd     &&	$execute   && 	cd .. ";
		cd ..
	done
}

# specify name of the file were to record the log of the jobs
LOG=$USER-IGR_RACS-`date '+%Y%m%d-%H%M%S'`.log

# number of tasks to run at the same time, ideally it could be equal to
#he number of processors (or twice if hyper-threading is availble)
cores=$(grep -P '^core id\t' /proc/cpuinfo| wc -l)
PARALLEL_TASKS_PER_NODE=$cores

# process all the jobs via GNU-PARALLEL
processFiles  | $GNUPARALLEL --joblog $LOG  -j $PARALLEL_TASKS_PER_NODE
ViewGit