:78 Function create_function() is deprecated [8192]

adding comments and details about 'FILTERS & SELECTIONS', implementing selections using environment variables passed into AWK

Marcelo Ponce [2019-06-22 20:13:59]
adding comments and details about 'FILTERS & SELECTIONS', implementing selections using environment variables passed into AWK
Filename
core/table.sh
diff --git a/core/table.sh b/core/table.sh
index 335302d..116a7ca 100755
--- a/core/table.sh
+++ b/core/table.sh
@@ -5,8 +5,30 @@
 FILE=$1
 # eg. FILE=T_thermophila_June2014.gff3

+
+#####################################################################
+#####################################################################
+## The following two sections:
+##
+##		"FILTERS AND DELIMITERS"
+##
+##	and
+##
+##		"SELECTIONS"
+##
+## should be determined depending on the particular organism, protein,
+## genes (ie. target) and data layout of the specific file to be
+## processed.
+##
+## Here we present the case for Tetrahymena Thermophila.
+#####################################################################
+#####################################################################
+
+
+############  CASE FOR TETRAHYMENA THERMOPHILA  #####################
+################# FILTERS AND DELIMITERS ############################
 # filterS can be modified/added depending on the "TARGET" organism and 'protein'
-filter1=gene
+filter1="gene"
 filter2="Name=TTHERM_"
 # and one could keep addingg further 'filters' if needed...
 # filter3='"hypothetical protein"'
@@ -23,15 +45,23 @@ delim1="TTHERM"
 delim2=";Note"
 delim3="Note="

+######################################################################
+
+

 ################## selection ... #####################################

 # grab scafold and genes' range
 grep $filter1 $FILE | grep $filter2 | awk '{print $1" "$4"-"$5}' > tmp0
-# grab "TTHERM"
-grep $filter1 $FILE | grep $filter2 | awk 'BEGIN{FS="TTHERM"} {print $2}' | awk 'BEGIN{FS=";Note"} {print "TTHERM"$1}' > tmp1
+
+# grab "TTHERM", ie. *delim1*
+# grep $filter1 $FILE | grep $filter2 | awk 'BEGIN{FS="TTHERM"} {print $2}' | awk	'BEGIN{FS=";Note"} {print "TTHERM"$1}' > tmp1
+grep $filter1 $FILE | grep $filter2 | awk  -v d1="$delim1" 'BEGIN{FS=d1} {print $2}' | awk -v d1="$delim1" -v d2="$delim2" 'BEGIN{FS=d2} {print d1$1}' > tmp1
+
 # grab 'Note' and replace 'spaces' with 'underscores (_)'
-grep $filter1 $FILE | grep $filter2 | awk 'BEGIN{FS="Note="} {print "Note="$2}' | sed 's/ /_/g' > tmp2
+#grep $filter1 $FILE | grep $filter2 | awk 'BEGIN{FS="Note="} {print "Note="$2}' | sed 's/ /_/g' > tmp2
+grep $filter1 $FILE | grep $filter2 | awk -v d3="$delim3" 'BEGIN{FS=d3} {print d3$2}' | sed 's/ /_/g' > tmp2
+
 # compute gene size
 grep $filter1 $FILE | grep $filter2 | awk '{print $5-$4+1}' > tmp3
ViewGit