:78 Function create_function() is deprecated [8192]

IMPORTANT: making the IGR main alg more robust against corner cases and data inconsistencies that could be present in the gff3 annotation files

Marcelo Ponce [2019-07-24 22:59:17]
IMPORTANT: making the IGR main alg more robust against corner cases and data inconsistencies that could be present in the gff3 annotation files
Filename
core/intergenic/interGeneRegions.R
diff --git a/core/intergenic/interGeneRegions.R b/core/intergenic/interGeneRegions.R
index 95cdff0..20f9273 100755
--- a/core/intergenic/interGeneRegions.R
+++ b/core/intergenic/interGeneRegions.R
@@ -126,25 +126,33 @@ for (i in c(1:nbrEntries)) {
          beginRegion <- begReg
          #dumpData(SCFLD,endRegion,'xxx', lstscfld,lstregion1,lstregion2,lstSize, 1)
          scfCAP <- scfldCAP(SCFLD,refTable)
-         if (endRegion < scfCAP ) {	# dealing with cases where the gene ends at the limit of the scaffold...
-         myScfld <- dumpData(SCFLD,endRegion,scfCAP+1, 1)
-          lstscfld <- c(lstscfld, myScfld[1])
-          lstSize <- c(lstSize, myScfld[4])
-          lstregion1 <- c(lstregion1,endRegion+1)
-          lstregion2 <- c(lstregion2,scfCAP)
-         } else {
-		print(">>>>>>>>>>>>>>>> SUSPICIOUS OVERLYING REGIONs!!!  <<<<<<<<<<<<<<<<")
-		cat(scaffold,begReg,endReg,'\n')
-		cat(try(lstscfld[length(lstscfld)-1]),try(lstregion1[length(lstscfld)-1]),try(lstregion2[length(lstscfld)-1]),'\n')
-		#stop
-           }
-         #dumpData(scaffold,'0',beginRegion, lstscfld,lstregion1,lstregion2,lstSize, 1)
-         myScfld <- dumpData(scaffold,'0',beginRegion, 1)
-          lstscfld <- c(lstscfld, myScfld[1])
-          lstSize <- c(lstSize, myScfld[4])
-          lstregion1 <- c(lstregion1,0+1)
-          lstregion2 <- c(lstregion2,beginRegion-1)
-         endRegion <- endReg
+         #DBG output: print(paste("XXX >~>~>",endRegion,"/",endReg," - ",scfCAP))
+         # cautionary guard: dealing with the case of  scfCAP  being empty ""  --> integer(0)!!!
+         #  in principle due to data inconsistencies, keep it for robustness of the code
+         if (length(scfCAP)>0) {
+		if (endRegion < scfCAP) {	# dealing with cases where the gene ends at the limit of the scaffold...
+                   myScfld <- dumpData(SCFLD,endRegion,scfCAP+1, 1)
+                   lstscfld <- c(lstscfld, myScfld[1])
+                   lstSize <- c(lstSize, myScfld[4])
+                   lstregion1 <- c(lstregion1,endRegion+1)
+                   lstregion2 <- c(lstregion2,scfCAP)
+                } else {
+			print(">>>>>>>>>>>>>>>> SUSPICIOUS OVERLYING REGIONs!!!  <<<<<<<<<<<<<<<<")
+			cat(scaffold,begReg,endReg,'\n')
+			cat(try(lstscfld[length(lstscfld)-1]),try(lstregion1[length(lstscfld)-1]),try(lstregion2[length(lstscfld)-1]),'\n')
+			#stop
+           	}
+                #dumpData(scaffold,'0',beginRegion, lstscfld,lstregion1,lstregion2,lstSize, 1)
+                myScfld <- dumpData(scaffold,'0',beginRegion, 1)
+                lstscfld <- c(lstscfld, myScfld[1])
+                lstSize <- c(lstSize, myScfld[4])
+                lstregion1 <- c(lstregion1,0+1)
+                lstregion2 <- c(lstregion2,beginRegion-1)
+                endRegion <- endReg
+	 } else {
+		# potential data inconsistency...
+		cat("Potential data inconsistency issue detected... please verify your data integrity...",'\n\n')
+        }
       } else { 	# first scaffold ever ...
               beginRegion <- begReg	#strsplit(region,'-')[[1]][1]
               endRegion <- endReg	#strsplit(region,'-')[[1]][2]
@@ -157,6 +165,7 @@ for (i in c(1:nbrEntries)) {
          }
       SCFLD <- scaffold
     }
+	if (length(lstregion1) != length(lstregion2)) stop()
 }
 # take care of last case...
 #dumpData(scaffold,endRegion,'xxx', lstscfld,lstregion1,lstregion2,lstSize, 1)
ViewGit