2b2a19a35ceb667d2c0659ff422a980e7e1e22c1
hiram
  Thu Jun 23 13:23:49 2022 -0700
correctly sizing numbers in allGaps refs #29545

diff --git src/hg/utils/automation/doAssemblyHub.pl src/hg/utils/automation/doAssemblyHub.pl
index bed66a8..cdae480 100755
--- src/hg/utils/automation/doAssemblyHub.pl
+++ src/hg/utils/automation/doAssemblyHub.pl
@@ -1302,74 +1302,74 @@
   $bossScript->add(<<_EOF_
 export asmId=$asmId
 export buildDir=$buildDir
 
 if [ \$buildDir/\$asmId.2bit -nt \$asmId.allGaps.bb ]; then
   twoBitInfo -nBed ../../\$asmId.2bit stdout | awk '{printf "%s\\t%d\\t%d\\t%d\\t%d\\t+\\n", \$1, \$2, \$3, NR, \$3-\$2}' > \$asmId.allGaps.bed
   if [ ! -s \$asmId.allGaps.bed ]; then
     exit 0
   fi
   if [ -s ../assemblyGap/\$asmId.gap.bb ]; then
     bigBedToBed ../assemblyGap/\$asmId.gap.bb \$asmId.gap.bed
     # verify the 'all' gaps should include the gap track items
     bedIntersect -minCoverage=0.0000000014 \$asmId.allGaps.bed \$asmId.gap.bed \\
       \$asmId.verify.annotated.gap.bed
     gapTrackCoverage=`awk '{print \$3-\$2}' \$asmId.gap.bed \\
-      | ave stdin | grep "^total" | sed -e 's/.000000//;'`
+      | ave stdin | grep "^total" | awk '{print \$NF}' | sed -e 's/.000000//;'`
     intersectCoverage=`ave -col=5 \$asmId.verify.annotated.gap.bed \\
-      | grep "^total" | sed -e 's/.000000//;'`
+      | grep "^total" | awk '{print \$NF}' | sed -e 's/.000000//;'`
     if [ \$gapTrackCoverage -ne \$intersectCoverage ]; then
       printf "ERROR: 'all' gaps does not include gap track coverage\\n" 1>&2
       printf "gapTrackCoverage: \$gapTrackCoverage != \$intersectCoverage intersection\\n" 1>&2
       exit 255
     fi
   else
     touch \$asmId.gap.bed
   fi
   bedInvert.pl ../../\$asmId.chrom.sizes \$asmId.allGaps.bed \\
     > \$asmId.NOT.allGaps.bed
   # verify bedInvert worked correctly
   #   sum of both sizes should equal genome size
   both=`cat \$asmId.NOT.allGaps.bed \$asmId.allGaps.bed \\
     | awk '{print \$3-\$2}' | ave stdin | grep "^total" \\
-    | sed -e 's/.000000//;'`
+      | awk '{print \$NF}' | sed -e 's/.000000//;'`
   genomeSize=`ave -col=2 ../../\$asmId.chrom.sizes | grep "^total" \\
-    | sed -e 's/.000000//;'`
+    | awk '{print \$NF}' | sed -e 's/.000000//;'`
   if [ \$genomeSize -ne \$both ]; then
      printf "ERROR: bedInvert.pl did not function correctly on allGaps.bed\n" 1>&2
      printf "genomeSize: \$genomeSize != \$both both gaps data\n" 1>&2
      exit 255
   fi
   bedInvert.pl ../../\$asmId.chrom.sizes \$asmId.gap.bed \\
       > \$asmId.NOT.gap.bed
   # again, verify bedInvert is working correctly, sum of both == genomeSize
   both=`cat \$asmId.NOT.gap.bed \$asmId.gap.bed \\
     | awk '{print \$3-\$2}' | ave stdin | grep "^total" \\
-    | sed -e 's/.000000//;'`
+      | awk '{print \$NF}' | sed -e 's/.000000//;'`
   if [ \$genomeSize -ne \$both ]; then
      printf "ERROR: bedInvert did not function correctly on gap.bed\n" 1>&2
      printf "genomeSize: \$genomeSize != \$both both gaps data\n" 1>&2
      exit 255
   fi
   bedIntersect -minCoverage=0.0000000014 \$asmId.allGaps.bed \\
      \$asmId.NOT.gap.bed \$asmId.notAnnotated.gap.bed
   # verify the intersect functioned correctly
   # sum of new gaps plus gap track should equal all gaps
   allGapCoverage=`awk '{print \$3-\$2}' \$asmId.allGaps.bed \\
-     | ave stdin | grep "^total" | sed -e 's/.000000//;'`
+     | ave stdin | grep "^total" | awk '{print \$NF}' | sed -e 's/.000000//;'`
   both=`cat \$asmId.notAnnotated.gap.bed \$asmId.gap.bed \\
-    | awk '{print \$3-\$2}' | ave stdin | grep "^total" | sed -e 's/.000000//;'`
+    | awk '{print \$3-\$2}' | ave stdin | grep "^total" | awk '{print \$NF}' | sed -e 's/.000000//;'`
   if [ \$allGapCoverage -ne \$both ]; then
      printf "ERROR: bedIntersect to identify new gaps did not function correctly\n" 1>&2
      printf "allGaps: \$allGapCoverage != \$both (new + gap track)\n" 1>&2
   fi
   cut -f1-3 \$asmId.allGaps.bed | sort -k1,1 -k2,2n > toBbi.bed
   bedToBigBed -type=bed3 toBbi.bed ../../\$asmId.chrom.sizes \$asmId.allGaps.bb
   rm -f toBbi.bed
   gzip *.bed
 else
   printf "# allgaps step previously completed\\n" 1>&2
   exit 0
 fi
 _EOF_
   );
   $bossScript->execute();