5b5a242f38f695c23bc52e5388b941b8335307df
hiram
  Thu Jul 21 20:18:42 2022 -0700
properly eliminate rmsk from trackDb when there are no items in the track refs #29545

diff --git src/hg/utils/automation/asmHubTrackDb.sh src/hg/utils/automation/asmHubTrackDb.sh
index ecec619..c03ae79 100755
--- src/hg/utils/automation/asmHubTrackDb.sh
+++ src/hg/utils/automation/asmHubTrackDb.sh
@@ -1,23 +1,22 @@
 #!/bin/bash
 
 set -beEu -o pipefail
 
 if [ $# -ne 2 ]; then
   printf "usage: trackDb.sh <asmId> <pathTo/assembly hub build directory> > trackDb.txt\n" 1>&2
   printf "expecting to find *.ucsc.2bit and bbi/ files at given path\n" 1>&2
-  printf "the ncbi|ucsc selects the naming scheme\n" 1>&2
   exit 255
 fi
 
 export asmId=$1
 export buildDir=$2
 # hubLinks is for mouseStrains specific hub only
 export hubLinks="/hive/data/genomes/asmHubs/hubLinks"
 export accessionId=`echo "$asmId" | awk -F"_" '{printf "%s_%s", $1, $2}'`
 export gcX=`echo $asmId | cut -c1-3`
 export d0=`echo $asmId | cut -c5-7`
 export d1=`echo $asmId | cut -c8-10`
 export d2=`echo $asmId | cut -c11-13`
 export hubPath="$gcX/$d0/$d1/$d2/$asmId"
 
 export scriptDir="$HOME/kent/src/hg/utils/automation"
@@ -183,30 +182,43 @@
     printf "# skipping the tanDups track\n" 1>&2
   fi	#	the else clause of: if [ -z ${not_tanDups+x} ]
 fi	#	if [ "${gapOverlapCount}" -gt 0 -o "${tanDupCount}" -gt 0 ]
 
 # see if there are repeatMasker bb files
 export rmskCount=`(ls $buildDir/trackData/repeatMasker/bbi/${asmId}.rmsk.*.bb 2> /dev/null | wc -l) || true`
 export newRmsk=`(ls $buildDir/trackData/repeatMasker/${asmId}.rmsk.align.bb $buildDir/trackData/repeatMasker/${asmId}.rmsk.bb 2> /dev/null | wc -l) || true`
 
 if [ "${newRmsk}" -gt 0 -o "${rmskCount}" -gt 0 ]; then
 
 if [ ! -s "$buildDir/trackData/repeatMasker/$asmId.sorted.fa.out.gz" ]; then
   printf "ERROR: can not find trackData/repeatMasker/$asmId.sorted.fa.out.gz\n" 1>&2
   exit 255
 fi
 
+# see if there are actually rmsk items in the track, this has to be > 3
+export rmskItemCount=`zcat $buildDir/trackData/repeatMasker/$asmId.sorted.fa.out.g | head | wc -l`
+
+# clean up garbage from previous errors here
+if [ "${rmskItemCount}" -lt 4 ]; then
+  rm -f $buildDir/$asmId.repeatMasker.out.gz
+  rm -f "$buildDir/${asmId}.repeatMasker.version.txt"
+  rm -f $buildDir/bbi/${asmId}.rmsk.align.bb
+  rm -f $buildDir/bbi/${asmId}.rmsk.bb
+  rm -f $buildDir/${asmId}.fa.align.tsv.gz
+  rm -f $buildDir/${asmId}.fa.join.tsv.gz
+else
+
 rm -f $buildDir/$asmId.repeatMasker.out.gz
 ln -s trackData/repeatMasker/$asmId.sorted.fa.out.gz $buildDir/$asmId.repeatMasker.out.gz
 if [ -s "$buildDir/trackData/repeatMasker/versionInfo.txt" ]; then
    rm -f "$buildDir/${asmId}.repeatMasker.version.txt"
    ln -s trackData/repeatMasker/versionInfo.txt "$buildDir/${asmId}.repeatMasker.version.txt"
 fi
 
 if [ "${newRmsk}" -gt 0 ]; then
   rm -f $buildDir/bbi/${asmId}.rmsk.align.bb
   rm -f $buildDir/bbi/${asmId}.rmsk.bb
   rm -f $buildDir/${asmId}.fa.align.tsv.gz
   rm -f $buildDir/${asmId}.fa.join.tsv.gz
   if [ -s "$buildDir/trackData/repeatMasker/${asmId}.rmsk.align.bb" ]; then	#	test the link target, the bbi/ link was just removed above
     ln -s ../trackData/repeatMasker/${asmId}.rmsk.align.bb $buildDir/bbi/${asmId}.rmsk.align.bb
     ln -s trackData/repeatMasker/${asmId}.fa.align.tsv.gz $buildDir/${asmId}.fa.align.tsv.gz
@@ -216,33 +228,35 @@
 
 printf "track repeatMasker
 shortLabel RepeatMasker
 longLabel RepeatMasker Repetitive Elements
 type bigRmsk 9 +
 visibility pack
 group varRep
 bigDataUrl bbi/%s.rmsk.bb\n" "${asmId}"
 if [ -s "$buildDir/bbi/${asmId}.rmsk.align.bb" ]; then
   printf "xrefDataUrl bbi/%s.rmsk.align.bb\n" "${asmId}"
 fi
 export rmskClassProfile="$buildDir/trackData/repeatMasker/$asmId.rmsk.class.profile.txt"
 if [ -s "${rmskClassProfile}" ]; then
   printf "html html/%s.repeatMasker\n\n" "${asmId}"
   $scriptDir/asmHubRmskJoinAlign.pl $asmId $buildDir > $buildDir/html/$asmId.repeatMasker.html
-
+else
+  printf "\n"
 fi
-else	#	if [ "${newRmsk}" -eq 2 ]; then
+
+else	#	else clause of if [ "${newRmsk}" -gt 0 ]
 
 printf "track repeatMasker
 compositeTrack on
 shortLabel RepeatMasker
 longLabel Repeating Elements by RepeatMasker
 group varRep
 visibility dense
 type bigBed 6 +
 colorByStrand 50,50,150 150,50,50
 maxWindowToDraw 10000000
 spectrum on
 html html/%s.repeatMasker\n\n" "${asmId}"
 $scriptDir/asmHubRmsk.pl $asmId $buildDir/html/$asmId.names.tab $buildDir/trackData/repeatMasker/$asmId.rmsk.class.profile.txt > $buildDir/html/$asmId.repeatMasker.html
 
 
@@ -342,31 +356,32 @@
     bigDataUrl bbi/%s.rmsk.RNA.bb\n\n" "${asmId}"
 fi
 
 if [ -s ${buildDir}/trackData/repeatMasker/bbi/${asmId}.rmsk.Other.bb ]; then
 rm -f $buildDir/bbi/${asmId}.rmsk.Other.bb
 ln -s ../trackData/repeatMasker/bbi/${asmId}.rmsk.Other.bb $buildDir/bbi/${asmId}.rmsk.Other.bb
 printf "    track repeatMaskerOther
     parent repeatMasker
     shortLabel Other
     longLabel Other Repeating Elements by RepeatMasker
     type bigBed 6 +
     priority 9
     bigDataUrl bbi/%s.rmsk.Other.bb\n\n" "${asmId}"
 fi
 
-fi	#	if [ "${newRmsk}" -eq 2 ]; then
+fi	#	else clause of if [ "${newRmsk}" -gt 0 ]; then
+fi	#	else clause of if [ "${rmskItemCount}" -lt 4 ]
 fi      #       if [ "${newRmsk}" -gt 0 -o "${rmskCount}" -gt 0 ]; then
 
 if [ -s ${buildDir}/trackData/simpleRepeat/simpleRepeat.bb ]; then
 rm -f $buildDir/bbi/${asmId}.simpleRepeat.bb
 ln -s ../trackData/simpleRepeat/simpleRepeat.bb $buildDir/bbi/${asmId}.simpleRepeat.bb
 printf "track simpleRepeat
 shortLabel Simple Repeats
 longLabel Simple Tandem Repeats by TRF
 group varRep
 visibility dense
 type bigBed 4 +
 bigDataUrl bbi/%s.simpleRepeat.bb
 html html/%s.simpleRepeat\n\n" "${asmId}" "${asmId}"
 $scriptDir/asmHubSimpleRepeat.pl $asmId $buildDir/html/$asmId.names.tab $buildDir > $buildDir/html/$asmId.simpleRepeat.html
 fi