5b5a242f38f695c23bc52e5388b941b8335307df hiram Thu Jul 21 20:18:42 2022 -0700 properly eliminate rmsk from trackDb when there are no items in the track refs #29545 diff --git src/hg/utils/automation/asmHubTrackDb.sh src/hg/utils/automation/asmHubTrackDb.sh index ecec619..c03ae79 100755 --- src/hg/utils/automation/asmHubTrackDb.sh +++ src/hg/utils/automation/asmHubTrackDb.sh @@ -1,23 +1,22 @@ #!/bin/bash set -beEu -o pipefail if [ $# -ne 2 ]; then printf "usage: trackDb.sh <asmId> <pathTo/assembly hub build directory> > trackDb.txt\n" 1>&2 printf "expecting to find *.ucsc.2bit and bbi/ files at given path\n" 1>&2 - printf "the ncbi|ucsc selects the naming scheme\n" 1>&2 exit 255 fi export asmId=$1 export buildDir=$2 # hubLinks is for mouseStrains specific hub only export hubLinks="/hive/data/genomes/asmHubs/hubLinks" export accessionId=`echo "$asmId" | awk -F"_" '{printf "%s_%s", $1, $2}'` export gcX=`echo $asmId | cut -c1-3` export d0=`echo $asmId | cut -c5-7` export d1=`echo $asmId | cut -c8-10` export d2=`echo $asmId | cut -c11-13` export hubPath="$gcX/$d0/$d1/$d2/$asmId" export scriptDir="$HOME/kent/src/hg/utils/automation" @@ -183,30 +182,43 @@ printf "# skipping the tanDups track\n" 1>&2 fi # the else clause of: if [ -z ${not_tanDups+x} ] fi # if [ "${gapOverlapCount}" -gt 0 -o "${tanDupCount}" -gt 0 ] # see if there are repeatMasker bb files export rmskCount=`(ls $buildDir/trackData/repeatMasker/bbi/${asmId}.rmsk.*.bb 2> /dev/null | wc -l) || true` export newRmsk=`(ls $buildDir/trackData/repeatMasker/${asmId}.rmsk.align.bb $buildDir/trackData/repeatMasker/${asmId}.rmsk.bb 2> /dev/null | wc -l) || true` if [ "${newRmsk}" -gt 0 -o "${rmskCount}" -gt 0 ]; then if [ ! 
-s "$buildDir/trackData/repeatMasker/$asmId.sorted.fa.out.gz" ]; then printf "ERROR: can not find trackData/repeatMasker/$asmId.sorted.fa.out.gz\n" 1>&2 exit 255 fi +# see if there are actually rmsk items in the track, this has to be > 3 +export rmskItemCount=`zcat $buildDir/trackData/repeatMasker/$asmId.sorted.fa.out.gz | head | wc -l` + +# clean up garbage from previous errors here +if [ "${rmskItemCount}" -lt 4 ]; then + rm -f $buildDir/$asmId.repeatMasker.out.gz + rm -f "$buildDir/${asmId}.repeatMasker.version.txt" + rm -f $buildDir/bbi/${asmId}.rmsk.align.bb + rm -f $buildDir/bbi/${asmId}.rmsk.bb + rm -f $buildDir/${asmId}.fa.align.tsv.gz + rm -f $buildDir/${asmId}.fa.join.tsv.gz +else + rm -f $buildDir/$asmId.repeatMasker.out.gz ln -s trackData/repeatMasker/$asmId.sorted.fa.out.gz $buildDir/$asmId.repeatMasker.out.gz if [ -s "$buildDir/trackData/repeatMasker/versionInfo.txt" ]; then rm -f "$buildDir/${asmId}.repeatMasker.version.txt" ln -s trackData/repeatMasker/versionInfo.txt "$buildDir/${asmId}.repeatMasker.version.txt" fi if [ "${newRmsk}" -gt 0 ]; then rm -f $buildDir/bbi/${asmId}.rmsk.align.bb rm -f $buildDir/bbi/${asmId}.rmsk.bb rm -f $buildDir/${asmId}.fa.align.tsv.gz rm -f $buildDir/${asmId}.fa.join.tsv.gz if [ -s "$buildDir/trackData/repeatMasker/${asmId}.rmsk.align.bb" ]; then ln -s ../trackData/repeatMasker/${asmId}.rmsk.align.bb $buildDir/bbi/${asmId}.rmsk.align.bb ln -s trackData/repeatMasker/${asmId}.fa.align.tsv.gz $buildDir/${asmId}.fa.align.tsv.gz @@ -216,33 +228,35 @@ printf "track repeatMasker shortLabel RepeatMasker longLabel RepeatMasker Repetitive Elements type bigRmsk 9 + visibility pack group varRep bigDataUrl bbi/%s.rmsk.bb\n" "${asmId}" if [ -s "$buildDir/bbi/${asmId}.rmsk.align.bb" ]; then printf "xrefDataUrl bbi/%s.rmsk.align.bb\n" "${asmId}" fi export rmskClassProfile="$buildDir/trackData/repeatMasker/$asmId.rmsk.class.profile.txt" if [ -s "${rmskClassProfile}" ]; then printf "html html/%s.repeatMasker\n\n" "${asmId}" 
$scriptDir/asmHubRmskJoinAlign.pl $asmId $buildDir > $buildDir/html/$asmId.repeatMasker.html - +else + printf "\n" fi -else # if [ "${newRmsk}" -eq 2 ]; then + +else # else clause of if [ "${newRmsk}" -gt 0 ] printf "track repeatMasker compositeTrack on shortLabel RepeatMasker longLabel Repeating Elements by RepeatMasker group varRep visibility dense type bigBed 6 + colorByStrand 50,50,150 150,50,50 maxWindowToDraw 10000000 spectrum on html html/%s.repeatMasker\n\n" "${asmId}" $scriptDir/asmHubRmsk.pl $asmId $buildDir/html/$asmId.names.tab $buildDir/trackData/repeatMasker/$asmId.rmsk.class.profile.txt > $buildDir/html/$asmId.repeatMasker.html @@ -342,31 +356,32 @@ bigDataUrl bbi/%s.rmsk.RNA.bb\n\n" "${asmId}" fi if [ -s ${buildDir}/trackData/repeatMasker/bbi/${asmId}.rmsk.Other.bb ]; then rm -f $buildDir/bbi/${asmId}.rmsk.Other.bb ln -s ../trackData/repeatMasker/bbi/${asmId}.rmsk.Other.bb $buildDir/bbi/${asmId}.rmsk.Other.bb printf " track repeatMaskerOther parent repeatMasker shortLabel Other longLabel Other Repeating Elements by RepeatMasker type bigBed 6 + priority 9 bigDataUrl bbi/%s.rmsk.Other.bb\n\n" "${asmId}" fi -fi # if [ "${newRmsk}" -eq 2 ]; then +fi # else clause of if [ "${newRmsk}" -gt 0 ]; then +fi # else clause of if [ "${rmskItemCount}" -lt 4 ] fi # if [ "${newRmsk}" -eq 2 -o "${rmskCount}" -gt 0 ]; then if [ -s ${buildDir}/trackData/simpleRepeat/simpleRepeat.bb ]; then rm -f $buildDir/bbi/${asmId}.simpleRepeat.bb ln -s ../trackData/simpleRepeat/simpleRepeat.bb $buildDir/bbi/${asmId}.simpleRepeat.bb printf "track simpleRepeat shortLabel Simple Repeats longLabel Simple Tandem Repeats by TRF group varRep visibility dense type bigBed 4 + bigDataUrl bbi/%s.simpleRepeat.bb html html/%s.simpleRepeat\n\n" "${asmId}" "${asmId}" $scriptDir/asmHubSimpleRepeat.pl $asmId $buildDir/html/$asmId.names.tab $buildDir > $buildDir/html/$asmId.simpleRepeat.html fi