77a819d426026e8a6ac3d7965af8f44fbb9a0272
hiram
  Wed Jan 10 12:42:07 2024 -0800
better manage large genome construction and add in RepeatModeler track refs #29545

diff --git src/hg/utils/automation/asmHubTrackDb.sh src/hg/utils/automation/asmHubTrackDb.sh
index 62d0cc9..869590e 100755
--- src/hg/utils/automation/asmHubTrackDb.sh
+++ src/hg/utils/automation/asmHubTrackDb.sh
@@ -218,31 +218,31 @@
 if [ -s "$buildDir/trackData/repeatMasker/versionInfo.txt" ]; then
    rm -f "$buildDir/${asmId}.repeatMasker.version.txt"
    ln -s trackData/repeatMasker/versionInfo.txt "$buildDir/${asmId}.repeatMasker.version.txt"
 fi
 if [ -s "$buildDir/trackData/repeatModeler/${asmId}-families.fa" ]; then
    rm -f "$buildDir/${asmId}.rmsk.customLib.fa.gz"
    cp -p "$buildDir/trackData/repeatModeler/${asmId}-families.fa" "$buildDir/${asmId}.rmsk.customLib.fa"
    gzip "$buildDir/${asmId}.rmsk.customLib.fa"
 fi
 
 if [ "${newRmsk}" -gt 0 ]; then
   rm -f $buildDir/bbi/${asmId}.rmsk.align.bb
   rm -f $buildDir/bbi/${asmId}.rmsk.bb
   rm -f $buildDir/${asmId}.fa.align.tsv.gz
   rm -f $buildDir/${asmId}.fa.join.tsv.gz
-  if [ -s "$buildDir/bbi/${asmId}.rmsk.align.bb" ]; then
+  if [ -s "$buildDir/trackData/repeatMasker/${asmId}.rmsk.align.bb" ]; then
     ln -s ../trackData/repeatMasker/${asmId}.rmsk.align.bb $buildDir/bbi/${asmId}.rmsk.align.bb
     ln -s trackData/repeatMasker/${asmId}.fa.align.tsv.gz $buildDir/${asmId}.fa.align.tsv.gz
   fi
   ln -s ../trackData/repeatMasker/${asmId}.rmsk.bb $buildDir/bbi/${asmId}.rmsk.bb
   ln -s trackData/repeatMasker/${asmId}.sorted.fa.join.tsv.gz $buildDir/${asmId}.fa.join.tsv.gz
 
 printf "track repeatMasker
 shortLabel RepeatMasker
 longLabel RepeatMasker Repetitive Elements
 type bigRmsk 9 +
 visibility pack
 group varRep
 bigDataUrl bbi/%s.rmsk.bb\n" "${asmId}"
 if [ -s "$buildDir/bbi/${asmId}.rmsk.align.bb" ]; then
   printf "xrefDataUrl bbi/%s.rmsk.align.bb\n" "${asmId}"
@@ -370,31 +370,108 @@
 
 if [ -s ${buildDir}/trackData/repeatMasker/bbi/${asmId}.rmsk.Other.bb ]; then
 rm -f $buildDir/bbi/${asmId}.rmsk.Other.bb
 ln -s ../trackData/repeatMasker/bbi/${asmId}.rmsk.Other.bb $buildDir/bbi/${asmId}.rmsk.Other.bb
 printf "    track repeatMaskerOther
     parent repeatMasker
     shortLabel Other
     longLabel Other Repeating Elements by RepeatMasker
     type bigBed 6 +
     priority 9
     bigDataUrl bbi/%s.rmsk.Other.bb\n\n" "${asmId}"
 fi
 
 fi	#	else clause of if [ "${newRmsk}" -gt 0 ]; then
 fi	#	else clause of if [ "${rmskItemCount}" -lt 4 ]
-fi      #       if [ "${newRmsk}" -eq 2 -o "${rmskCount}" -gt 0 ]; then
+fi      #       if [ "${newRmsk}" -gt 0 -o "${rmskCount}" -gt 0 ]; then
+
+# see if there are repeatModeler bb files
+export rModelCount=`(ls $buildDir/trackData/repeatModeler/bbi/${asmId}.rmsk.*.bb 2> /dev/null | wc -l) || true`
+export newRmodel=`(ls $buildDir/trackData/repeatModeler/${asmId}.rmsk.align.bb $buildDir/trackData/repeatModeler/${asmId}.rmsk.bb 2> /dev/null | wc -l) || true`
+
+if [ "${newRmodel}" -gt 0 -o "${rModelCount}" -gt 0 ]; then
+
+if [ ! -s "$buildDir/trackData/repeatModeler/$asmId.sorted.fa.out.gz" ]; then
+  printf "ERROR: can not find trackData/repeatModeler/$asmId.sorted.fa.out.gz\n" 1>&2
+  exit 255
+fi
+
+# see if there are actually RepeatModeler items in the track: the sorted.fa.out.gz file must have more than 3 lines
+export rModelItemCount=`zcat $buildDir/trackData/repeatModeler/$asmId.sorted.fa.out.gz | head | wc -l`
+
+# fewer than 4 lines means no usable track: clean up garbage from previous errors here
+if [ "${rModelItemCount}" -lt 4 ]; then
+  rm -f $buildDir/$asmId.repeatModeler.out.gz
+  rm -f "$buildDir/${asmId}.repeatModeler.version.txt"
+  rm -f $buildDir/bbi/${asmId}.rModel.align.bb
+  rm -f $buildDir/bbi/${asmId}.rModel.bb
+  rm -f $buildDir/${asmId}.fa.rModel.align.tsv.gz
+  rm -f $buildDir/${asmId}.fa.rModel.join.tsv.gz
+  rm -f $buildDir/${asmId}.rModel.customLib.fa.gz
+else
+
+rm -f $buildDir/$asmId.repeatModeler.out.gz
+ln -s trackData/repeatModeler/$asmId.sorted.fa.out.gz $buildDir/$asmId.repeatModeler.out.gz
+if [ -s "$buildDir/trackData/repeatModeler/versionInfo.txt" ]; then
+   rm -f "$buildDir/${asmId}.repeatModeler.version.txt"
+   ln -s trackData/repeatModeler/versionInfo.txt "$buildDir/${asmId}.repeatModeler.version.txt"
+fi
+if [ -s "$buildDir/trackData/repeatModeler/${asmId}-families.fa" ]; then
+   rm -f "$buildDir/${asmId}.rmsk.customLib.fa.gz"
+   cp -p "$buildDir/trackData/repeatModeler/${asmId}-families.fa" "$buildDir/${asmId}.rmsk.customLib.fa"
+   gzip "$buildDir/${asmId}.rmsk.customLib.fa"
+fi
+
+if [ "${newRmodel}" -gt 0 ]; then
+  rm -f $buildDir/bbi/${asmId}.rModel.align.bb
+  rm -f $buildDir/bbi/${asmId}.rModel.bb
+  rm -f $buildDir/${asmId}.fa.rModel.align.tsv.gz
+  rm -f $buildDir/${asmId}.fa.rModel.join.tsv.gz
+  if [ -s "$buildDir/trackData/repeatModeler/${asmId}.rmsk.align.bb" ]; then
+    ln -s ../trackData/repeatModeler/${asmId}.rmsk.align.bb $buildDir/bbi/${asmId}.rModel.align.bb
+    ln -s trackData/repeatModeler/${asmId}.fa.align.tsv.gz $buildDir/${asmId}.fa.rModel.align.tsv.gz
+  fi
+  ln -s ../trackData/repeatModeler/${asmId}.rmsk.bb $buildDir/bbi/${asmId}.rModel.bb
+  ln -s trackData/repeatModeler/${asmId}.sorted.fa.join.tsv.gz $buildDir/${asmId}.fa.rModel.join.tsv.gz
+
+printf "track repeatModeler
+shortLabel RepeatModeler
+longLabel RepeatModeler Repetitive Elements
+type bigRmsk 9 +
+visibility pack
+group varRep
+bigDataUrl bbi/%s.rModel.bb\n" "${asmId}"
+if [ -s "$buildDir/bbi/${asmId}.rModel.align.bb" ]; then
+  printf "xrefDataUrl bbi/%s.rModel.align.bb\n" "${asmId}"
+fi
+printf "maxWindowToDraw 5000000\n"
+export rModelClassProfile="$buildDir/trackData/repeatModeler/$asmId.rmsk.class.profile.txt"
+if [ -s "${rModelClassProfile}" ]; then
+  printf "html html/%s.repeatModeler\n\n" "${asmId}"
+  $scriptDir/asmHubRmodelJoinAlign.pl $asmId $buildDir > $buildDir/html/$asmId.repeatModeler.html
+else
+  printf "\n"
+fi
+
+else	#	else clause of if [ "${newRmodel}" -gt 0 ]
+
+  printf "ERROR: expected new version of rmsk files for RepeatModeler not found\n" 1>&2
+  exit 255
+
+fi	#	else clause of if [ "${newRmodel}" -gt 0 ]; then
+fi	#	else clause of if [ "${rModelItemCount}" -lt 4 ]
+fi      #       if [ "${newRmodel}" -gt 0 -o "${rModelCount}" -gt 0 ]; then
 
 if [ -s ${buildDir}/trackData/simpleRepeat/simpleRepeat.bb ]; then
 rm -f $buildDir/bbi/${asmId}.simpleRepeat.bb
 ln -s ../trackData/simpleRepeat/simpleRepeat.bb $buildDir/bbi/${asmId}.simpleRepeat.bb
 printf "track simpleRepeat
 shortLabel Simple Repeats
 longLabel Simple Tandem Repeats by TRF
 group varRep
 visibility dense
 type bigBed 4 +
 bigDataUrl bbi/%s.simpleRepeat.bb
 html html/%s.simpleRepeat\n\n" "${asmId}" "${asmId}"
 $scriptDir/asmHubSimpleRepeat.pl $asmId $buildDir/html/$asmId.names.tab $buildDir > $buildDir/html/$asmId.simpleRepeat.html
 fi