77a819d426026e8a6ac3d7965af8f44fbb9a0272 hiram Wed Jan 10 12:42:07 2024 -0800 better manage large genome construction and add in RepeatModeler track refs #29545 diff --git src/hg/utils/automation/asmHubTrackDb.sh src/hg/utils/automation/asmHubTrackDb.sh index 62d0cc9..869590e 100755 --- src/hg/utils/automation/asmHubTrackDb.sh +++ src/hg/utils/automation/asmHubTrackDb.sh @@ -218,31 +218,31 @@ if [ -s "$buildDir/trackData/repeatMasker/versionInfo.txt" ]; then rm -f "$buildDir/${asmId}.repeatMasker.version.txt" ln -s trackData/repeatMasker/versionInfo.txt "$buildDir/${asmId}.repeatMasker.version.txt" fi if [ -s "$buildDir/trackData/repeatModeler/${asmId}-families.fa" ]; then rm -f "$buildDir/${asmId}.rmsk.customLib.fa.gz" cp -p "$buildDir/trackData/repeatModeler/${asmId}-families.fa" "$buildDir/${asmId}.rmsk.customLib.fa" gzip "$buildDir/${asmId}.rmsk.customLib.fa" fi if [ "${newRmsk}" -gt 0 ]; then rm -f $buildDir/bbi/${asmId}.rmsk.align.bb rm -f $buildDir/bbi/${asmId}.rmsk.bb rm -f $buildDir/${asmId}.fa.align.tsv.gz rm -f $buildDir/${asmId}.fa.join.tsv.gz - if [ -s "$buildDir/bbi/${asmId}.rmsk.align.bb" ]; then + if [ -s "$buildDir/trackData/repeatMasker/${asmId}.rmsk.align.bb" ]; then ln -s ../trackData/repeatMasker/${asmId}.rmsk.align.bb $buildDir/bbi/${asmId}.rmsk.align.bb ln -s trackData/repeatMasker/${asmId}.fa.align.tsv.gz $buildDir/${asmId}.fa.align.tsv.gz fi ln -s ../trackData/repeatMasker/${asmId}.rmsk.bb $buildDir/bbi/${asmId}.rmsk.bb ln -s trackData/repeatMasker/${asmId}.sorted.fa.join.tsv.gz $buildDir/${asmId}.fa.join.tsv.gz printf "track repeatMasker shortLabel RepeatMasker longLabel RepeatMasker Repetitive Elements type bigRmsk 9 + visibility pack group varRep bigDataUrl bbi/%s.rmsk.bb\n" "${asmId}" if [ -s "$buildDir/bbi/${asmId}.rmsk.align.bb" ]; then printf "xrefDataUrl bbi/%s.rmsk.align.bb\n" "${asmId}" @@ -370,31 +370,108 @@ if [ -s ${buildDir}/trackData/repeatMasker/bbi/${asmId}.rmsk.Other.bb ]; then rm -f $buildDir/bbi/${asmId}.rmsk.Other.bb ln -s ../trackData/repeatMasker/bbi/${asmId}.rmsk.Other.bb $buildDir/bbi/${asmId}.rmsk.Other.bb printf " track repeatMaskerOther parent repeatMasker shortLabel Other longLabel Other Repeating Elements by RepeatMasker type bigBed 6 + priority 9 bigDataUrl bbi/%s.rmsk.Other.bb\n\n" "${asmId}" fi fi # else clause of if [ "${newRmsk}" -gt 0 ]; then fi # else clause of if [ "${rmskItemCount}" -lt 4 ] -fi # if [ "${newRmsk}" -eq 2 -o "${rmskCount}" -gt 0 ]; then +fi # if [ "${newRmsk}" -gt 0 -o "${rmskCount}" -gt 0 ]; then + +# see if there are repeatModeler bb files +export rModelCount=`(ls $buildDir/trackData/repeatModeler/bbi/${asmId}.rmsk.*.bb 2> /dev/null | wc -l) || true` +export newRmodel=`(ls $buildDir/trackData/repeatModeler/${asmId}.rmsk.align.bb $buildDir/trackData/repeatModeler/${asmId}.rmsk.bb 2> /dev/null | wc -l) || true` + +if [ "${newRmodel}" -gt 0 -o "${rModelCount}" -gt 0 ]; then + +if [ ! -s "$buildDir/trackData/repeatModeler/$asmId.sorted.fa.out.gz" ]; then + printf "ERROR: can not find trackData/repeatModeler/$asmId.sorted.fa.out.gz\n" 1>&2 + exit 255 +fi + +# see if there are actually rmsk items in the track, this has to be > 3 +export rModelItemCount=`zcat $buildDir/trackData/repeatModeler/$asmId.sorted.fa.out.gz | head | wc -l` + +# clean up garbage from previous errors here +if [ "${rModelItemCount}" -lt 4 ]; then + rm -f $buildDir/$asmId.repeatModeler.out.gz + rm -f "$buildDir/${asmId}.repeatModeler.version.txt" + rm -f $buildDir/bbi/${asmId}.rModel.align.bb + rm -f $buildDir/bbi/${asmId}.rModel.bb + rm -f $buildDir/${asmId}.fa.rModel.align.tsv.gz + rm -f $buildDir/${asmId}.fa.rModel.join.tsv.gz + rm -f $buildDir/${asmId}.rModel.customLib.fa.gz +else + +rm -f $buildDir/$asmId.repeatModeler.out.gz +ln -s trackData/repeatModeler/$asmId.sorted.fa.out.gz $buildDir/$asmId.repeatModeler.out.gz +if [ -s "$buildDir/trackData/repeatModeler/versionInfo.txt" ]; then + rm -f "$buildDir/${asmId}.repeatModeler.version.txt" + ln -s trackData/repeatModeler/versionInfo.txt "$buildDir/${asmId}.repeatModeler.version.txt" +fi +if [ -s "$buildDir/trackData/repeatModeler/${asmId}-families.fa" ]; then + rm -f "$buildDir/${asmId}.rmsk.customLib.fa.gz" + cp -p "$buildDir/trackData/repeatModeler/${asmId}-families.fa" "$buildDir/${asmId}.rmsk.customLib.fa" + gzip "$buildDir/${asmId}.rmsk.customLib.fa" +fi + +if [ "${newRmodel}" -gt 0 ]; then + rm -f $buildDir/bbi/${asmId}.rModel.align.bb + rm -f $buildDir/bbi/${asmId}.rModel.bb + rm -f $buildDir/${asmId}.fa.rModel.align.tsv.gz + rm -f $buildDir/${asmId}.fa.rModel.join.tsv.gz + if [ -s "$buildDir/trackData/repeatModeler/${asmId}.rmsk.align.bb" ]; then + ln -s ../trackData/repeatModeler/${asmId}.rmsk.align.bb $buildDir/bbi/${asmId}.rModel.align.bb + ln -s trackData/repeatModeler/${asmId}.fa.align.tsv.gz $buildDir/${asmId}.fa.rModel.align.tsv.gz + fi + ln -s ../trackData/repeatModeler/${asmId}.rmsk.bb $buildDir/bbi/${asmId}.rModel.bb + ln -s trackData/repeatModeler/${asmId}.sorted.fa.join.tsv.gz $buildDir/${asmId}.fa.rModel.join.tsv.gz + +printf "track repeatModeler +shortLabel RepeatModeler +longLabel RepeatModeler Repetitive Elements +type bigRmsk 9 + +visibility pack +group varRep +bigDataUrl bbi/%s.rModel.bb\n" "${asmId}" +if [ -s "$buildDir/bbi/${asmId}.rModel.align.bb" ]; then + printf "xrefDataUrl bbi/%s.rModel.align.bb\n" "${asmId}" +fi +printf "maxWindowToDraw 5000000\n" +export rModelClassProfile="$buildDir/trackData/repeatModeler/$asmId.rmsk.class.profile.txt" +if [ -s "${rModelClassProfile}" ]; then + printf "html html/%s.repeatModeler\n\n" "${asmId}" + $scriptDir/asmHubRmodelJoinAlign.pl $asmId $buildDir > $buildDir/html/$asmId.repeatModeler.html +else + printf "\n" +fi + +else # else clause of if [ "${newRmodel}" -gt 0 ] + + printf "ERROR: expected new version of rmsk files for RepeatModeler not found\n" 1>&2 + exit 255 + +fi # else clause of if [ "${newRmodel}" -gt 0 ]; then +fi # else clause of if [ "${rModelItemCount}" -lt 4 ] +fi # if [ "${newRmodel}" -gt 0 -o "${rModelCount}" -gt 0 ]; then if [ -s ${buildDir}/trackData/simpleRepeat/simpleRepeat.bb ]; then rm -f $buildDir/bbi/${asmId}.simpleRepeat.bb ln -s ../trackData/simpleRepeat/simpleRepeat.bb $buildDir/bbi/${asmId}.simpleRepeat.bb printf "track simpleRepeat shortLabel Simple Repeats longLabel Simple Tandem Repeats by TRF group varRep visibility dense type bigBed 4 + bigDataUrl bbi/%s.simpleRepeat.bb html html/%s.simpleRepeat\n\n" "${asmId}" "${asmId}" $scriptDir/asmHubSimpleRepeat.pl $asmId $buildDir/html/$asmId.names.tab $buildDir > $buildDir/html/$asmId.simpleRepeat.html fi