src/hg/makeDb/doc/tiberius.txt 02f4db8b8cb4a5fce01f05c91f2dd48bb4e4b1ac

02f4db8b8cb4a5fce01f05c91f2dd48bb4e4b1ac
hiram
  Tue May 20 15:05:30 2025 -0700
indicate how the alpha version can be seen on genome-test refs #34917

diff --git src/hg/makeDb/doc/tiberius.txt src/hg/makeDb/doc/tiberius.txt
index e36b2884e4e..62ebc4f6821 100644
--- src/hg/makeDb/doc/tiberius.txt
+++ src/hg/makeDb/doc/tiberius.txt
@@ -1,244 +1,256 @@
 #############################################################################
 #  procedure for adding 'tiberius' gene prediction tracks to genark
 #############################################################################
 
 #############################################################################
 ### fetch tiberius predictions - (DONE - Hiram - 2025-02-01)
 #############################################################################
 
 ### working directory for this procedure; the later steps cd here too
 mkdir -p /hive/data/outside/genark/tiberius
 cd /hive/data/outside/genark/tiberius
 
 ### credential file obtained from Mario Stanke in email
 ### (passed to rsync below as .tiberius.credential, relative to this directory)
 
 ### the fetched files land in the dated directory ./2025-02-01/
 rsync --password-file=.tiberius.credential -av -P \
    rsync://hiram@bioinf.uni-greifswald.de/tiberius/ ./2025-02-01/
 
 #############################################################################
 ### add html and trackDb definitions as obtained from Mario
 ###     (DONE - Hiram - 2025-05-09)
 #############################################################################
 
 cat << '__EOF__' > 2025-02-01/Tiberius.html
 <h2 id="description">UCSC Tiberius Track</h2>
 <p>The protein-coding genes were predicted with Tiberius in <i>ab initio</i> mode. The soft-masked genome was input only. The command was:</p>
 <code>tiberius.py --genome genome.fa --out tiberius.gtf</code>
 <p><a href="https://bioinf.uni-greifswald.de/bioinf/tiberius/genes/tib-tbl.html">Table with predicted coordinates, protein sequences and coding sequences of all mammals.</a></p>
 <p>Download code and see accuracy statistics on the <a href="https://github.com/Gaius-Augustus/Tiberius">Tiberius GitHub page</a>.</p>
 <p>Tiberius is a deep learning model that combines a HMM layer with other sequence-to-sequence models (convolutional neural networks, LSTM).</p>
 <p>Tiberius was trained on 32 mammalian genomes that did not include any <i>Hominidae</i> (see supplements of below preprint).</p>
 <h2 id="credits">Contact</h2>
 <p>Questions should be directed to <a href="mailto:lars.gabriel@uni-greifswald.de">Lars Gabriel</a> or <a href="mailto:mario.stanke@uni-greifswald.de">Mario Stanke</a>.</p>
 <h2 id="reference">Reference</h2>
 <a href="https://academic.oup.com/bioinformatics/article/40/12/btae685/7903281">Tiberius: End-to-End Deep Learning with an HMM for Gene Prediction. Lars Gabriel, Felix Becker, Katharina J. Hoff and Mario Stanke, <i>Bioinformatics</i> 2024;</a>, https://doi.org/10.1093/bioinformatics/btae685
 __EOF__
 
 ### trackDb stanza for the track.  It must be written to its own file:
 ### linkOne.sh below reads it from 2025-02-01/tiberius.trackDb.txt, and
 ### writing it to Tiberius.html would overwrite the description page.
 cat << '__EOF__' > 2025-02-01/tiberius.trackDb.txt
 track Tiberius
 bigDataUrl contrib/tiberius/tiberius.bigGenePred.bb
 shortLabel Tiberius genes
 longLabel Tiberius ab initio gene prediction
 type bigGenePred
 visibility pack
 color 0,102,204
 html contrib/tiberius/Tiberius.html
 group genes
 dataVersion Tiberius version 2025-01-07
 baseColorDefault genomicCodons
 __EOF__
 
 ### And these two files are checked into the source tree in:
 
     makeDb/trackDb/contrib/tiberius/Tiberius.html
     makeDb/trackDb/contrib/tiberius/tiberius.trackDb.txt
 
 #############################################################################
 ### identify corresponding assemblies
 #############################################################################
 
 ### the base name of every file found under a bb/ directory, minus its
 ### .bb suffix, goes into the accession list.  The suffix pattern is
 ### escaped and anchored so that only a literal trailing ".bb" is removed.
 find ./2025-02-01 -type f | grep '/bb/' | awk -F'/' '{print $NF}' \
    | sed -e 's/\.bb$//;' | sort -u > tiberius.2025-02-01.accession.list
 
 ### second space-separated column of each matching master.run.list line
 grep -F -f tiberius.2025-02-01.accession.list \
    "$HOME/kent/src/hg/makeDb/doc/asmHubs/master.run.list" \
      | cut -d' ' -f2 | sort -u > to.link.list
 
 ### looks like 701 of them match:
 
 wc -l tiberius.2025-02-01.accession.list to.link.list
  1317 tiberius.2025-02-01.accession.list
   701 to.link.list
 
 #############################################################################
 ### setup the link script to get these files into the genark build hierarchy
 #############################################################################
 
 cat << '__EOF__' > linkOne.sh
 #!/bin/bash
 #
 # linkOne.sh - symlink the tiberius bigGenePred, trackDb and html files for
 #   one assembly into <buildDir>/contrib/tiberius/ and then rebuild that
 #   assembly's alpha.trackDb.txt and alpha.hub.txt files
 #
 # usage: ./linkOne.sh <asmId>
 #   asmId - assembly identifier; its first two '_' separated fields are
 #           taken as the accession, and the nine characters following the
 #           'GCx_' prefix select the build directory levels
 #
 # exit status: 0 when the links were made, or when contrib/tiberius is
 #   already present; 255 when the build directory, the build trackDb or
 #   the source .bb file can not be found
 
 set -beEu -o pipefail
 
 export TOP="/hive/data/outside/genark/tiberius"
 
 export asmId="${1}"
 export acc=$(echo "${asmId}" | cut -d'_' -f1-2)
 export gcX="${asmId:0:3}"
 export d0="${asmId:4:3}"
 export d1="${asmId:7:3}"
 export d2="${asmId:10:3}"
 
 # GCF identifiers are built under refseqBuild, everything else genbankBuild
 export gbkRef="genbankBuild"
 
 if [[ "$gcX" == GCF ]]; then
     gbkRef="refseqBuild"
 fi
 
 export buildDir="/hive/data/genomes/asmHubs/${gbkRef}/${gcX}/${d0}/${d1}/${d2}/${asmId}"
 if [ ! -d "${buildDir}" ]; then
   printf "ERROR: can not find build directory:\n%s\n" "${buildDir}" 1>&2
   exit 255
 fi
 export buildTrackDb="${buildDir}/${asmId}.trackDb.txt"
 if [ ! -s "${buildTrackDb}" ]; then
   printf "ERROR: can not find build trackDb:\n%s\n" "${buildTrackDb}" 1>&2
   exit 255
 fi
 
 # an existing contrib/tiberius directory means this assembly is finished
 export destDir="${buildDir}/contrib/tiberius"
 if [ -d "${destDir}" ]; then
    printf "DONE: %s\n" "${asmId}"
    exit 0
 fi
 
 # NOTE: tiberiusVer is not referenced anywhere else in this script
 export tiberiusVer="/gbdb/genark/contribTracks/tiberius/2025-02-01"
 export destLink="${destDir}/tiberius.bigGenePred.bb"
 export destTrackDb="${destDir}/tiberius.trackDb.txt"
 export destHtml="${destDir}/Tiberius.html"
 
 export srcHtml="${TOP}/2025-02-01/Tiberius.html"
 export srcTrackDb="${TOP}/2025-02-01/tiberius.trackDb.txt"
 
 # Locate the source file BEFORE creating destDir: an empty destDir left
 # behind by a failed run would make the next run report DONE.
 # The assignment is deliberately kept on the export line: export hides the
 # pipeline status, so a failing ls (no match) reaches the error report
 # below instead of tripping 'set -e'.
 export srcCount=$(ls "${TOP}"/2025-02-01/*/bb/"${acc}".bb | wc -l)
 if [ "${srcCount}" -ne 1 ]; then
   printf "ERROR: can not find source file at\n" 1>&2
   printf "%s\n" "${TOP}/2025-02-01/*/bb/${acc}.bb" 1>&2
   exit 255
 fi
 export srcFile=$(ls "${TOP}"/2025-02-01/*/bb/"${acc}".bb)
 
 mkdir -p "${destDir}"
 rm -f "${destLink}" "${destTrackDb}" "${destHtml}"
 printf "ln -s %s %s\n" "${srcFile}" "${destLink}"
 ln -s "${srcFile}" "${destLink}"
 ln -s "${srcHtml}" "${destHtml}"
 ln -s "${srcTrackDb}" "${destTrackDb}"
 
 # alpha.trackDb.txt: the build trackDb followed by every contrib trackDb
 cat "${buildTrackDb}" "${buildDir}"/contrib/*/*.trackDb.txt \
   > "${buildDir}/alpha.trackDb.txt"
 
 # alpha.hub.txt: hub.txt and genomes.txt merged into 'useOneFile on' form,
 # followed by the same trackDb content
 sed -e 's/genomesFile genomes.txt/useOneFile on/; /trackDb trackDb.txt/d; s/^genome /\ngenome /;' \
   "${buildDir}/${asmId}.hub.txt" \
   "${buildDir}/${asmId}.genomes.txt" > "${buildDir}/alpha.hub.txt"
 printf "\n" >> "${buildDir}/alpha.hub.txt"
 cat "${buildTrackDb}" >> "${buildDir}/alpha.hub.txt"
 cat "${buildDir}"/contrib/*/*.trackDb.txt >> "${buildDir}/alpha.hub.txt"
 
 exit 0
 __EOF__
 
 chmod +x linkOne.sh
 
 #############################################################################
 ### make all the symlinks
 #############################################################################
 
 ### one assembly name per line in to.link.list; read it line by line
 ### instead of word-splitting (and glob-expanding) the output of cat.
 ### linkOne.sh gets /dev/null as stdin so it can not consume the list.
 while read -r S
 do
   [ -n "${S}" ] || continue
   ./linkOne.sh "${S}" < /dev/null 2>&1
 done < to.link.list > link.log
 
 #############################################################################
 ### with the links in place, the tracks will get into the genark
 ### assemblies with the usual GenArk build procedure
 #############################################################################
 ### for example, the 'primates':
 
 cd ~/kent/src/hg/makeDb/doc/primatesAsmHub
 
 time (make) > dbg 2>&1
 ### verify no errors:
     grep -E -i "fail|error|missing|cannot|clade|class|real" dbg
 ### if good, verify on download:
 time (make verifyTestDownload) >> test.down.log 2>&1
 ### verify no errors:
     grep -E -i "fail|error|missing|cannot|clade|class|real|check" test.down.log
 time (make sendDownload) >> send.down.log 2>&1
 ### verify no errors:
     grep -E -i "fail|error|missing|cannot|clade|class|real" send.down.log
 time (make verifyDownload) >> verify.down.log 2>&1
 ### verify no errors, in the log that verifyDownload just wrote:
     grep -E -i "fail|error|missing|cannot|clade|class|real|check" verify.down.log
 
 ### all of the genark 'clades' can be done in one go:
 
 #!/bin/bash
 
 # runOne <clade> - run the build and the test-download verification for
 #   one clade, then print a short summary of the two logs on stdout
 #   clade - name prefix of the sibling directory ../<clade>AsmHub
 # Writes 'dbg' and appends to 'test.down.log' in that directory.
 # Returns non-zero, without running make, when the directory is missing.
 # Changes the working directory, so call it in a subshell or with '&'.
 runOne() {
   local clade="${1}"
   # a failed cd must not let make run in whatever directory we are in
   cd "../${clade}AsmHub" || return 1
   # 'rand' is not set anywhere in this script; default it to 0 so the
   # line prints the same as before and stays safe under 'set -u'
   printf "%s sleep %d\n" "${clade}" "${rand:-0}"
   time (make) > dbg 2>&1
   time (make verifyTestDownload) >> test.down.log 2>&1
   printf "# from the make in ../%sAsmHub\n" "${clade}"
   # whole-word matches only; 'unclassified' lines are not problems
   grep -E -w -i "fail|error|missing|cannot|clade|class|real" dbg | grep -E -v "unclassified"
   grep check test.down.log | tail -3
   printf "#### done with %s\n" "${clade}"
 }
 
 # start every clade in the background; each runOne call runs in its own
 # subshell, so the cd it performs does not affect the other clades
 declare -a pids=()
 for C in primates plants invertebrate legacy \
  birds fish fungi mammals vertebrate viral bacteria
 do
   runOne "${C}" &
   pids+=("$!")
 done
 
 printf "waiting . . .\n"
 # wait on each job by PID: a bare 'wait' always returns zero, which
 # would hide a clade that failed
 failed=0
 for pid in "${pids[@]}"
 do
   wait "${pid}" || failed=1
 done
 printf ". . . exit\n"
 exit "${failed}"
 
+#############################################################################
+### viewing the 'alpha' release version on genome-test
+#############################################################################
+
+### it is possible to view the 'alpha' release version on genome-test
+### the linkOne.sh script above created this alpha.hub.txt file:
+
+  https://genome-test.gi.ucsc.edu/cgi-bin/hgTracks?genome=GCA_000001905.1&hubUrl=/gbdb/genark/GCA/000/001/905/GCA_000001905.1/alpha.hub.txt
+
+### perhaps an Apache RewriteRule on hgwdev could make this viewing
+### easier than this complicated path.
+
 #############################################################################
 ### beta and public release control
 #############################################################################
 
 ### the release of this track is controlled by two files in trackDb:
 
    makeDb/trackDb/betaGenArk.txt
    makeDb/trackDb/publicGenArk.txt
 
 ### cat publicGenArk.txt betaGenArk.txt
 # the listing in this file triggers the building of the public.hub.txt
 # file in the genark system.  Any contrib project listed here will be included
 # contrib track name: tracks found in <buildDir>/contrib/<thisName>/
 tiberius
 # the listing in this file triggers the building of the beta.hub.txt
 # file in the genark system.  Any contrib project listed here will be included
 # contrib track name: tracks found in <buildDir>/contrib/<thisName>/
 tiberius
 
 #############################################################################
 ### the daily cron jobs will correctly get all the files out to our
 ### mirror sites into the /gbdb/genark/ hierarchy
 #############################################################################
 
 ### current cron job is in Hiram's hgwdev account, this should be moved
 ### to the 'otto' user.
 
 # push out the /gbdb/hubs/GC[AF]/ hierarchy to:
 03 01 * * * /hive/data/inside/genArk/pushRR.sh
 
 ### on the Asia node, it is a pull script in the qateam account:
 #  pull down the /gbdb/hubs/GC[AF]/ files from hgwdev daily
 02 16 * * * ~/cronScripts/pullHgwdev.sh
 
 #############################################################################