098e7866ab59c5c6b0c4c77919350d637894d113 markd Wed Apr 28 00:38:42 2021 -0700 transMap V6 diff --git src/hg/makeDb/doc/transMapTracks.txt src/hg/makeDb/doc/transMapTracks.txt index 253be75..416d04d 100644 --- src/hg/makeDb/doc/transMapTracks.txt +++ src/hg/makeDb/doc/transMapTracks.txt @@ -1,131 +1,178 @@ TransMap maps transcripts from source assemblies to destination assemblies via chains. Tracks are built across current vertebrate assemblies concurrently. The concurrent build is done as the fasta files and metadata tables for the source transcripts are shared and stored in hgFixed/gbdb. Modifications will be made to allow incrementally adding destination genomes. TransMap build is based on the pslMap and other tools in the kent/src tree. The driver for generating the data is available in hg clone /cluster/home/markd/compbio/repos/hg/transMap it also requires hg clone /cluster/home/markd/compbio/repos/hg/pycbio The source is checked out and run in /hive/data/inside/transMap/build General steps for setup, only done once: cd /hive/data/inside/transMap/build hg clone /cluster/home/markd/compbio/repos/hg/transMap mkdir transMap/extern cd transMap/extern hg clone /cluster/home/markd/compbio/repos/hg/pycbio (cd pycbio && make) ############################################################################################## # TransMap V3 tracks (2014-12-18 markd) ## # get data and setup batch make getData make batchSetup # build mappings ssk kk para create -batch=para1/aln para1/aln/jobs.para para try -batch=para1/aln para push -batch=para1/aln -maxPush=1000000 para create -batch=para1/filt para1/filt/jobs.para para try -batch=para1/filt para push -batch=para1/filt -maxPush=1000000 # load and verify data ssh hgwdev make -j 8 loadDb make runJoinerCheck # create push.list which has all tables for databases active on RR or hgwbeta make pushList # create template for hg/makeDb/doc that points to this file and append it to all # database in this build: destDbs=$(make printDestDbs) cd 
~kent/src/hg/makeDb/doc for db in $destDbs ; do cat /hive/data/inside/transMap/build/transMap/builds/vertebrate.2014-12-11.V3/makedb.doc.template >> $db.txt; done # request push of tracks # note: was never made public ############################################################################################## # TransMap V4 tracks (2017-02-10 markd) ## - This is the build using bigTransMap format (derived from bigPsl), not mysql tracks. Build code is here: /cluster/home/markd/compbio/repos/git/transMap.git which also requires /cluster/home/markd/compbio/repos/git/pycbio.git git@github.com:diekhans/pipettor.git - Build process: tmdir=~/compbio/projs/transMap/transMap-dev mkdir -p /hive/data/outside/transMap/V4/logs pushd /hive/data/outside/transMap/V4 (time nice ${tmdir}/bin/luigiTransMap --workers=32 --logConfFile=${tmdir}/etc/logging.conf ${tmdir}/etc/transMapBuildConf.py) >&logs/2017-01-12.log & # takes maybe 10-14 hours, at the end, check for parasol jobs # that are lost (using one or two stragglers for a few batches), then do para stop -batch=/the/batch/name para push -batch=/the/batch/name # now wait for luigiTransMap to complete. 
Must manually check log, as # current luigi doesn't exit non-zero if there were errors # create symlinks to /gbdb for bigPsl files ${tmdir}/bin/linkToGbdb ${tmdir}/etc/transMapBuildConf.py # add trackDb entries at top level by editing: src/hg/makeDb/trackDb/trackDb.transMap.ra # create push lists for databases and gbdb files for genomes on hgwbeta: ${tmdir}/bin/mkPushList ${tmdir}/etc/transMapBuildConf.py db.push.lst gbdb.push.lst # and creating the following html files: transMapV4.html transMapEnsemblV4.html transMapEstV4.html transMapRefseqV4.html transMapRnaV4.html ############################################################################################## # TransMap V5 tracks (2019-06-10 markd) ## - Build code is here: /cluster/home/markd/compbio/repos/git/transMap.git which also requires /cluster/home/markd/compbio/repos/git/pycbio.git git@github.com:diekhans/pipettor.git - Build process: # IMPORTANT: SET VERSION IN transMap-dev/etc/transMapBuildConf.py tmdir=~/compbio/projs/transMap/transMap-dev mkdir -p /hive/data/inside/transMap/V5/logs pushd /hive/data/inside/transMap/V5 (time nice ${tmdir}/bin/luigiTransMap --workers=64 --logConfFile=${tmdir}/etc/logging.conf ${tmdir}/etc/transMapBuildConf.py) >&logs/2019-06-24.log & # takes maybe a day or more, at the end, check for parasol jobs # that are lost (using one or two stragglers for a few batches), then do para stop -batch=/the/batch/name para push -batch=/the/batch/name # now wait for luigiTransMap to complete. 
Must manually check log, as # current luigi doesn't exit non-zero if there were errors # create symlinks to /gbdb for bigPsl files ${tmdir}/bin/linkToGbdb ${tmdir}/etc/transMapBuildConf.py # add trackDb entries at top level by editing: src/hg/makeDb/trackDb/trackDb.transMap.ra # create push lists for databases and gbdb files for genomes on hgwbeta: ${tmdir}/bin/mkPushList ${tmdir}/etc/transMapBuildConf.py db.push.lst gbdb.push.lst ############################################################################################## +# TransMap V6 tracks (2021-04-26 markd) +## + - Build code is here: + /cluster/home/markd/compbio/repos/git/transMap.git + which also requires + /cluster/home/markd/compbio/repos/git/pycbio.git + git@github.com:diekhans/pipettor.git + + - Build process: + # IMPORTANT: SET VERSION IN transMap-dev/etc/transMapBuildConf.py + tmdir=~markd/compbio/projs/transMap/transMap + mkdir -p /hive/data/inside/transMap/V6/logs + pushd /hive/data/inside/transMap/V6 + # must start in foreground then background, as $(idate) is an alias that bash doesn't find + (time nice ${tmdir}/bin/luigiTransMap --workers=64 --logConfFile=${tmdir}/etc/logging.conf ${tmdir}/etc/transMapBuildConf.py) >&logs/$(idate).log + ^Z bg + + # takes maybe a day or more, at the end, check for parasol jobs + # that are lost (using one or two stragglers for a few batches), then do + para stop -batch=/the/batch/name + para push -batch=/the/batch/name + # now wait for luigiTransMap to complete. Review and check the log. + + # compare to stats/detailed of previous to see if all make sense + (cd ../V5/stats/detailed && find -type f) | sort >V5.stats.lst + (cd ../V6/stats/detailed && find -type f) | sort >V6.stats.lst + diff V5.stats.lst V6.stats.lst >&stats.lst.diff + + # all removed genomes have been deactivated as they have been moved to + # assembly hubs. these are not public hubs. 
+ + # create symlinks to /gbdb for bigPsl files + ${tmdir}/bin/linkToGbdb ${tmdir}/etc/transMapBuildConf.py + + # add trackDb entries at top level by editing: + src/hg/makeDb/trackDb/trackDb.transMap.ra + # create push lists for databases and gbdb files for genomes on hgwbeta: + ${tmdir}/bin/mkPushList ${tmdir}/etc/transMapBuildConf.py db.push.lst gbdb.push.lst + + # update tracks + # add transMapV6.html + # update trackDb.transMap.ra + + cd src/hg/makeDb/trackDb + make -j 64 + +##############################################################################################