2eb7675b61bc79306e6ec54839140d2b40b31bf5 hiram Mon May 24 12:19:29 2021 -0700 canFam6 dog Tasha browser completed refs #27546 diff --git src/hg/makeDb/doc/canFam6/initialBuild.txt src/hg/makeDb/doc/canFam6/initialBuild.txt index a678442..2a3b381 100644 --- src/hg/makeDb/doc/canFam6/initialBuild.txt +++ src/hg/makeDb/doc/canFam6/initialBuild.txt @@ -742,51 +742,36 @@ mkdir /hive/data/genomes/canFam6/bed/cpgIslands cd /hive/data/genomes/canFam6/bed/cpgIslands time (doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku \ -workhorse=hgwdev -smallClusterHub=ku canFam6) > do.log 2>&1 # real 2m52.170s cat fb.canFam6.cpgIslandExt.txt # 44591675 bases of 2312743346 (1.928%) in intersection ############################################################################## # genscan - (DONE - 2021-05-13 - Hiram) mkdir /hive/data/genomes/canFam6/bed/genscan cd /hive/data/genomes/canFam6/bed/genscan time (doGenscan.pl -buildDir=`pwd` -workhorse=hgwdev -dbHost=hgwdev \ -bigClusterHub=ku canFam6) > do.log 2>&1 -XXX - running - Thu May 13 10:38:58 PDT 2021 - # real 43m47.630s - -# four jobs failed, running manually on hgwdev: -./runGsBig2M.csh chr22 000 gtf/000/chr22.gtf pep/000/chr22.pep subopt/000/chr22.bed & -./runGsBig2M.csh chr15 000 gtf/000/chr15.gtf pep/000/chr15.pep subopt/000/chr15.bed & -./runGsBig2M.csh chr20 000 gtf/000/chr20.gtf pep/000/chr20.pep subopt/000/chr20.bed & -./runGsBig2M.csh chr3 000 gtf/000/chr3.gtf pep/000/chr3.pep subopt/000/chr3.bed -wait - # real 23m28.061s - - # continuing: - time (doGenscan.pl -buildDir=`pwd` -workhorse=hgwdev -dbHost=hgwdev \ - -continue=makeBed -bigClusterHub=ku canFam6) > makeBed.log 2>&1 - # real 0m54.356s - - cat fb.canFam6.genscan.txt - # 55250288 bases of 2337131234 (2.364%) in intersection + # real 48m34.968s - cat fb.canFam6.genscanSubopt.txt - # 48016592 bases of 2337131234 (2.055%) in intersection + sed -e 's/^/ # /;' fb.canFam6.genscan.txt + # 54159469 bases of 2312743346 (2.342%) in intersection + sed -e 's/^/ # /;' fb.canFam6.genscanSubopt.txt + # 46763127 bases of 2312743346 (2.022%) in intersection ######################################################################### # Create kluster run files (DONE - 2021-05-13 - Hiram) # numerator is canFam6 gapless bases "real" as reported by: featureBits -noRandom -noHap canFam6 gap # 58852 bases of 2310615395 (0.003%) in intersection # ^^^ # denominator is hg19 gapless bases as reported by: # featureBits -noRandom -noHap hg19 gap # 234344806 bases of 2861349177 (8.190%) in intersection # 1024 is threshold used for human -repMatch: calc \( 2310615395 / 2861349177 \) \* 1024 # ( 2310615395 / 2861349177 ) * 1024 = 826.907175 @@ -1026,35 +1011,34 @@ # etc/hgwdev.dbs etc/align.dbs git commit -m "Added canFam6 - dog refs #27546" etc/hgwdev.dbs etc/align.dbs git push make etc-update # Notify Chris Lee this is ready to go. Magic will happen. ############################################################################# # augustus gene track (DONE - 2021-05-13 - Hiram) mkdir /hive/data/genomes/canFam6/bed/augustus cd /hive/data/genomes/canFam6/bed/augustus time (doAugustus.pl -buildDir=`pwd` -bigClusterHub=ku \ -species=human -dbHost=hgwdev \ -workhorse=hgwdev canFam6) > do.log 2>&1 -XXX - running - Thu May 13 11:47:52 PDT 2021 - # real 189m35.455s + # real 114m7.669s - cat fb.canFam6.augustusGene.txt - # 48256052 bases of 2337131234 (2.065%) in intersection + sed -e 's/^/ # /;' fb.canFam6.augustusGene.txt + # 47489256 bases of 2312743346 (2.053%) in intersection ######################################################################### # ncbiRefSeq (DONE - 2021-05-13 - Hiram) mkdir /hive/data/genomes/canFam6/bed/ncbiRefSeq cd /hive/data/genomes/canFam6/bed/ncbiRefSeq # running step wise just to be careful time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \ -bigClusterHub=ku -dbHost=hgwdev \ -stop=download -fileServer=hgwdev -smallClusterHub=ku -workhorse=hgwdev \ GCF_000002285.5_Dog10K_Boxer_Tasha canFam6) > download.log 2>&1 # real 2m5.429s time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \ -continue=process -bigClusterHub=ku -dbHost=hgwdev \ @@ -1062,94 +1046,87 @@ GCF_000002285.5_Dog10K_Boxer_Tasha canFam6) > process.log 2>&1 # real 3m31.265s time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \ -continue=load -bigClusterHub=ku -dbHost=hgwdev \ -fileServer=hgwdev -smallClusterHub=ku -workhorse=hgwdev \ GCF_000002285.5_Dog10K_Boxer_Tasha canFam6) > load.log 2>&1 # real 0m47.905s cat fb.ncbiRefSeq.canFam6.txt # 88916188 bases of 2312743346 (3.845%) in intersection # add: include ../../refSeqComposite.ra alpha # to the dog/canFam6/trackDb.ra to turn on the track in the browser - # XXX 2021-05-13 - ready for this after genbank runs - featureBits -enrichment canFam6 refGene ncbiRefSeq - # refGene 0.402%, ncbiRefSeq 3.148%, both 0.402%, cover 99.90%, enrich 31.73x +# refGene 0.138%, ncbiRefSeq 3.845%, both 0.136%, cover 99.15%, enrich 25.79x featureBits -enrichment canFam6 ncbiRefSeq refGene - # ncbiRefSeq 3.148%, refGene 0.402%, both 0.402%, cover 12.76%, enrich 31.73x - +# ncbiRefSeq 3.845%, refGene 0.138%, both 0.136%, cover 3.55%, enrich 25.79x featureBits -enrichment canFam6 ncbiRefSeqCurated refGene - # ncbiRefSeqCurated 0.401%, refGene 0.402%, both 0.400%, cover 99.66%, enrich 247.79x - +# ncbiRefSeqCurated 0.152%, refGene 0.138%, both 0.133%, cover 87.51%, enrich 636.15x featureBits -enrichment canFam6 refGene ncbiRefSeqCurated - # refGene 0.402%, ncbiRefSeqCurated 0.401%, both 0.400%, cover 99.33%, enrich 247.79x + # refGene 0.138%, ncbiRefSeqCurated 0.152%, both 0.133%, cover 96.65%, enrich 636.15x ######################################################################### # LIFTOVER TO canFam5 (DONE - 2021-05-13 - Hiram) ssh hgwdev mkdir /hive/data/genomes/canFam6/bed/blat.canFam5.2021-05-13 cd /hive/data/genomes/canFam6/bed/blat.canFam5.2021-05-13 doSameSpeciesLiftOver.pl -verbose=2 \ -debug -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \ -ooc=/hive/data/genomes/canFam6/jkStuff/canFam6.11.ooc \ canFam6 canFam5 time (doSameSpeciesLiftOver.pl -verbose=2 \ -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \ -ooc=/hive/data/genomes/canFam6/jkStuff/canFam6.11.ooc \ canFam6 canFam5) > doLiftOverToCanFam5.log 2>&1 -XXX - running - Thu May 13 11:34:24 PDT 2021 - # real 299m34.538s + # real 179m37.613s # see if the liftOver menus function in the browser from canFam6 to canFam5 ######################################################################### # LIFTOVER TO canFam4 (DONE - 2021-05-13 - Hiram) ssh hgwdev mkdir /hive/data/genomes/canFam6/bed/blat.canFam4.2021-05-13 cd /hive/data/genomes/canFam6/bed/blat.canFam4.2021-05-13 doSameSpeciesLiftOver.pl -verbose=2 \ -debug -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \ -ooc=/hive/data/genomes/canFam6/jkStuff/canFam6.11.ooc \ canFam6 canFam4 time (doSameSpeciesLiftOver.pl -verbose=2 \ -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \ -ooc=/hive/data/genomes/canFam6/jkStuff/canFam6.11.ooc \ canFam6 canFam4) > doLiftOverToCanFam4.log 2>&1 -XXX - running - Thu May 13 11:34:24 PDT 2021 - # real 299m34.538s + # real 960m7.335s # see if the liftOver menus function in the browser from canFam6 to canFam4 ######################################################################### # LIFTOVER TO canFam3 (DONE - 2021-05-13 - Hiram) ssh hgwdev mkdir /hive/data/genomes/canFam6/bed/blat.canFam3.2021-05-13 cd /hive/data/genomes/canFam6/bed/blat.canFam3.2021-05-13 doSameSpeciesLiftOver.pl -verbose=2 \ -debug -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \ -ooc=/hive/data/genomes/canFam6/jkStuff/canFam6.11.ooc \ canFam6 canFam3 time (doSameSpeciesLiftOver.pl -verbose=2 \ -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \ -ooc=/hive/data/genomes/canFam6/jkStuff/canFam6.11.ooc \ canFam6 canFam3) > doLiftOverToCanFam3.log 2>&1 -XXX - running - Thu May 13 11:34:24 PDT 2021 - # real 278m52.252s + # real 2695m19.425s # see if the liftOver menus function in the browser from canFam6 to canFam3 ######################################################################### # BLATSERVERS ENTRY (DONE - 2021-05-13 - Hiram) mkdir /hive/data/genomes/canFam6/dynamicBlat cd /hive/data/genomes/canFam6/dynamicBlat time gfServer -trans index canFam6.trans.gfidx ../canFam6.2bit & # real 5m27.906s time gfServer -stepSize=5 index canFam6.untrans.gfidx ../canFam6.2bit # real 3m3.944s rsync -a -P ../canFam6.2bit qateam@dynablat-01:/scratch/hubs/canFam6/ rsync -a -P canFam6.untrans.gfidx qateam@dynablat-01:/scratch/hubs/canFam6/ @@ -1185,83 +1162,77 @@ ssh hgwdev hgsql -e 'update dbDb set defaultPos="chrX:11706333-11735291" where name="canFam6";' hgcentraltest ############################################################################## # crispr whole genome (DONE - 2021-05-13 - Hiram) mkdir /hive/data/genomes/canFam6/bed/crisprAll cd /hive/data/genomes/canFam6/bed/crisprAll # the large shoulder argument will cause the entire genome to be scanned # this takes a while for a new genome to get the bwa indexing done time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 -stop=ranges \ canFam6 -tableName=crisprAll \ -fileServer=hgwdev \ -buildDir=`pwd` -smallClusterHub=hgwdev -bigClusterHub=ku \ - -workhorse=hgwdev) > ranges.log 2>&1 -XXX - running - Thu May 13 11:38:41 PDT 2021 + -workhorse=hgwdev) > indexFa.log 2>&1 # real 58m27.340s + # that command failed, finished the final command manually, it needed + # the gene table on the python command time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 \ - -continue=guides -stop=load canFam6 augustusGene \ - -shoulder=250000000 -tableName=crisprAll -fileServer=hgwdev \ + -continue=ranges canFam6 -tableName=crisprAll \ + -fileServer=hgwdev \ -buildDir=`pwd` -smallClusterHub=hgwdev -bigClusterHub=ku \ - -workhorse=hgwdev) > load.log 2>&1 - # zreal 6831m11.040s + -workhorse=hgwdev) > ranges.log 2>&1 + # real 8554m11.613s cat guides/run.time | sed -e 's/^/# /;' # Completed: 100 of 100 jobs -# CPU time in finished jobs: 17641s 294.01m 4.90h 0.20d 0.001 y -# IO & Wait Time: 1178s 19.64m 0.33h 0.01d 0.000 y -# Average job time: 188s 3.14m 0.05h 0.00d -# Longest finished job: 356s 5.93m 0.10h 0.00d -# Submission to last job: 362s 6.03m 0.10h 0.00d +# CPU time in finished jobs: 10613s 176.88m 2.95h 0.12d 0.000 y +# IO & Wait Time: 318s 5.30m 0.09h 0.00d 0.000 y +# Average job time: 109s 1.82m 0.03h 0.00d +# Longest finished job: 269s 4.48m 0.07h 0.00d +# Submission to last job: 272s 4.53m 0.08h 0.00d cat specScores/run.time | sed -e 's/^/# /;' -# Completed: 3079567 of 3079567 jobs -# CPU time in finished jobs: 249034274s 4150571.23m 69176.19h 2882.34d 7.897 y -# IO & Wait Time: 6571097s 109518.28m 1825.30h 76.05d 0.208 y -# Average job time: 83s 1.38m 0.02h 0.00d -# Longest finished job: 338s 5.63m 0.09h 0.00d -# Submission to last job: 288453s 4807.55m 80.13h 3.34d +# Completed: 3079460 of 3079460 jobs +# CPU time in finished jobs: 246730604s 4112176.74m 68536.28h 2855.68d 7.824 y +# IO & Wait Time: 5517084s 91951.39m 1532.52h 63.86d 0.175 y +# Average job time: 82s 1.37m 0.02h 0.00d +# Longest finished job: 160s 2.67m 0.04h 0.00d +# Submission to last job: 276754s 4612.57m 76.88h 3.20d - grep "Number of" load.log | grep Scores | grep "^#" -# Number of specScores: 231816384 -# Number of effScores: 252358865 + grep "Number of" ranges.log | grep Scores | grep "^#" +# Number of specScores: 231915228 +# Number of effScores: 253445228 cat effScores/run.time | sed -e 's/^/# /;' -# Completed: 25231 of 25231 jobs -# CPU time in finished jobs: 12713218s 211886.96m 3531.45h 147.14d 0.403 y -# IO & Wait Time: 150199s 2503.32m 41.72h 1.74d 0.005 y -# Average job time: 510s 8.50m 0.14h 0.01d -# Longest finished job: 6617s 110.28m 1.84h 0.08d -# Submission to last job: 14126s 235.43m 3.92h 0.16d +# Completed: 25340 of 25340 jobs +# CPU time in finished jobs: 13149467s 219157.78m 3652.63h 152.19d 0.417 y +# IO & Wait Time: 145678s 2427.97m 40.47h 1.69d 0.005 y +# Average job time: 525s 8.74m 0.15h 0.01d +# Longest finished job: 97897s 1631.62m 27.19h 1.13d +# Submission to last job: 110740s 1845.67m 30.76h 1.28d cat offTargets/run.time | sed -e 's/^/# /;' -# Completed: 153979 of 153979 jobs -# CPU time in finished jobs: 1739935s 28998.91m 483.32h 20.14d 0.055 y -# IO & Wait Time: 2672538s 44542.31m 742.37h 30.93d 0.085 y -# Average job time: 29s 0.48m 0.01h 0.00d -# Longest finished job: 53s 0.88m 0.01h 0.00d -# Submission to last job: 4617s 76.95m 1.28h 0.05d - - time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 \ - -continue=cleanup canFam6 \ - -tableName=crisprAll -fileServer=hgwdev \ - -buildDir=`pwd` -smallClusterHub=hgwdev -bigClusterHub=ku \ - -workhorse=hgwdev) > cleanup.log 2>&1 - # real 375m19.820s +# Completed: 153974 of 153974 jobs +# CPU time in finished jobs: 1850764s 30846.07m 514.10h 21.42d 0.059 y +# IO & Wait Time: 1927940s 32132.33m 535.54h 22.31d 0.061 y +# Average job time: 25s 0.41m 0.01h 0.00d +# Longest finished job: 64s 1.07m 0.02h 0.00d +# Submission to last job: 3954s 65.90m 1.10h 0.05d ######################################################################### # all.joiner update, downloads and in pushQ - (WORKING - 2019-11-20 - Hiram) cd $HOME/kent/src/hg/makeDb/schema # verify all the business is done for release ~/kent/src/hg/utils/automation/verifyBrowser.pl canFam6 # 71 tables in database canFam6 - Dog, Canis lupus familiaris # verified 60 tables in database canFam6, 11 extra tables, 19 optional tables # Ensembl genes 5 optional tables # chainNetRBestHg38 3 optional tables # chainNetRBestMm10 3 optional tables # chainNetSynHg38 3 optional tables # chainNetSynMm10 3 optional tables # gapOverlap 1 optional tables # tandemDups 1 optional tables @@ -1285,64 +1256,59 @@ # blatServers: canFam6 blat1b 17907 0 1 canFam6 blat1b 17906 1 0 # fixup all.joiner until this is a clean output joinerCheck -database=canFam6 -tableCoverage all.joiner joinerCheck -database=canFam6 -times all.joiner joinerCheck -database=canFam6 -keys all.joiner # when clean, check in: git commit -m 'adding rules for canFam6 refs #27546' all.joiner git push # run up a 'make alpha' in hg/hgTables to get this all.joiner file # into the hgwdev/genome-test system cd /hive/data/genomes/canFam6 time (makeDownloads.pl canFam6) > downloads.log 2>&1 - # real 15m31.624s + # real 16m39.750s # now ready for pushQ entry mkdir /hive/data/genomes/canFam6/pushQ cd /hive/data/genomes/canFam6/pushQ time ($HOME/kent/src/hg/utils/automation/makePushQSql.pl -redmineList canFam6) > canFam6.pushQ.sql 2> stderr.out - # real 11m11.758s +XXX - running - Mon May 24 11:47:51 PDT 2021 + # real 15m12.083s # remove the tandemDups and gapOverlap from the file list: sed -i -e "/tandemDups/d" redmine.canFam6.table.list sed -i -e "/Tandem Dups/d" redmine.canFam6.releaseLog.txt sed -i -e "/gapOverlap/d" redmine.canFam6.table.list sed -i -e "/Gap Overlaps/d" redmine.canFam6.releaseLog.txt # check for errors in stderr.out, some are OK, e.g.: - # WARNING: canFam6 does not have ucscToRefSeq - # WARNING: hgwdev does not have /gbdb/canFam6/ncbiRefSeq/ncbiRefSeqVersion.txt - # WARNING: hgwdev does not have /gbdb/canFam6/ncbiRefSeq/ncbiRefSeqOther.bb - # WARNING: hgwdev does not have /gbdb/canFam6/ncbiRefSeq/ncbiRefSeqOther.ix - # WARNING: hgwdev does not have /gbdb/canFam6/ncbiRefSeq/ncbiRefSeqOther.ixx - # WARNING: hgwdev does not have /gbdb/canFam6/ncbiRefSeq/seqNcbiRefSeq.rna.fa # WARNING: canFam6 does not have seq # WARNING: canFam6 does not have extFile # verify the file list does correctly match to files cat redmine.canFam6.file.list | while read L do eval ls $L > /dev/null done # should be silent, missing files will show as errors # verify database tables, how many to expect: wc -l redmine.canFam6.table.list - # 57 redmine.canFam6.table.list + # 63 redmine.canFam6.table.list # how many actual: awk -F'.' '{printf "hgsql -N %s -e '"'"'show table status like \"%s\";'"'"'\n", $1, $2}' redmine.canFam6.table.list | sh | wc -l - # 57 + # 63 # would be a smaller number actual if some were missing # add the path names to the listing files in the redmine issue # in the three appropriate entry boxes: # /hive/data/genomes/canFam6/pushQ/redmine.canFam6.file.list # /hive/data/genomes/canFam6/pushQ/redmine.canFam6.releaseLog.txt # /hive/data/genomes/canFam6/pushQ/redmine.canFam6.table.list #########################################################################