80a6b42f5687642d83715afc58c46a095610c2dc hiram Tue Apr 27 08:10:49 2021 -0700 liftOver chainNet to GCF_900094665.1 per user request and cleaning hive of crispr results refs #27344 diff --git src/hg/makeDb/doc/mm10.txt src/hg/makeDb/doc/mm10.txt index 9304e86..8296bb1 100644 --- src/hg/makeDb/doc/mm10.txt +++ src/hg/makeDb/doc/mm10.txt @@ -14941,30 +14941,35 @@ ~/kent/src/hg/utils/automation/doCrispr.pl -continue=offTargets \ -stop=offTargets -buildDir=`pwd` mm10 ensGene # Completed: 77942 of 77942 jobs # CPU time in finished jobs: 1397706s 23295.10m 388.25h 16.18d 0.044 y # IO & Wait Time: 313616s 5226.94m 87.12h 3.63d 0.010 y # Average job time: 22s 0.37m 0.01h 0.00d # Longest finished job: 35s 0.58m 0.01h 0.00d # Submission to last job: 9239s 153.98m 2.57h 0.11d ~/kent/src/hg/utils/automation/doCrispr.pl -continue=load \ -stop=load -buildDir=`pwd` mm10 ensGene # real 235m41.378s + time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 \ + -continue=cleanup mm10 -fileServer=hgwdev -buildDir=`pwd` \ + -smallClusterHub=hgwdev -bigClusterHub=ku \ + -workhorse=hgwdev) > cleanup.log 2>&1 + # real 100m50.151s ########################################################################## # FIXUP broken files (working - Max and Hiram - 2018-04,05) # Max generated a new specScores.tab, add in the chrM specScores # and make a unique set in a new specScores.tab file cd /hive/data/genomes/mm10/bed/crispr.10K/uniqSpecScores printf "targetSeq\tmitSpecScore\tofftargetCount\ttargetGenomeGeneLocus\n" \ > max.withChrM.specScores.tab grep -h -v targetSeq ../specScores.max.tab ../addChrM/specScores.tab \ | $HOME/bin/x86_64/gnusort -S100G --parallel=32 -u \ >> max.withChrM.specScores.tab # real 1m39.468s @@ -17470,30 +17475,37 @@ # effScores: Completed: 27697 of 27697 jobs CPU time in finished jobs: 14348277s 239137.94m 3985.63h 166.07d 0.455 y IO & Wait Time: 150120s 2502.01m 41.70h 1.74d 0.005 y Average job time: 523s 8.72m 0.15h 0.01d Longest finished job: 1966s 32.77m 0.55h 0.02d 
Submission to last job: 15067s 251.12m 4.19h 0.17d # offTargets: Completed: 147394 of 147394 jobs CPU time in finished jobs: 2213680s 36894.66m 614.91h 25.62d 0.070 y IO & Wait Time: 2663355s 44389.25m 739.82h 30.83d 0.084 y Average job time: 33s 0.55m 0.01h 0.00d Longest finished job: 68s 1.13m 0.02h 0.00d + # cleaning up 2021-04-24 - Hiram + time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 \ + -continue=cleanup mm10 -tableName=crisprAll -fileServer=hgwdev \ + -buildDir=`pwd` -smallClusterHub=hgwdev -bigClusterHub=ku \ + -workhorse=hgwdev) > cleanup.log 2>&1 + # real 430m18.499s + ######################################################################### # For ENCODE 3 tracks, see doc/encode3/mouse.txt ############################################################################## # LASTZ Gorilla gorGor6 (DONE - 2019-11-20 - Hiram) # establish a screen to control this job screen -S mm10gorGor6 mkdir /hive/data/genomes/mm10/bed/lastzGorGor6.2019-11-20 cd /hive/data/genomes/mm10/bed/lastzGorGor6.2019-11-20 printf '# mouse vs. 
gorilla BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz # TARGET: Mouse Mm10 @@ -18248,16 +18260,119 @@ > swap.log 2>&1 & # real 24m33.940s sed -e 's/^/ # /;' fb.xenTro10.chainMm10Link.txt # 121679610 bases of 1448461978 (8.401%) in intersection sed -e 's/^/ # /;' fb.xenTro10.chainSynMm10Link.txt # 35210769 bases of 1448461978 (2.431%) in intersection time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` xenTro10 mm10) \ > rbest.log 2>&1 & # real 372m38.637s sed -e 's/^/ # /;' fb.xenTro10.chainRBest.Mm10.txt # 58901471 bases of 1448461978 (4.066%) in intersection -######################################################################### +############################################################################## +# LASTZ Ryukyu mouse GCF_900094665.1 (DONE - 2021-04-26 - Hiram) + mkdir /hive/data/genomes/mm10/bed/lastzGCF_900094665.1.2021-04-26 + cd /hive/data/genomes/mm10/bed/lastzGCF_900094665.1.2021-04-26 + + printf '# GCF_900094665.1 Mus caroli (Ryukyu mouse) vs mm10 +BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz +BLASTZ_T=2 +BLASTZ_O=400 +BLASTZ_E=30 +BLASTZ_M=254 +# default BLASTZ_Q score matrix: +# A C G T +# A 91 -114 -31 -123 +# C -114 100 -125 -31 +# G -31 -125 100 -114 +# T -123 -31 -114 91 + +# TARGET: Mouse Mm10 +SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit +SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes +SEQ1_CHUNK=20000000 +SEQ1_LAP=10000 +SEQ1_LIMIT=10 + +# QUERY: Mus caroli - Ryukyu mouse GCF_900094665.1 +SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/900/094/665/GCF_900094665.1/GCF_900094665.1.2bit +SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/900/094/665/GCF_900094665.1/GCF_900094665.1.chrom.sizes.txt +SEQ2_CHUNK=20000000 +SEQ2_LAP=0 +SEQ2_LIMIT=100 + +BASE=/hive/data/genomes/mm10/bed/lastzGCF_900094665.1.2021-04-26 +TMPDIR=/dev/shm +' > DEF + +export targetDb="mm10" +export asmId="GCF_900094665.1" +export gcPath="GCF/900/094/665" +cd /hive/data/genomes/$targetDb/bed/lastz${asmId}.2021-04-26 +time (doBlastzChainNet.pl -trackHub 
-noDbNameCheck -verbose=2 `pwd`/DEF \ + -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \ + -syntenicNet -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 +cat fb.${targetDb}.chain.${asmId}Link.txt +cat fb.${targetDb}.chainSyn.${asmId}Link.txt + +grep -w real do.log | sed -e 's/^/ # /;' + # real 207m59.745s + +sed -e 's/^/ # /;' fb.$targetDb.chain.${asmId}Link.txt + # 2303277151 bases of 2818974548 (81.706%) in intersection +sed -e 's/^/ # /;' fb.$targetDb.chainSyn.${asmId}Link.txt + # 2187910131 bases of 2818974548 (77.614%) in intersection + +time (doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \ +-query2Bit="/hive/data/genomes/asmHubs/$gcPath/${asmId}/${asmId}.2bit" \ +-querySizes="/hive/data/genomes/asmHubs/$gcPath/${asmId}/${asmId}.chrom.sizes.txt" \ +$targetDb ${asmId}) >> rbest.log 2>&1 +grep -w real rbest.log | sed -e 's/^/ # /;' + # real 274m57.907s + +sed -e 's/^/ # /;' fb.$targetDb.chainRBest.$asmId.txt + # 2074680070 bases of 2818974548 (73.597%) in intersection + +# total time for all the above: + # real 482m57.733s + +####################################### +### the swap to the assembly hub +export target="mm10" +export Target="Mm10" +export query="GCF_900094665.1" +export asmId="GCF_900094665.1_CAROLI_EIJ_v1.1" +export gcPath="GCF/900/094/665" + +mkdir -p /hive/data/genomes/asmHubs/refseqBuild/$gcPath/$asmId/trackData/blastz.$target.swap +cd /hive/data/genomes/asmHubs/refseqBuild/$gcPath/$asmId/trackData/blastz.$target.swap + +time (doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 -swapDir=`pwd` \ + /hive/data/genomes/${target}/bed/lastz.${query}/DEF -syntenicNet \ + -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \ + -swap -chainMinScore=3000 -chainLinearGap=medium) >> swap.log 2>&1 +grep -w real swap.log | sed -e 's/^/ # /;' + # real 554m2.489s + +sed -e 's/^/ # /;' fb.${query}.chain.${Target}Link.txt + # 2116460904 bases of 2553121441 (82.897%) in intersection +sed -e 's/^/ # /;' 
fb.${query}.chainSyn.${Target}Link.txt + # 2081173211 bases of 2553121441 (81.515%) in intersection + +time (doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \ +-target2Bit="/hive/data/genomes/asmHubs/$gcPath/${query}/${query}.2bit" \ +-targetSizes="/hive/data/genomes/asmHubs/$gcPath/${query}/${query}.chrom.sizes.txt" \ +$query $target) >> rbest.log 2>&1 +grep -w real rbest.log | sed -e 's/^/ # /;' + # real 246m55.342s + +sed -e 's/^/ # /;' fb.${query}.chainRBest.${Target}.txt + # 2078102689 bases of 2553121441 (81.395%) in intersection + +# Complete run time for the entire swap operation: + # real 367m14.987s + +##############################################################################