90af857c47a2063649df6d9526c94de4989f12ec hiram Tue Oct 19 12:03:52 2021 -0700 mouse and date palm lastz runs completed per user requests refs #28365 #28302 diff --git src/hg/makeDb/doc/asmHubs/lastzRuns.txt src/hg/makeDb/doc/asmHubs/lastzRuns.txt index 4db359e..85106ac 100644 --- src/hg/makeDb/doc/asmHubs/lastzRuns.txt +++ src/hg/makeDb/doc/asmHubs/lastzRuns.txt @@ -1,104 +1,347 @@ ############################################################################## # LASTZ tardigrades GCA_001949185.1 (DONE - 2021-10-06 - Gerardo) # should be able to run this from anywhere, this time it was run from: cd kent/src/hg/utils/automation time (~/kent/src/hg/utils/automation/pairLastz.sh \ GCA_001949185.1_Rvar_4.0 GCA_002082055.1_nHd_3.1 other other) \ > liftOverTest.log 2>&1 # check the total time grep -w real liftOverTest.log | tail -1 | sed -e 's/^/ # /;' # real 15m52.438s # this liftOverTest log file happens to have a copy of the make doc, as well # as the copy of the make doc left in the target assembly directory: # /hive/data/genomes/asmHubs/allBuild/GCA/001/949/185/GCA_001949185.1_Rvar_4.0/trackData/lastzGCA_002082055.1.2021-10-06/makeDoc.txt # this command outputs this makeDoc text: cat kent/src/hg/utils/automation/liftOverTest.log ############################################################################## # LASTZ tardigrades GCA_001949185.1 vs. tardigrades GCA_002082055.1 # (DONE - 2021-10-06 - Gerardo) mkdir /hive/data/genomes/asmHubs/allBuild/GCA/001/949/185/GCA_001949185.1_Rvar_4.0/trackData/lastzGCA_002082055.1.2021-10-06 cd /hive/data/genomes/asmHubs/allBuild/GCA/001/949/185/GCA_001949185.1_Rvar_4.0/trackData/lastzGCA_002082055.1.2021-10-06 printf '# tardigrades GCA_002082055.1 vs. 
tardigrades GCA_001949185.1 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz # TARGET: tardigrades GCA_001949185.1 SEQ1_DIR=/hive/data/genomes/asmHubs/GCA/001/949/185/GCA_001949185.1/GCA_001949185.1.2bit SEQ1_LEN=/hive/data/genomes/asmHubs/GCA/001/949/185/GCA_001949185.1/GCA_001949185.1.chrom.sizes.txt SEQ1_CHUNK=20000000 SEQ1_LAP=10000 SEQ1_LIMIT=40 # QUERY: tardigrades GCA_002082055.1 SEQ2_DIR=/hive/data/genomes/asmHubs/GCA/002/082/055/GCA_002082055.1/GCA_002082055.1.2bit SEQ2_LEN=/hive/data/genomes/asmHubs/GCA/002/082/055/GCA_002082055.1/GCA_002082055.1.chrom.sizes.txt SEQ2_CHUNK=20000000 SEQ2_LAP=0 SEQ2_LIMIT=100 BASE=/hive/data/genomes/asmHubs/allBuild/GCA/001/949/185/GCA_001949185.1_Rvar_4.0/trackData/lastzGCA_002082055.1.2021-10-06 TMPDIR=/dev/shm ' > DEF time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \ -tAsmId GCA_001949185.1_Rvar_4.0 -qAsmId GCA_002082055.1_nHd_3.1 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \ -chainMinScore=5000 -chainLinearGap=loose) > do.log 2>&1 grep -w real do.log | sed -e 's/^/ # /;' # real 10m31.512s sed -e 's/^/ # /;' fb.GCA_001949185.1.chainGCA_002082055.1Link.txt # 14450772 bases of 55842812 (25.878%) in intersection sed -e 's/^/ # /;' fb.GCA_001949185.1.chainSynGCA_002082055.1Link.txt # 6694962 bases of 55842812 (11.989%) in intersection time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \ -target2Bit="/hive/data/genomes/asmHubs/GCA/001/949/185/GCA_001949185.1/GCA_001949185.1.2bit" \ -targetSizes="/hive/data/genomes/asmHubs/GCA/001/949/185/GCA_001949185.1/GCA_001949185.1.chrom.sizes.txt" \ -query2Bit="/hive/data/genomes/asmHubs/GCA/002/082/055/GCA_002082055.1/GCA_002082055.1.2bit" \ -querySizes="/hive/data/genomes/asmHubs/GCA/002/082/055/GCA_002082055.1/GCA_002082055.1.chrom.sizes.txt" \ GCA_001949185.1 GCA_002082055.1) > rbest.log 2>&1 grep -w real rbest.log | sed -e 's/^/ # /;' # real 
1m39.278s sed -e 's/^/ # /;' fb.GCA_001949185.1.chainRBest.GCA_002082055.1.txt # 12952644 bases of 55842812 (23.195%) in intersection ### and for the swap cd /hive/data/genomes/asmHubs/allBuild/GCA/002/082/055/GCA_002082055.1_nHd_3.1/trackData/blastz.GCA_001949185.1.swap time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \ -tAsmId GCA_001949185.1_Rvar_4.0 -qAsmId GCA_002082055.1_nHd_3.1 /hive/data/genomes/asmHubs/allBuild/GCA/001/949/185/GCA_001949185.1_Rvar_4.0/trackData/lastzGCA_002082055.1.2021-10-06/DEF -swapDir=`pwd` \ -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \ -chainMinScore=5000 -chainLinearGap=loose) > swap.log 2>&1 grep -w real swap.log | sed -e 's/^/ # /;' # real 2m1.276s sed -e 's/^/ # /;' fb.GCA_002082055.1.chainGCA_001949185.1Link.txt # 15159345 bases of 104154999 (14.555%) in intersection sed -e 's/^/ # /;' fb.GCA_002082055.1.chainSynGCA_001949185.1Link.txt # 7022280 bases of 104154999 (6.742%) in intersection time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \ -query2bit="/hive/data/genomes/asmHubs/GCA/001/949/185/GCA_001949185.1/GCA_001949185.1.2bit" \ -querySizes="/hive/data/genomes/asmHubs/GCA/001/949/185/GCA_001949185.1/GCA_001949185.1.chrom.sizes.txt" \ -target2bit="/hive/data/genomes/asmHubs/GCA/002/082/055/GCA_002082055.1/GCA_002082055.1.2bit" \ -targetSizes="/hive/data/genomes/asmHubs/GCA/002/082/055/GCA_002082055.1/GCA_002082055.1.chrom.sizes.txt" \ GCA_002082055.1 GCA_001949185.1) > rbest.log 2>&1 grep -w real rbest.log | sed -e 's/^/ # /;' # real 1m39.919s sed -e 's/^/ # /;' fb.GCA_002082055.1.chainRBest.GCA_001949185.1.txt # 13053514 bases of 104154999 (12.533%) in intersection ############################################################################## +# GCF_900094665.1_CAROLI_EIJ_v1.1 GCF_900095145.1_PAHARI_EIJ_v1.1 +# (DONE - 2021-10-08 - Hiram) + + # to verify the sizes of the genomes to decide on
target query + # (this function could be folded into the pairLastz.sh script) + + # using the 'goto' shell function from Hiram's ~/.bashrc.hiram + goto GCF_900094665.1 + n50.pl *.sizes +# reading: GCF_900094665.1_CAROLI_EIJ_v1.1.chrom.sizes +# contig count: 3162, total size: 2553121441, one half size: 1276560720 +# cumulative N50 count contig contig size +1253753066 8 chr5 145277780 +1276560720 one half size +1376380316 9 chr10 122627250 + + + goto GCF_900095145.1 + n50.pl *.sizes +# reading: GCF_900095145.1_PAHARI_EIJ_v1.1.chrom.sizes +# contig count: 2581, total size: 2475012951, one half size: 1237506475 +# cumulative N50 count contig contig size +1192339185 8 chr7 113323912 +1237506475 one half size +1303745413 9 chr8 111406228 + + # in this case, choosing the larger genome GCF_900094665.1_CAROLI_EIJ_v1.1 + # as target + + cd /cluster/home/hiram/kent/src/hg/makeDb/doc/asmHubs + # the single command to run everything: + + time (~/kent/src/hg/utils/automation/pairLastz.sh \ + GCF_900094665.1_CAROLI_EIJ_v1.1 \ + GCF_900095145.1_PAHARI_EIJ_v1.1 mammal mammal) >> mice.lastz.log 2>&1 & + + # copy of makeDoc from /hive/data/genomes/asmHubs/refseqBuild/GCF/900/094/665/GCF_900094665.1_CAROLI_EIJ_v1.1/trackData/lastzGCF_900095145.1.2021-10-08/makeDoc.txt + +############################################################################## +# LASTZ Ryukyu mouse GCF_900094665.1 vs. shrew mouse GCF_900095145.1 +# (DONE - 2021-10-08 - hiram) + + mkdir /hive/data/genomes/asmHubs/allBuild/GCF/900/094/665/GCF_900094665.1_CAROLI_EIJ_v1.1/trackData/lastzGCF_900095145.1.2021-10-08 + cd /hive/data/genomes/asmHubs/allBuild/GCF/900/094/665/GCF_900094665.1_CAROLI_EIJ_v1.1/trackData/lastzGCF_900095145.1.2021-10-08 + + printf '# shrew mouse GCF_900095145.1 vs. 
Ryukyu mouse GCF_900094665.1 +BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz + +# TARGET: Ryukyu mouse GCF_900094665.1 +SEQ1_DIR=/hive/data/genomes/asmHubs/GCF/900/094/665/GCF_900094665.1/GCF_900094665.1.2bit +SEQ1_LEN=/hive/data/genomes/asmHubs/GCF/900/094/665/GCF_900094665.1/GCF_900094665.1.chrom.sizes.txt +SEQ1_CHUNK=20000000 +SEQ1_LAP=10000 +SEQ1_LIMIT=40 + +# QUERY: shrew mouse GCF_900095145.1 +SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/900/095/145/GCF_900095145.1/GCF_900095145.1.2bit +SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/900/095/145/GCF_900095145.1/GCF_900095145.1.chrom.sizes.txt +SEQ2_CHUNK=20000000 +SEQ2_LAP=0 +SEQ2_LIMIT=100 + +BASE=/hive/data/genomes/asmHubs/allBuild/GCF/900/094/665/GCF_900094665.1_CAROLI_EIJ_v1.1/trackData/lastzGCF_900095145.1.2021-10-08 +TMPDIR=/dev/shm + +' > DEF + + time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \ + -tAsmId GCF_900094665.1_CAROLI_EIJ_v1.1 -qAsmId GCF_900095145.1_PAHARI_EIJ_v1.1 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \ + -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 + grep -w real do.log | sed -e 's/^/ # /;' + # real 547m27.209s + + sed -e 's/^/ # /;' fb.GCF_900094665.1.chainGCF_900095145.1Link.txt + # 2044833501 bases of 2553121441 (80.092%) in intersection + sed -e 's/^/ # /;' fb.GCF_900094665.1.chainSynGCF_900095145.1Link.txt + # 1909243125 bases of 2553121441 (74.781%) in intersection + + time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \ + -target2Bit="/hive/data/genomes/asmHubs/GCF/900/094/665/GCF_900094665.1/GCF_900094665.1.2bit" \ +-targetSizes="/hive/data/genomes/asmHubs/GCF/900/094/665/GCF_900094665.1/GCF_900094665.1.chrom.sizes.txt" \ + -query2Bit="/hive/data/genomes/asmHubs/GCF/900/095/145/GCF_900095145.1/GCF_900095145.1.2bit" \ +-querySizes="/hive/data/genomes/asmHubs/GCF/900/095/145/GCF_900095145.1/GCF_900095145.1.chrom.sizes.txt" \ + 
GCF_900094665.1 GCF_900095145.1) > rbest.log 2>&1 + + grep -w real rbest.log | sed -e 's/^/ # /;' + # real 844m36.808s + + sed -e 's/^/ # /;' fb.GCF_900094665.1.chainRBest.GCF_900095145.1.txt + # 1922295923 bases of 2553121441 (75.292%) in intersection + + ### and for the swap + + cd /hive/data/genomes/asmHubs/allBuild/GCF/900/095/145/GCF_900095145.1_PAHARI_EIJ_v1.1/trackData/blastz.GCF_900094665.1.swap + + time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \ + -tAsmId GCF_900094665.1_CAROLI_EIJ_v1.1 -qAsmId GCF_900095145.1_PAHARI_EIJ_v1.1 /hive/data/genomes/asmHubs/allBuild/GCF/900/094/665/GCF_900094665.1_CAROLI_EIJ_v1.1/trackData/lastzGCF_900095145.1.2021-10-08/DEF -swapDir=`pwd` \ + -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \ + -chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1 + + grep -w real swap.log | sed -e 's/^/ # /;' + # real 284m47.765s + + sed -e 's/^/ # /;' fb.GCF_900095145.1.chainGCF_900094665.1Link.txt + # 2030243186 bases of 2475012951 (82.030%) in intersection + sed -e 's/^/ # /;' fb.GCF_900095145.1.chainSynGCF_900094665.1Link.txt + # 1904468244 bases of 2475012951 (76.948%) in intersection + time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \ + -query2bit="/hive/data/genomes/asmHubs/GCF/900/094/665/GCF_900094665.1/GCF_900094665.1.2bit" \ +-querySizes="/hive/data/genomes/asmHubs/GCF/900/094/665/GCF_900094665.1/GCF_900094665.1.chrom.sizes.txt" \ + -target2bit="/hive/data/genomes/asmHubs/GCF/900/095/145/GCF_900095145.1/GCF_900095145.1.2bit" \ +-targetSizes="/hive/data/genomes/asmHubs/GCF/900/095/145/GCF_900095145.1/GCF_900095145.1.chrom.sizes.txt" \ + GCF_900095145.1 GCF_900094665.1) > rbest.log 2>&1 + + grep -w real rbest.log | sed -e 's/^/ # /;' + # real 798m20.989s + + sed -e 's/^/ # /;' fb.GCF_900095145.1.chainRBest.GCF_900094665.1.txt + # 1922376371 bases of 2475012951 (77.671%) in intersection + 
+############################################################################## +# GCF_000413155.1_DPV01 GCF_009389715.1_palm_55x_up_171113_PBpolish2nd_filt_p +# (DONE - 2021-10-18 - Hiram) + + # to verify the sizes of the genomes to decide which is target and which is query + # (this function could be folded into the pairLastz.sh script) + + # using the 'goto' shell function from Hiram's ~/.bashrc.hiram + goto GCF_000413155.1 + n50.pl *.sizes +# reading: GCF_000413155.1_DPV01.chrom.sizes +# contig count: 80317, total size: 556480649, one half size: 278240324 +# cumulative N50 count contig contig size +278069349 324 NW_008246829.1 335344 +278240324 one half size +278404638 325 NW_008246830.1 335289 + + goto GCF_009389715.1 + n50.pl *.sizes +# reading: GCF_009389715.1_palm_55x_up_171113_PBpolish2nd_filt_p.chrom.sizes +# contig count: 2391, total size: 773189301, one half size: 386594650 +# cumulative N50 count contig contig size +385590432 18 NC_052409.1 9812533 +386594650 one half size +390318775 19 NW_024067666.1 4728343 + + # in this case, choosing the larger genome + # GCF_009389715.1_palm_55x_up_171113_PBpolish2nd_filt_p + # as target and GCF_000413155.1_DPV01 as query + + cd /cluster/home/hiram/kent/src/hg/makeDb/doc/asmHubs + # the single command to run everything: + + time (~/kent/src/hg/utils/automation/pairLastz.sh \ + GCF_009389715.1_palm_55x_up_171113_PBpolish2nd_filt_p \ + GCF_000413155.1_DPV01 other other) >> datePalm.lastz.log 2>&1 & + +# real 585m49.200s +# user 0m3.435s +# sys 0m6.165s + +############################################################################## +# LASTZ date palm GCF_009389715.1 vs. 
date palm GCF_000413155.1 (DONE - 2021-10-18 - hiram) + mkdir /hive/data/genomes/asmHubs/allBuild/GCF/009/389/715/GCF_009389715.1_palm_55x_up_171113_PBpolish2nd_filt_p/trackData/lastzGCF_000413155.1.2021-10-18 + cd /hive/data/genomes/asmHubs/allBuild/GCF/009/389/715/GCF_009389715.1_palm_55x_up_171113_PBpolish2nd_filt_p/trackData/lastzGCF_000413155.1.2021-10-18 + + printf '# date palm GCF_000413155.1 vs. date palm GCF_009389715.1 +BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz + +# TARGET: date palm GCF_009389715.1 +SEQ1_DIR=/hive/data/genomes/asmHubs/GCF/009/389/715/GCF_009389715.1/GCF_009389715.1.2bit +SEQ1_LEN=/hive/data/genomes/asmHubs/GCF/009/389/715/GCF_009389715.1/GCF_009389715.1.chrom.sizes.txt +SEQ1_CHUNK=20000000 +SEQ1_LAP=10000 +SEQ1_LIMIT=40 + +# QUERY: date palm GCF_000413155.1 +SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/000/413/155/GCF_000413155.1/GCF_000413155.1.2bit +SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/000/413/155/GCF_000413155.1/GCF_000413155.1.chrom.sizes.txt +SEQ2_CHUNK=20000000 +SEQ2_LAP=0 +SEQ2_LIMIT=500 + +BASE=/hive/data/genomes/asmHubs/allBuild/GCF/009/389/715/GCF_009389715.1_palm_55x_up_171113_PBpolish2nd_filt_p/trackData/lastzGCF_000413155.1.2021-10-18 +TMPDIR=/dev/shm + +' > DEF + + time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \ + -tAsmId GCF_009389715.1_palm_55x_up_171113_PBpolish2nd_filt_p -qAsmId GCF_000413155.1_DPV01 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \ + -chainMinScore=5000 -chainLinearGap=loose) > do.log 2>&1 + grep -w real do.log | sed -e 's/^/ # /;' + # real 316m4.783s + + sed -e 's/^/ # /;' fb.GCF_009389715.1.chainGCF_000413155.1Link.txt + # 611238317 bases of 773189301 (79.054%) in intersection + sed -e 's/^/ # /;' fb.GCF_009389715.1.chainSynGCF_000413155.1Link.txt + # 426120778 bases of 773189301 (55.112%) in intersection + + time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev 
-buildDir=`pwd` \ + -target2Bit="/hive/data/genomes/asmHubs/GCF/009/389/715/GCF_009389715.1/GCF_009389715.1.2bit" \ +-targetSizes="/hive/data/genomes/asmHubs/GCF/009/389/715/GCF_009389715.1/GCF_009389715.1.chrom.sizes.txt" \ + -query2Bit="/hive/data/genomes/asmHubs/GCF/000/413/155/GCF_000413155.1/GCF_000413155.1.2bit" \ +-querySizes="/hive/data/genomes/asmHubs/GCF/000/413/155/GCF_000413155.1/GCF_000413155.1.chrom.sizes.txt" \ + GCF_009389715.1 GCF_000413155.1) > rbest.log 2>&1 + + grep -w real rbest.log | sed -e 's/^/ # /;' + # real 18m42.901s + + sed -e 's/^/ # /;' fb.GCF_009389715.1.chainRBest.GCF_000413155.1.txt + # 394043180 bases of 773189301 (50.963%) in intersection + + ### and for the swap + + cd /hive/data/genomes/asmHubs/allBuild/GCF/000/413/155/GCF_000413155.1_DPV01/trackData/blastz.GCF_009389715.1.swap + + time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \ + -tAsmId GCF_009389715.1_palm_55x_up_171113_PBpolish2nd_filt_p -qAsmId GCF_000413155.1_DPV01 /hive/data/genomes/asmHubs/allBuild/GCF/009/389/715/GCF_009389715.1_palm_55x_up_171113_PBpolish2nd_filt_p/trackData/lastzGCF_000413155.1.2021-10-18/DEF -swapDir=`pwd` \ + -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \ + -chainMinScore=5000 -chainLinearGap=loose) > swap.log 2>&1 + + grep -w real swap.log | sed -e 's/^/ # /;' + # real 217m53.130s + + sed -e 's/^/ # /;' fb.GCF_000413155.1.chainGCF_009389715.1Link.txt + # 470755615 bases of 556480649 (84.595%) in intersection + sed -e 's/^/ # /;' fb.GCF_000413155.1.chainSynGCF_009389715.1Link.txt + # 386385047 bases of 556480649 (69.434%) in intersection + time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \ + -query2bit="/hive/data/genomes/asmHubs/GCF/009/389/715/GCF_009389715.1/GCF_009389715.1.2bit" \ +-querySizes="/hive/data/genomes/asmHubs/GCF/009/389/715/GCF_009389715.1/GCF_009389715.1.chrom.sizes.txt" \ + 
-target2bit="/hive/data/genomes/asmHubs/GCF/000/413/155/GCF_000413155.1/GCF_000413155.1.2bit" \ +-targetSizes="/hive/data/genomes/asmHubs/GCF/000/413/155/GCF_000413155.1/GCF_000413155.1.chrom.sizes.txt" \ + GCF_000413155.1 GCF_009389715.1) > rbest.log 2>&1 + + grep -w real rbest.log | sed -e 's/^/ # /;' + # real 33m7.581s + + sed -e 's/^/ # /;' fb.GCF_000413155.1.chainRBest.GCF_009389715.1.txt + # 430390896 bases of 556480649 (77.342%) in intersection + +##############################################################################