7d0bd2b7d089f94c9a8260c417246d4dbfce2523 mspeir Sat Mar 29 18:51:22 2025 -0700 adding make docs for several lastz runs from the last 6 months or so diff --git src/hg/makeDb/doc/hg38/lastzRuns.txt src/hg/makeDb/doc/hg38/lastzRuns.txt index cba116aec5b..73a27d8497a 100644 --- src/hg/makeDb/doc/hg38/lastzRuns.txt +++ src/hg/makeDb/doc/hg38/lastzRuns.txt @@ -15264,15 +15264,340 @@ \ -target2bit="/hive/data/genomes/asmHubs/GCF/011/100/685/GCF_011100685.1/GCF_011100685.1.2bit" \ -targetSizes="/hive/data/genomes/asmHubs/GCF/011/100/685/GCF_011100685.1/GCF_011100685.1.chrom.sizes.txt" \ GCF_011100685.1 hg38) > rbest.log 2>&1 grep -w real rbest.log | sed -e 's/^/ # /;' # real 255m23.446s sed -e 's/^/ # /;' fb.GCF_011100685.1.chainRBest.Hg38.txt # 1425408450 bases of 2481983352 (57.430%) in intersection real 2300m12.938s user 0m2.731s sys 0m12.713s ############################################################################## +# LASTZ Human Hg38 vs. white-tufted-ear marmoset GCA_011100555.2 +# (DONE - 2024-11-22 - mspeir) + + mkdir /hive/data/genomes/hg38/bed/lastzGCA_011100555.2.2024-11-22 + cd /hive/data/genomes/hg38/bed/lastzGCA_011100555.2.2024-11-22 + + printf '# white-tufted-ear marmoset GCA_011100555.2 vs. 
Human Hg38 +BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz +BLASTZ_T=2 +BLASTZ_O=600 +BLASTZ_E=150 +BLASTZ_M=254 +BLASTZ_K=4500 +BLASTZ_Y=15000 +BLASTZ_Q=/hive/data/staging/data/blastz/human_chimp.v2.q +# A C G T +# A 90 -330 -236 -356 +# C -330 100 -318 -236 +# G -236 -318 100 -330 +# T -356 -236 -330 90 + +# TARGET: Human hg38 +SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit +SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes +SEQ1_CHUNK=20000000 +SEQ1_LAP=10000 +SEQ1_LIMIT=40 + +# QUERY: white-tufted-ear marmoset 2021-04-28 GCA_011100555.2_mCalJa1.2.pat.X +SEQ2_DIR=/hive/data/genomes/asmHubs/GCA/011/100/555/GCA_011100555.2/GCA_011100555.2.2bit +SEQ2_LEN=/hive/data/genomes/asmHubs/GCA/011/100/555/GCA_011100555.2/GCA_011100555.2.chrom.sizes.txt +SEQ2_CHUNK=20000000 +SEQ2_LAP=0 +SEQ2_LIMIT=100 + +BASE=/hive/data/genomes/hg38/bed/lastzGCA_011100555.2.2024-11-22 +TMPDIR=/dev/shm + +' > DEF + + time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \ + -qAsmId GCA_011100555.2_mCalJa1.2.pat.X -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=hgwdev \ + -chainMinScore=5000 -chainLinearGap=medium) > do.log 2>&1 + grep -w real do.log | sed -e 's/^/ # /;' + # real 118m17.235s + + sed -e 's/^/ # /;' fb.hg38.chainGCA_011100555.2Link.txt + # 2189250745 bases of 3299210039 (66.357%) in intersection + sed -e 's/^/ # /;' fb.hg38.chainSynGCA_011100555.2Link.txt + # 2148028479 bases of 3299210039 (65.107%) in intersection + + time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \ + \ + -query2Bit="/hive/data/genomes/asmHubs/GCA/011/100/555/GCA_011100555.2/GCA_011100555.2.2bit" \ +-querySizes="/hive/data/genomes/asmHubs/GCA/011/100/555/GCA_011100555.2/GCA_011100555.2.chrom.sizes.txt" \ + hg38 GCA_011100555.2) > rbest.log 2>&1 + + grep -w real rbest.log | sed -e 's/^/ # /;' + # real 149m56.908s + + sed -e 's/^/ # /;' fb.hg38.chainRBest.GCA_011100555.2.txt + # 
1999972943 bases of 3299210039 (60.620%) in intersection + +real 562m50.862s +user 0m6.353s +sys 0m4.283s + +############################################################################## +# LASTZ Human Hg38 vs. olive baboon GCF_008728515.1 +# (DONE - 2024-11-22 - mspeir) + + mkdir /hive/data/genomes/hg38/bed/lastzGCF_008728515.1.2023-08-31 + cd /hive/data/genomes/hg38/bed/lastzGCF_008728515.1.2023-08-31 + + printf '# olive baboon GCF_008728515.1 vs. Human Hg38 +BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz +BLASTZ_T=2 +BLASTZ_O=600 +BLASTZ_E=150 +BLASTZ_M=254 +BLASTZ_K=4500 +BLASTZ_Y=15000 +BLASTZ_Q=/hive/data/staging/data/blastz/human_chimp.v2.q +# A C G T +# A 90 -330 -236 -356 +# C -330 100 -318 -236 +# G -236 -318 100 -330 +# T -356 -236 -330 90 + +# TARGET: Human hg38 +SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit +SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes +SEQ1_CHUNK=20000000 +SEQ1_LAP=10000 +SEQ1_LIMIT=40 + +# QUERY: olive baboon 2019-10-02 GCF_008728515.1_Panubis1.0 +SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/008/728/515/GCF_008728515.1/GCF_008728515.1.2bit +SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/008/728/515/GCF_008728515.1/GCF_008728515.1.chrom.sizes.txt +SEQ2_CHUNK=20000000 +SEQ2_LAP=0 +SEQ2_LIMIT=100 + +BASE=/hive/data/genomes/hg38/bed/lastzGCF_008728515.1.2023-08-31 +TMPDIR=/dev/shm + +' > DEF + + time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \ + -qAsmId GCF_008728515.1_Panubis1.0 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=hgwdev \ + -chainMinScore=5000 -chainLinearGap=medium) > do.log 2>&1 + grep -w real do.log | sed -e 's/^/ # /;' + # real 111m16.100s + + sed -e 's/^/ # /;' fb.hg38.chainGCF_008728515.1Link.txt + # 2638181355 bases of 3299210039 (79.964%) in intersection + sed -e 's/^/ # /;' fb.hg38.chainSynGCF_008728515.1Link.txt + # 2586288619 bases of 3299210039 (78.391%) in intersection + + time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub 
-load -workhorse=hgwdev -buildDir=`pwd` \ + \ + -query2Bit="/hive/data/genomes/asmHubs/GCF/008/728/515/GCF_008728515.1/GCF_008728515.1.2bit" \ +-querySizes="/hive/data/genomes/asmHubs/GCF/008/728/515/GCF_008728515.1/GCF_008728515.1.chrom.sizes.txt" \ + hg38 GCF_008728515.1) > rbest.log 2>&1 + + grep -w real rbest.log | sed -e 's/^/ # /;' + # real 109m19.490s + + sed -e 's/^/ # /;' fb.hg38.chainRBest.GCF_008728515.1.txt + # 2400768727 bases of 3299210039 (72.768%) in intersection + +real 0m0.402s +user 0m0.087s +sys 0m0.154s + +############################################################################## +# LASTZ Human Hg38 vs. sooty mangabey GCF_000955945.1 +# (DONE - 2024-11-24 - mspeir) + + mkdir /hive/data/genomes/hg38/bed/lastzGCF_000955945.1.2024-11-24 + cd /hive/data/genomes/hg38/bed/lastzGCF_000955945.1.2024-11-24 + + printf '# sooty mangabey GCF_000955945.1 vs. Human Hg38 +BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz +BLASTZ_T=2 +BLASTZ_O=600 +BLASTZ_E=150 +BLASTZ_M=254 +BLASTZ_K=4500 +BLASTZ_Y=15000 +BLASTZ_Q=/hive/data/staging/data/blastz/human_chimp.v2.q +# A C G T +# A 90 -330 -236 -356 +# C -330 100 -318 -236 +# G -236 -318 100 -330 +# T -356 -236 -330 90 + +# TARGET: Human hg38 +SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit +SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes +SEQ1_CHUNK=20000000 +SEQ1_LAP=10000 +SEQ1_LIMIT=40 + +# QUERY: sooty mangabey 2015-03-19 GCF_000955945.1_Caty_1.0 +SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/000/955/945/GCF_000955945.1/GCF_000955945.1.2bit +SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/000/955/945/GCF_000955945.1/GCF_000955945.1.chrom.sizes.txt +SEQ2_CHUNK=20000000 +SEQ2_LAP=0 +SEQ2_LIMIT=100 + +BASE=/hive/data/genomes/hg38/bed/lastzGCF_000955945.1.2024-11-24 +TMPDIR=/dev/shm + +' > DEF + + time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \ + -qAsmId GCF_000955945.1_Caty_1.0 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=hgwdev \ + 
-chainMinScore=5000 -chainLinearGap=medium) > do.log 2>&1 + grep -w real do.log | sed -e 's/^/ # /;' + # real 140m41.682s + + sed -e 's/^/ # /;' fb.hg38.chainGCF_000955945.1Link.txt + # 2627114328 bases of 3299210039 (79.629%) in intersection + sed -e 's/^/ # /;' fb.hg38.chainSynGCF_000955945.1Link.txt + # 2580198826 bases of 3299210039 (78.207%) in intersection + + time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \ + \ + -query2Bit="/hive/data/genomes/asmHubs/GCF/000/955/945/GCF_000955945.1/GCF_000955945.1.2bit" \ +-querySizes="/hive/data/genomes/asmHubs/GCF/000/955/945/GCF_000955945.1/GCF_000955945.1.chrom.sizes.txt" \ + hg38 GCF_000955945.1) > rbest.log 2>&1 + + grep -w real rbest.log | sed -e 's/^/ # /;' + # real 104m0.149s + + sed -e 's/^/ # /;' fb.hg38.chainRBest.GCF_000955945.1.txt + # 2383807464 bases of 3299210039 (72.254%) in intersection + +real 467m35.051s +user 0m6.086s +sys 0m4.122s + +############################################################################## +# LASTZ Human Hg38 vs. pig-tailed macaque GCF_000956065.1 +# (DONE - 2024-11-24 - mspeir) + + mkdir /hive/data/genomes/hg38/bed/lastzGCF_000956065.1.2024-11-24 + cd /hive/data/genomes/hg38/bed/lastzGCF_000956065.1.2024-11-24 + + printf '# pig-tailed macaque GCF_000956065.1 vs. 
Human Hg38 +BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz +BLASTZ_T=2 +BLASTZ_O=600 +BLASTZ_E=150 +BLASTZ_M=254 +BLASTZ_K=4500 +BLASTZ_Y=15000 +BLASTZ_Q=/hive/data/staging/data/blastz/human_chimp.v2.q +# A C G T +# A 90 -330 -236 -356 +# C -330 100 -318 -236 +# G -236 -318 100 -330 +# T -356 -236 -330 90 + +# TARGET: Human hg38 +SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit +SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes +SEQ1_CHUNK=20000000 +SEQ1_LAP=10000 +SEQ1_LIMIT=40 + +# QUERY: pig-tailed macaque 2015-03-19 GCF_000956065.1_Mnem_1.0 +SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/000/956/065/GCF_000956065.1/GCF_000956065.1.2bit +SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/000/956/065/GCF_000956065.1/GCF_000956065.1.chrom.sizes.txt +SEQ2_CHUNK=20000000 +SEQ2_LAP=0 +SEQ2_LIMIT=100 + +BASE=/hive/data/genomes/hg38/bed/lastzGCF_000956065.1.2024-11-24 +TMPDIR=/dev/shm + +' > DEF + + time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \ + -qAsmId GCF_000956065.1_Mnem_1.0 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=hgwdev \ + -chainMinScore=5000 -chainLinearGap=medium) > do.log 2>&1 + grep -w real do.log | sed -e 's/^/ # /;' + # real 140m45.442s + + sed -e 's/^/ # /;' fb.hg38.chainGCF_000956065.1Link.txt + # 2617335207 bases of 3299210039 (79.332%) in intersection + sed -e 's/^/ # /;' fb.hg38.chainSynGCF_000956065.1Link.txt + # 2559419006 bases of 3299210039 (77.577%) in intersection + + time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \ + \ + -query2Bit="/hive/data/genomes/asmHubs/GCF/000/956/065/GCF_000956065.1/GCF_000956065.1.2bit" \ +-querySizes="/hive/data/genomes/asmHubs/GCF/000/956/065/GCF_000956065.1/GCF_000956065.1.chrom.sizes.txt" \ + hg38 GCF_000956065.1) > rbest.log 2>&1 + + grep -w real rbest.log | sed -e 's/^/ # /;' + # real 102m13.608s + + sed -e 's/^/ # /;' fb.hg38.chainRBest.GCF_000956065.1.txt + # 2376330049 bases of 3299210039 
(72.027%) in intersection + +real 471m16.938s +user 0m6.847s +sys 0m3.999s + +############################################################################## +# LASTZ Human Hg38 vs. dog GCF_014441545.1 +# (DONE - 2024-11-24 - mspeir) + + mkdir /hive/data/genomes/hg38/bed/lastzGCF_014441545.1.2024-11-24 + cd /hive/data/genomes/hg38/bed/lastzGCF_014441545.1.2024-11-24 + + printf '# dog GCF_014441545.1 vs. Human Hg38 +BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz + +# TARGET: Human hg38 +SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit +SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes +SEQ1_CHUNK=20000000 +SEQ1_LAP=10000 +SEQ1_LIMIT=40 + +# QUERY: dog 2020-09-03 GCF_014441545.1_ROS_Cfam_1.0 +SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/014/441/545/GCF_014441545.1/GCF_014441545.1.2bit +SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/014/441/545/GCF_014441545.1/GCF_014441545.1.chrom.sizes.txt +SEQ2_CHUNK=20000000 +SEQ2_LAP=0 +SEQ2_LIMIT=100 + +BASE=/hive/data/genomes/hg38/bed/lastzGCF_014441545.1.2024-11-24 +TMPDIR=/dev/shm + +' > DEF + + time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \ + -qAsmId GCF_014441545.1_ROS_Cfam_1.0 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=hgwdev \ + -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 + grep -w real do.log | sed -e 's/^/ # /;' + # real 518m26.232s + + sed -e 's/^/ # /;' fb.hg38.chainGCF_014441545.1Link.txt + # 1585328629 bases of 3299210039 (48.052%) in intersection + sed -e 's/^/ # /;' fb.hg38.chainSynGCF_014441545.1Link.txt + # 1518416772 bases of 3299210039 (46.024%) in intersection + + time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \ + \ + -query2Bit="/hive/data/genomes/asmHubs/GCF/014/441/545/GCF_014441545.1/GCF_014441545.1.2bit" \ +-querySizes="/hive/data/genomes/asmHubs/GCF/014/441/545/GCF_014441545.1/GCF_014441545.1.chrom.sizes.txt" \ + hg38 GCF_014441545.1) > rbest.log 2>&1 + + grep -w 
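real rbest.log | sed -e 's/^/ # /;' + # real 252m7.964s + + sed -e 's/^/ # /;' fb.hg38.chainRBest.GCF_014441545.1.txt + # TODO: output not recorded here; paste the "bases of ... in intersection" line from fb.hg38.chainRBest.GCF_014441545.1.txt + +real 1208m0.275s +user 0m2.919s +sys 0m3.733s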