0a532517e84e7897ed50f2ea333134daaa1387c0 hiram Mon Apr 10 07:49:48 2023 -0700 one HPRC lastz run outstanding refs #30912 diff --git src/hg/makeDb/doc/hg38/hprcLastz.txt src/hg/makeDb/doc/hg38/hprcLastz.txt index b9a97d5..410eaf8 100644 --- src/hg/makeDb/doc/hg38/hprcLastz.txt +++ src/hg/makeDb/doc/hg38/hprcLastz.txt @@ -83,30 +83,214 @@ -target2bit="/hive/data/genomes/asmHubs/GCA/018/503/255/GCA_018503255.1/GCA_018503255.1.2bit" \ -targetSizes="/hive/data/genomes/asmHubs/GCA/018/503/255/GCA_018503255.1/GCA_018503255.1.chrom.sizes.txt" \ GCA_018503255.1 hg38) > rbest.log 2>&1 grep -w real rbest.log | sed -e 's/^/ # /;' # real 79m48.978s sed -e 's/^/ # /;' fb.GCA_018503255.1.chainRBest.Hg38.txt # 2833510609 bases of 3055692855 (92.729%) in intersection real 420m25.386s user 0m1.028s sys 0m1.944s ############################################################################## +# LASTZ Human Hg38 vs. human GCA_021951015.1 +# (DONE - 2023-04-09 - hiram) + + mkdir /hive/data/genomes/hg38/bed/lastzGCA_021951015.1.2022-03-29 + cd /hive/data/genomes/hg38/bed/lastzGCA_021951015.1.2022-03-29 + + printf '# human GCA_021951015.1 vs. 
Human Hg38 +BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz +BLASTZ_T=2 +BLASTZ_O=600 +BLASTZ_E=150 +BLASTZ_M=254 +BLASTZ_K=4500 +BLASTZ_Y=15000 +BLASTZ_Q=/hive/data/staging/data/blastz/human_chimp.v2.q +# A C G T +# A 90 -330 -236 -356 +# C -330 100 -318 -236 +# G -236 -318 100 -330 +# T -356 -236 -330 90 + +# TARGET: Human hg38 +SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit +SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes +SEQ1_CHUNK=20000000 +SEQ1_LAP=10000 +SEQ1_LIMIT=40 + +# QUERY: human 2022-02-04 GCA_021951015.1_HG002.mat.cur.20211005 +SEQ2_DIR=/hive/data/genomes/asmHubs/GCA/021/951/015/GCA_021951015.1/GCA_021951015.1.2bit +SEQ2_LEN=/hive/data/genomes/asmHubs/GCA/021/951/015/GCA_021951015.1/GCA_021951015.1.chrom.sizes.txt +SEQ2_CHUNK=20000000 +SEQ2_LAP=0 +SEQ2_LIMIT=100 + +BASE=/hive/data/genomes/hg38/bed/lastzGCA_021951015.1.2022-03-29 +TMPDIR=/dev/shm + +' > DEF + + time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \ + -qAsmId GCA_021951015.1_HG002.mat.cur.20211005 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \ + -chainMinScore=5000 -chainLinearGap=medium) > do.log 2>&1 + grep -w real do.log | sed -e 's/^/ # /;' + # real 683m39.622s + + sed -e 's/^/ # /;' fb.hg38.chainGCA_021951015.1Link.txt + # 3023153297 bases of 3272116950 (92.391%) in intersection + sed -e 's/^/ # /;' fb.hg38.chainSynGCA_021951015.1Link.txt + # 3019104161 bases of 3272116950 (92.268%) in intersection + + time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \ + \ + -query2Bit="/hive/data/genomes/asmHubs/GCA/021/951/015/GCA_021951015.1/GCA_021951015.1.2bit" \ +-querySizes="/hive/data/genomes/asmHubs/GCA/021/951/015/GCA_021951015.1/GCA_021951015.1.chrom.sizes.txt" \ + hg38 GCA_021951015.1) > rbest.log 2>&1 + + grep -w real rbest.log | sed -e 's/^/ # /;' + # real 80m40.540s + + sed -e 's/^/ # /;' fb.hg38.chainRBest.GCA_021951015.1.txt + # 2838656141 bases of 
3272116950 (86.753%) in intersection + + ### and for the swap + + cd /hive/data/genomes/asmHubs/allBuild/GCA/021/951/015/GCA_021951015.1_HG002.mat.cur.20211005/trackData/blastz.hg38.swap + + time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \ + -qAsmId GCA_021951015.1_HG002.mat.cur.20211005 /hive/data/genomes/hg38/bed/lastzGCA_021951015.1.2022-03-29/DEF -swapDir=`pwd` \ + -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \ + -chainMinScore=5000 -chainLinearGap=medium) > swap.log 2>&1 + + grep -w real swap.log | sed -e 's/^/ # /;' + # real 133m16.011s + + sed -e 's/^/ # /;' fb.GCA_021951015.1.chainHg38Link.txt + # 2881437844 bases of 3061735012 (94.111%) in intersection + sed -e 's/^/ # /;' fb.GCA_021951015.1.chainSynHg38Link.txt + # 2872962839 bases of 3061735012 (93.834%) in intersection + time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \ + \ + -target2bit="/hive/data/genomes/asmHubs/GCA/021/951/015/GCA_021951015.1/GCA_021951015.1.2bit" \ +-targetSizes="/hive/data/genomes/asmHubs/GCA/021/951/015/GCA_021951015.1/GCA_021951015.1.chrom.sizes.txt" \ + GCA_021951015.1 hg38) > rbest.log 2>&1 + + grep -w real rbest.log | sed -e 's/^/ # /;' + # real 84m40.905s + + sed -e 's/^/ # /;' fb.GCA_021951015.1.chainRBest.Hg38.txt + # 2836030662 bases of 3061735012 (92.628%) in intersection + +############################################################################## +# LASTZ Human Hg38 vs. human GCA_021950905.1 +# (DONE - 2023-04-09 - hiram) + + mkdir /hive/data/genomes/hg38/bed/lastzGCA_021950905.1.2022-03-29 + cd /hive/data/genomes/hg38/bed/lastzGCA_021950905.1.2022-03-29 + + printf '# human GCA_021950905.1 vs. 
Human Hg38 +BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz +BLASTZ_T=2 +BLASTZ_O=600 +BLASTZ_E=150 +BLASTZ_M=254 +BLASTZ_K=4500 +BLASTZ_Y=15000 +BLASTZ_Q=/hive/data/staging/data/blastz/human_chimp.v2.q +# A C G T +# A 90 -330 -236 -356 +# C -330 100 -318 -236 +# G -236 -318 100 -330 +# T -356 -236 -330 90 + +# TARGET: Human hg38 +SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit +SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes +SEQ1_CHUNK=20000000 +SEQ1_LAP=10000 +SEQ1_LIMIT=40 + +# QUERY: human 2022-02-04 GCA_021950905.1_HG002.pat.cur.20211005 +SEQ2_DIR=/hive/data/genomes/asmHubs/GCA/021/950/905/GCA_021950905.1/GCA_021950905.1.2bit +SEQ2_LEN=/hive/data/genomes/asmHubs/GCA/021/950/905/GCA_021950905.1/GCA_021950905.1.chrom.sizes.txt +SEQ2_CHUNK=20000000 +SEQ2_LAP=0 +SEQ2_LIMIT=100 + +BASE=/hive/data/genomes/hg38/bed/lastzGCA_021950905.1.2022-03-29 +TMPDIR=/dev/shm + +' > DEF + + time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \ + -qAsmId GCA_021950905.1_HG002.pat.cur.20211005 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \ + -chainMinScore=5000 -chainLinearGap=medium) > do.log 2>&1 + grep -w real do.log | sed -e 's/^/ # /;' + # real 588m18.846s + + sed -e 's/^/ # /;' fb.hg38.chainGCA_021950905.1Link.txt + # 2895410388 bases of 3272116950 (88.487%) in intersection + sed -e 's/^/ # /;' fb.hg38.chainSynGCA_021950905.1Link.txt + # 2889961705 bases of 3272116950 (88.321%) in intersection + + time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \ + \ + -query2Bit="/hive/data/genomes/asmHubs/GCA/021/950/905/GCA_021950905.1/GCA_021950905.1.2bit" \ +-querySizes="/hive/data/genomes/asmHubs/GCA/021/950/905/GCA_021950905.1/GCA_021950905.1.chrom.sizes.txt" \ + hg38 GCA_021950905.1) > rbest.log 2>&1 + + grep -w real rbest.log | sed -e 's/^/ # /;' + # real 77m20.514s + + sed -e 's/^/ # /;' fb.hg38.chainRBest.GCA_021950905.1.txt + # 2709013105 bases of 
3272116950 (82.791%) in intersection + + ### and for the swap + + cd /hive/data/genomes/asmHubs/allBuild/GCA/021/950/905/GCA_021950905.1_HG002.pat.cur.20211005/trackData/blastz.hg38.swap + + time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \ + -qAsmId GCA_021950905.1_HG002.pat.cur.20211005 /hive/data/genomes/hg38/bed/lastzGCA_021950905.1.2022-03-29/DEF -swapDir=`pwd` \ + -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \ + -chainMinScore=5000 -chainLinearGap=medium) > swap.log 2>&1 + + grep -w real swap.log | sed -e 's/^/ # /;' + # real 128m39.988s + + sed -e 's/^/ # /;' fb.GCA_021950905.1.chainHg38Link.txt + # 2752515526 bases of 2959277077 (93.013%) in intersection + sed -e 's/^/ # /;' fb.GCA_021950905.1.chainSynHg38Link.txt + # 2743116590 bases of 2959277077 (92.695%) in intersection + time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \ + \ + -target2bit="/hive/data/genomes/asmHubs/GCA/021/950/905/GCA_021950905.1/GCA_021950905.1.2bit" \ +-targetSizes="/hive/data/genomes/asmHubs/GCA/021/950/905/GCA_021950905.1/GCA_021950905.1.chrom.sizes.txt" \ + GCA_021950905.1 hg38) > rbest.log 2>&1 + + grep -w real rbest.log | sed -e 's/^/ # /;' + # real 76m56.576s + + sed -e 's/^/ # /;' fb.GCA_021950905.1.chainRBest.Hg38.txt + # 2708210994 bases of 2959277077 (91.516%) in intersection + +############################################################################## # LASTZ Human Hg38 vs. human GCA_018506975.1 # (DONE - 2023-04-02 - hiram) mkdir /hive/data/genomes/hg38/bed/lastzGCA_018506975.1.2023-04-02 cd /hive/data/genomes/hg38/bed/lastzGCA_018506975.1.2023-04-02 printf '# human GCA_018506975.1 vs. Human Hg38 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz BLASTZ_T=2 BLASTZ_O=600 BLASTZ_E=150 BLASTZ_M=254 BLASTZ_K=4500 BLASTZ_Y=15000 BLASTZ_Q=/hive/data/staging/data/blastz/human_chimp.v2.q