4efd040664982d940ec8244cacb18724c08ac64f hiram Mon Feb 8 14:04:43 2021 -0800 add the make procedure for the assembly hub chainNet runs refs #26815 diff --git src/hg/makeDb/doc/mm39/lastzRuns.txt src/hg/makeDb/doc/mm39/lastzRuns.txt index 9cdb6db..a2860b8 100644 --- src/hg/makeDb/doc/mm39/lastzRuns.txt +++ src/hg/makeDb/doc/mm39/lastzRuns.txt @@ -1844,15 +1844,267 @@ # real 4m19.907s sed -e 's/^/ #/;' fb.danRer11.chainMm39Link.txt # 46907638 bases of 1674677181 (2.801%) in intersection sed -e 's/^/ #/;' fb.danRer11.chainSynMm39Link.txt # 6476709 bases of 1674677181 (0.387%) in intersection time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` \ danRer11 mm39) > rbest.log 2>&1 # real 96m59.148s sed -e 's/^/ #/;' fb.danRer11.chainRBest.Mm39.txt # 25019759 bases of 1674677181 (1.494%) in intersection ############################################################################## +# LASTZ American beaver GCF_001984765.1 (DONE - 2021-01-22 - Hiram) + mkdir /hive/data/genomes/mm39/bed/lastzGCF_001984765.1.2021-01-22 + cd /hive/data/genomes/mm39/bed/lastzGCF_001984765.1.2021-01-22 + + printf '# GCF_001984765.1 vs mm39 +BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz +# TARGET: Mouse Mm39 +SEQ1_DIR=/hive/data/genomes/mm39/mm39.2bit +SEQ1_LEN=/hive/data/genomes/mm39/chrom.sizes +SEQ1_CHUNK=20000000 +SEQ1_LAP=10000 + +# QUERY: American beaver GCF_001984765.1 +# GCF/001/984/765/GCF_001984765.1_C.can_genome_v1.0 +# should be the same as casCan1 +SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/001/984/765/GCF_001984765.1/GCF_001984765.1.2bit +SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/001/984/765/GCF_001984765.1/GCF_001984765.1.chrom.sizes.txt +SEQ2_CHUNK=20000000 +SEQ2_LAP=0 +SEQ2_LIMIT=100 + +BASE=/hive/data/genomes/mm39/bed/lastzGCF_001984765.1.2021-01-22 +TMPDIR=/dev/shm +' > DEF + +export targetDb="mm39" +export asmId="GCF_001984765.1" +export accessionId="${asmId}_C.can_genome_v1.0" +export gcPath="GCF/001/984/765" +cd 
/hive/data/genomes/${targetDb}/bed/lastz${asmId}.2021-01-22 +time (doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \ + -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \ + -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 +grep -w real do.log | sed -e 's/^/ # /;' + # real 159m54.416s + + +sed -e 's/^/ # /;' fb.$targetDb.chain.${asmId}Link.txt + # 965922133 bases of 2728222451 (35.405%) in intersection +sed -e 's/^/ # /;' fb.$targetDb.chainSyn.${asmId}Link.txt + # 861770999 bases of 2728222451 (31.587%) in intersection + +time (doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \ +-query2Bit="/hive/data/genomes/asmHubs/$gcPath/${asmId}/${asmId}.2bit" \ +-querySizes="/hive/data/genomes/asmHubs/$gcPath/${asmId}/${asmId}.chrom.sizes.txt" \ +$targetDb ${asmId}) >> rbest.log 2>&1 +grep -w real rbest.log | sed -e 's/^/ # /;' + # real 308m15.730s + +sed -e 's/^/ # /;' fb.$targetDb.chainRBest.$asmId.txt + # 913563482 bases of 2728222451 (33.486%) in intersection + +# real 468m10.166s total time all of above + + +export target="mm39" +export Target="Mm39" +export query="GCF_001984765.1" +export asmId="GCF_001984765.1_C.can_genome_v1.0" +export gcPath="GCF/001/984/765" + +mkdir /hive/data/genomes/asmHubs/refseqBuild/$gcPath/$asmId/trackData/blastz.$target.swap +cd /hive/data/genomes/asmHubs/refseqBuild/$gcPath/$asmId/trackData/blastz.$target.swap + +time (doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 -swapDir=`pwd` \ + /hive/data/genomes/${target}/bed/lastz.${query}/DEF -syntenicNet \ + -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \ + -swap -chainMinScore=3000 -chainLinearGap=medium) >> swap.log 2>&1 +grep -w real swap.log | sed -e 's/^/ # /;' + # real 70m57.610s + +sed -e 's/^/ # /;' fb.${query}.chain.${Target}Link.txt + # 1021869459 bases of 2518306565 (40.578%) in intersection +sed -e 's/^/ # /;' fb.${query}.chainSyn.${Target}Link.txt + # 879061651 bases of 2518306565 (34.907%) in 
intersection + +time (doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \ +-target2Bit="/hive/data/genomes/asmHubs/$gcPath/${query}/${query}.2bit" \ +-targetSizes="/hive/data/genomes/asmHubs/$gcPath/${query}/${query}.chrom.sizes.txt" \ +$query $target) >> rbest.log 2>&1 +grep -w real rbest.log | sed -e 's/^/ # /;' + # real 294m45.010s + +sed -e 's/^/ # /;' fb.${query}.chainRBest.${Target}.txt + # 913513030 bases of 2518306565 (36.275%) in intersection + +############################################################################## +# LASTZ lance-tailed manakin GCF_009829145.1 (DONE - 2020-11-24 - Hiram) + mkdir /hive/data/genomes/mm39/bed/lastzGCF_009829145.1.2020-11-24 + cd /hive/data/genomes/mm39/bed/lastzGCF_009829145.1.2020-11-24 + + printf '# GCF_009829145.1 vs mm39 +BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz +# TARGET: Mouse Mm39 +SEQ1_DIR=/hive/data/genomes/mm39/mm39.2bit +SEQ1_LEN=/hive/data/genomes/mm39/chrom.sizes +SEQ1_CHUNK=20000000 +SEQ1_LAP=10000 + +# QUERY: lance-tailed manakin GCF_009829145.1 +SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/009/829/145/GCF_009829145.1/GCF_009829145.1.2bit +SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/009/829/145/GCF_009829145.1/GCF_009829145.1.chrom.sizes.txt +SEQ2_CHUNK=20000000 +SEQ2_LAP=0 +SEQ2_LIMIT=100 + +BASE=/hive/data/genomes/mm39/bed/lastzGCF_009829145.1.2020-11-24 +TMPDIR=/dev/shm +' > DEF + +export targetDb="mm39" +export asmId="GCF_009829145.1" +export accessionId="${asmId}_bChiLan1.pri" +export gcPath="GCF/009/829/145" +cd /hive/data/genomes/${targetDb}/bed/lastz${asmId}.2020-11-24 +time (doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \ + -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \ + -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 +grep -w real do.log | sed -e 's/^/ # /;' + # real 159m54.416s missed this measurement + +sed -e 's/^/ # /;' fb.$targetDb.chain.${asmId}Link.txt + # 67418947 bases of 2728222451 (2.471%) in 
intersection +sed -e 's/^/ # /;' fb.$targetDb.chainSyn.${asmId}Link.txt + # 12681011 bases of 2728222451 (0.465%) in intersection + +time (doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \ +-query2Bit="/hive/data/genomes/asmHubs/$gcPath/${asmId}/${asmId}.2bit" \ +-querySizes="/hive/data/genomes/asmHubs/$gcPath/${asmId}/${asmId}.chrom.sizes.txt" \ +$targetDb ${asmId}) >> rbest.log 2>&1 +grep -w real rbest.log | sed -e 's/^/ # /;' + # real 323m14.591s + +sed -e 's/^/ # /;' fb.$targetDb.chainRBest.$asmId.txt + # 52282223 bases of 2728222451 (1.916%) in intersection + +### XXX haven't done this yet -- NOTE(review): the swap timings and featureBits numbers recorded below are identical to the American beaver (GCF_001984765.1) swap results earlier in this file, so they appear to be copied placeholders rather than lance-tailed manakin measurements; confirm and replace once the swap has been run +export target="mm39" +export Target="Mm39" +export query="GCF_009829145.1" +export asmId="GCF_009829145.1_bChiLan1.pri" +export gcPath="GCF/009/829/145" + +mkdir /hive/data/genomes/asmHubs/refseqBuild/$gcPath/$asmId/trackData/blastz.$target.swap +cd /hive/data/genomes/asmHubs/refseqBuild/$gcPath/$asmId/trackData/blastz.$target.swap + +time (doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 -swapDir=`pwd` \ + /hive/data/genomes/${target}/bed/lastz.${query}/DEF -syntenicNet \ + -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \ + -swap -chainMinScore=3000 -chainLinearGap=medium) >> swap.log 2>&1 +grep -w real swap.log | sed -e 's/^/ # /;' + # real 70m57.610s + +sed -e 's/^/ # /;' fb.${query}.chain.${Target}Link.txt + # 1021869459 bases of 2518306565 (40.578%) in intersection +sed -e 's/^/ # /;' fb.${query}.chainSyn.${Target}Link.txt + # 879061651 bases of 2518306565 (34.907%) in intersection + +time (doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \ +-target2Bit="/hive/data/genomes/asmHubs/$gcPath/${query}/${query}.2bit" \ +-targetSizes="/hive/data/genomes/asmHubs/$gcPath/${query}/${query}.chrom.sizes.txt" \ +$query $target) >> rbest.log 2>&1 +grep -w real rbest.log | sed -e 's/^/ # /;' + # real 294m45.010s + +sed -e 's/^/ # /;' fb.${query}.chainRBest.${Target}.txt + # 913513030 bases of 2518306565 (36.275%) in intersection + 
+############################################################################## +# LASTZ Chinese hamster CHO GCF_003668045.3 (DONE - 2020-12-08 - Hiram) + mkdir /hive/data/genomes/mm39/bed/lastzGCF_003668045.3.2020-12-08 + cd /hive/data/genomes/mm39/bed/lastzGCF_003668045.3.2020-12-08 + + printf '# GCF_003668045.3 vs mm39 +BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz +# TARGET: Mouse Mm39 +SEQ1_DIR=/hive/data/genomes/mm39/mm39.2bit +SEQ1_LEN=/hive/data/genomes/mm39/chrom.sizes +SEQ1_CHUNK=20000000 +SEQ1_LAP=10000 + +# QUERY: Chinese hamster GCF_003668045.3 +SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/003/668/045/GCF_003668045.3/GCF_003668045.3.2bit +SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/003/668/045/GCF_003668045.3/GCF_003668045.3.chrom.sizes.txt +SEQ2_CHUNK=20000000 +SEQ2_LAP=0 +SEQ2_LIMIT=100 + +BASE=/hive/data/genomes/mm39/bed/lastzGCF_003668045.3.2020-12-08 +TMPDIR=/dev/shm +' > DEF + +export targetDb="mm39" +export asmId="GCF_003668045.3" +export accessionId="${asmId}_CriGri-PICRH-1.0" +export gcPath="GCF/003/668/045" +cd /hive/data/genomes/${targetDb}/bed/lastz${asmId}.2020-12-08 +time (doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \ + -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \ + -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 +grep -w real do.log | sed -e 's/^/ # /;' + # real 291m46.571s + +sed -e 's/^/ # /;' fb.$targetDb.chain.${asmId}Link.txt + # 1582370207 bases of 2728222451 (58.000%) in intersection + +sed -e 's/^/ # /;' fb.$targetDb.chainSyn.${asmId}Link.txt + # 1484311234 bases of 2728222451 (54.406%) in intersection + +time (doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \ +-query2Bit="/hive/data/genomes/asmHubs/$gcPath/${asmId}/${asmId}.2bit" \ +-querySizes="/hive/data/genomes/asmHubs/$gcPath/${asmId}/${asmId}.chrom.sizes.txt" \ +$targetDb ${asmId}) >> rbest.log 2>&1 +grep -w real rbest.log | sed -e 's/^/ # /;' + # real 821m47.400s + +sed -e 's/^/ # /;' 
fb.$targetDb.chainRBest.$asmId.txt + # 1455588785 bases of 2728222451 (53.353%) in intersection + +### XXX haven't done this yet +export target="mm39" +export Target="Mm39" +export query="GCF_003668045.3" +export asmId="GCF_003668045.3_CriGri-PICRH-1.0" +export gcPath="GCF/003/668/045" + +mkdir /hive/data/genomes/asmHubs/refseqBuild/$gcPath/$asmId/trackData/blastz.$target.swap +cd /hive/data/genomes/asmHubs/refseqBuild/$gcPath/$asmId/trackData/blastz.$target.swap + +time (doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 -swapDir=`pwd` \ + /hive/data/genomes/${target}/bed/lastz.${query}/DEF -syntenicNet \ + -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \ + -swap -chainMinScore=3000 -chainLinearGap=medium) >> swap.log 2>&1 +grep -w real swap.log | sed -e 's/^/ # /;' + # real 70m57.610s missed this measurement + +sed -e 's/^/ # /;' fb.${query}.chain.${Target}Link.txt + # 1557925479 bases of 2366634374 (65.829%) in intersection +sed -e 's/^/ # /;' fb.${query}.chainSyn.${Target}Link.txt + # 1455333889 bases of 2366634374 (61.494%) in intersection + +time (doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \ +-target2Bit="/hive/data/genomes/asmHubs/$gcPath/${query}/${query}.2bit" \ +-targetSizes="/hive/data/genomes/asmHubs/$gcPath/${query}/${query}.chrom.sizes.txt" \ +$query $target) >> rbest.log 2>&1 +grep -w real rbest.log | sed -e 's/^/ # /;' + # real 798m57.398s + +sed -e 's/^/ # /;' fb.${query}.chainRBest.${Target}.txt + # 1456469555 bases of 2366634374 (61.542%) in intersection + +##############################################################################