3da24c002ed1f1bba8eab282fccaf194ac80b1fe hiram Tue Jul 9 09:28:39 2019 -0700 lastz chainNet to regenRn0 and rheMac10 refs #21579 #23771 diff --git src/hg/makeDb/doc/hg38/lastzRuns.txt src/hg/makeDb/doc/hg38/lastzRuns.txt index a756d58..72cae22 100644 --- src/hg/makeDb/doc/hg38/lastzRuns.txt +++ src/hg/makeDb/doc/hg38/lastzRuns.txt @@ -12085,15 +12085,161 @@ # real 60m31.849s cat fb.GRCm38B.chainHg38Link.txt # 941205213 bases of 2655285420 (35.446%) in intersection cat fb.GRCm38B.chainSynHg38Link.txt # 891450770 bases of 2655285420 (33.573%) in intersection time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` GRCm38B hg38) \ > rbest.log 2>&1 & # real 331m35.578s cat fb.GRCm38B.chainRBest.Hg38.txt # 893587236 bases of 2655285420 (33.653%) in intersection ######################################################################### +# LASTZ human/hg38 vs. rat/regenRn0 - (DONE - 2019-07-01 - Hiram) + mkdir /hive/data/genomes/hg38/bed/lastzRegenRn0.2019-07-01 + cd /hive/data/genomes/hg38/bed/lastzRegenRn0.2019-07-01 + + printf '# human vs rat +BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz + +# TARGET: human hg38 +SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit +SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes +SEQ1_CTGDIR=/hive/data/genomes/hg38/hg38.contigs.2bit +SEQ1_CTGLEN=/hive/data/genomes/hg38/hg38.contigs.chrom.sizes +SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift +SEQ1_CHUNK=40000000 +SEQ1_LIMIT=30 +SEQ1_LAP=10000 + +# QUERY: rat regenRn0 +SEQ2_DIR=/hive/data/genomes/regenRn0/regenRn0.2bit +SEQ2_LEN=/hive/data/genomes/regenRn0/chrom.sizes +SEQ2_CHUNK=20000000 +SEQ2_LIMIT=30 +SEQ2_LAP=0 + +BASE=/hive/data/genomes/hg38/bed/lastzRegenRn0.2019-07-01 +TMPDIR=/dev/shm +' > DEF + + time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \ + -chainMinScore=3000 -chainLinearGap=medium -noDbNameCheck \ + -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \ + -syntenicNet) > do.log 2>&1 & + # real 256m8.479s + + cat fb.hg38.chainRegenRn0Link.txt + # 
930788096 bases of 3095998939 (30.064%) in intersection + cat fb.hg38.chainSynRegenRn0Link.txt + # 870556908 bases of 3095998939 (28.119%) in intersection + + time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` hg38 regenRn0) > rbest.log 2>&1 & + # real 328m4.069s + + cat fb.hg38.chainRBest.RegenRn0.txt + # 857127621 bases of 3095998939 (27.685%) in intersection + + # and for the swap: + mkdir /hive/data/genomes/regenRn0/bed/blastz.hg38.swap + cd /hive/data/genomes/regenRn0/bed/blastz.hg38.swap + + time (doBlastzChainNet.pl -verbose=2 \ + /hive/data/genomes/hg38/bed/lastzRegenRn0.2019-07-01/DEF \ + -swap -chainMinScore=3000 -chainLinearGap=medium -noDbNameCheck \ + -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \ + -syntenicNet) > swap.log 2>&1 + # real 64m34.515s + + cat fb.regenRn0.chainHg38Link.txt + # 906790361 bases of 2534810853 (35.773%) in intersection + cat fb.regenRn0.chainSynHg38Link.txt + # 849997639 bases of 2534810853 (33.533%) in intersection + + time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` regenRn0 hg38) > rbest.log 2>&1 + # real 300m30.411s + + cat fb.regenRn0.chainRBest.Hg38.txt + # 858923025 bases of 2534810853 (33.885%) in intersection + +######################################################################### +# LASTZ human/hg38 vs. rhesus/rheMac10 - (DONE - 2019-07-03 - Hiram) + mkdir /hive/data/genomes/hg38/bed/lastzRheMac10.2019-07-03 + cd /hive/data/genomes/hg38/bed/lastzRheMac10.2019-07-03 + + printf '# human vs macaca mulatta +BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz +# maximum M allowed with lastz is only 254 +BLASTZ_M=254 +BLASTZ_Q=/hive/data/staging/data/blastz/human_chimp.v2.q +BLASTZ_O=600 +BLASTZ_E=150 +# other parameters from panTro2 vs hg18 lastz on advice from Webb +BLASTZ_K=4500 +BLASTZ_Y=15000 +BLASTZ_T=2 + +# TARGET: Human Hg38 +SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit +SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes +SEQ1_CTGDIR=/hive/data/genomes/hg38/hg38.contigs.2bit 
+SEQ1_CTGLEN=/hive/data/genomes/hg38/hg38.contigs.chrom.sizes +SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift +SEQ1_CHUNK=20000000 +SEQ1_LAP=10000 + +# QUERY: Macaca Mulatta RheMac10 +SEQ2_DIR=/hive/data/genomes/rheMac10/rheMac10.2bit +SEQ2_LEN=/hive/data/genomes/rheMac10/chrom.sizes +SEQ2_CHUNK=20000000 +SEQ2_LIMIT=600 +SEQ2_LAP=0 +SEQ2_IN_CONTIGS=0 + +BASE=/hive/data/genomes/hg38/bed/lastzRheMac10.2019-07-03 +TMPDIR=/dev/shm +' > DEF + # << happy emacs + + time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \ + -syntenicNet -fileServer=hgwdev \ + -chainMinScore=5000 -chainLinearGap=medium \ + -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku) > do.log 2>&1 + # real 97m36.383s + + cat fb.hg38.chainRheMac10Link.txt + # 2596157811 bases of 3095998939 (83.855%) in intersection + cat fb.hg38.chainSynRheMac10Link.txt + # 2564291936 bases of 3095998939 (82.826%) in intersection + + time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` hg38 rheMac10) > rbest.log 2>&1 & + # real 120m8.526s + + cat fb.hg38.chainRBest.RheMac10.txt + # 2422981614 bases of 3095998939 (78.262%) in intersection + + # and for the swap: + mkdir /hive/data/genomes/rheMac10/bed/blastz.hg38.swap + cd /hive/data/genomes/rheMac10/bed/blastz.hg38.swap + + time (doBlastzChainNet.pl -verbose=2 \ + /hive/data/genomes/hg38/bed/lastzRheMac10.2019-07-03/DEF \ + -swap -chainMinScore=5000 -chainLinearGap=medium \ + -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \ + -syntenicNet) > swap.log 2>&1 + # real 61m39.505s + + cat fb.rheMac10.chainHg38Link.txt + # 2485840115 bases of 2936892733 (84.642%) in intersection + cat fb.rheMac10.chainSynHg38Link.txt + # 2465210442 bases of 2936892733 (83.939%) in intersection + + time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` rheMac10 hg38) > rbest.log 2>&1 + # real 126m29.919s + + cat fb.rheMac10.chainRBest.Hg38.txt + # 2424028492 bases of 2936892733 (82.537%) in intersection + 
+#########################################################################