12b6db960b81f9c254ca6f864052b7ef58e20e91
hiram
  Wed Mar 30 15:09:57 2022 -0700
chainNet lastz runs to CHM13, HG002 maternal, and HG002 paternal refs #28907

diff --git src/hg/makeDb/doc/hg38/lastzRuns.txt src/hg/makeDb/doc/hg38/lastzRuns.txt
index d03c73e..dcf568a 100644
--- src/hg/makeDb/doc/hg38/lastzRuns.txt
+++ src/hg/makeDb/doc/hg38/lastzRuns.txt
@@ -13345,15 +13345,291 @@
     sed -e 's/^/    # /;' fb.GCF_001704415.1.chainSynHg38Link.txt
     # 1290249255 bases of 2922813246 (44.144%) in intersection
 \    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
     \
    -target2bit="/hive/data/genomes/asmHubs/GCF/001/704/415/GCF_001704415.1/GCF_001704415.1.2bit" \
 -targetSizes="/hive/data/genomes/asmHubs/GCF/001/704/415/GCF_001704415.1/GCF_001704415.1.chrom.sizes.txt" \
    GCF_001704415.1 hg38) > rbest.log 2>&1
 
     grep -w real rbest.log | sed -e 's/^/    # /;'
     # real	367m13.529s
 
     sed -e 's/^/    # /;' fb.GCF_001704415.1.chainRBest.Hg38.txt
     # 1283079843 bases of 2922813246 (43.899%) in intersection
 
 ##############################################################################
+# LASTZ Human Hg38 vs. human GCA_009914755.4
+#    (DONE - 2022-03-28 - hiram)
+
+    mkdir /hive/data/genomes/hg38/bed/lastzGCA_009914755.4.2022-03-28
+    cd /hive/data/genomes/hg38/bed/lastzGCA_009914755.4.2022-03-28
+
+    printf '# human GCA_009914755.4 vs. Human Hg38
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
+BLASTZ_T=2
+BLASTZ_O=600
+BLASTZ_E=150
+BLASTZ_M=254
+BLASTZ_K=4500
+BLASTZ_Y=15000
+BLASTZ_Q=/hive/data/staging/data/blastz/human_chimp.v2.q
+#       A     C     G     T
+# A    90  -330  -236  -356
+# C  -330   100  -318  -236
+# G  -236  -318   100  -330
+# T  -356  -236  -330    90
+
+# TARGET: Human Hg38
+SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
+SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+SEQ1_LIMIT=40
+
+# QUERY: human GCA_009914755.4
+SEQ2_DIR=/hive/data/genomes/asmHubs/GCA/009/914/755/GCA_009914755.4/GCA_009914755.4.2bit
+SEQ2_LEN=/hive/data/genomes/asmHubs/GCA/009/914/755/GCA_009914755.4/GCA_009914755.4.chrom.sizes.txt
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+SEQ2_LIMIT=100
+
+BASE=/hive/data/genomes/hg38/bed/lastzGCA_009914755.4.2022-03-28
+TMPDIR=/dev/shm
+
+' > DEF
+
+    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \
+       -qAsmId GCA_009914755.4_CHM13_T2T_v2.0 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+        -chainMinScore=5000 -chainLinearGap=medium) > do.log 2>&1
+    grep -w real do.log | sed -e 's/^/    # /;'
+    # real	357m59.184s
+
+    sed -e 's/^/    # /;' fb.hg38.chainGCA_009914755.4Link.txt
+    # 3037208207 bases of 3272116950 (92.821%) in intersection
+    sed -e 's/^/    # /;' fb.hg38.chainSynGCA_009914755.4Link.txt
+    # 3034364357 bases of 3272116950 (92.734%) in intersection
+
+    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
+       \
+      -query2Bit="/hive/data/genomes/asmHubs/GCA/009/914/755/GCA_009914755.4/GCA_009914755.4.2bit" \
+-querySizes="/hive/data/genomes/asmHubs/GCA/009/914/755/GCA_009914755.4/GCA_009914755.4.chrom.sizes.txt" \
+        hg38 GCA_009914755.4) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real	80m56.343s
+
+    sed -e 's/^/    # /;' fb.hg38.chainRBest.GCA_009914755.4.txt
+    # 2867463309 bases of 3272116950 (87.633%) in intersection
+
+    ### and for the swap
+
+    cd /hive/data/genomes/asmHubs/allBuild/GCA/009/914/755/GCA_009914755.4_CHM13_T2T_v2.0/trackData/blastz.hg38.swap
+
+   time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \
+   -qAsmId GCA_009914755.4_CHM13_T2T_v2.0 /hive/data/genomes/hg38/bed/lastzGCA_009914755.4.2022-03-28/DEF -swapDir=`pwd` \
+  -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+    -chainMinScore=5000 -chainLinearGap=medium) > swap.log 2>&1
+
+    grep -w real swap.log | sed -e 's/^/    # /;'
+    # real	158m23.085s
+
+    sed -e 's/^/    # /;' fb.GCA_009914755.4.chainHg38Link.txt
+    # 2906145588 bases of 3117292070 (93.227%) in intersection
+    sed -e 's/^/    # /;' fb.GCA_009914755.4.chainSynHg38Link.txt
+    # 2896674405 bases of 3117292070 (92.923%) in intersection
+    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
+    \
+   -target2bit="/hive/data/genomes/asmHubs/GCA/009/914/755/GCA_009914755.4/GCA_009914755.4.2bit" \
+-targetSizes="/hive/data/genomes/asmHubs/GCA/009/914/755/GCA_009914755.4/GCA_009914755.4.chrom.sizes.txt" \
+   GCA_009914755.4 hg38) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real	105m37.568s
+
+    sed -e 's/^/    # /;' fb.GCA_009914755.4.chainRBest.Hg38.txt
+    # 2863655887 bases of 3117292070 (91.864%) in intersection
+
+##############################################################################
+# LASTZ Human Hg38 vs. human GCA_021951015.1
+#    (DONE - 2022-03-29 - hiram)
+
+    mkdir /hive/data/genomes/hg38/bed/lastzGCA_021951015.1.2022-03-29
+    cd /hive/data/genomes/hg38/bed/lastzGCA_021951015.1.2022-03-29
+
+    printf '# human GCA_021951015.1 vs. Human Hg38
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
+BLASTZ_T=2
+BLASTZ_O=600
+BLASTZ_E=150
+BLASTZ_M=254
+BLASTZ_K=4500
+BLASTZ_Y=15000
+BLASTZ_Q=/hive/data/staging/data/blastz/human_chimp.v2.q
+#       A     C     G     T
+# A    90  -330  -236  -356
+# C  -330   100  -318  -236
+# G  -236  -318   100  -330
+# T  -356  -236  -330    90
+
+# TARGET: Human Hg38
+SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
+SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+SEQ1_LIMIT=40
+
+# QUERY: human GCA_021951015.1
+SEQ2_DIR=/hive/data/genomes/asmHubs/GCA/021/951/015/GCA_021951015.1/GCA_021951015.1.2bit
+SEQ2_LEN=/hive/data/genomes/asmHubs/GCA/021/951/015/GCA_021951015.1/GCA_021951015.1.chrom.sizes.txt
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+SEQ2_LIMIT=100
+
+BASE=/hive/data/genomes/hg38/bed/lastzGCA_021951015.1.2022-03-29
+TMPDIR=/dev/shm
+
+' > DEF
+
+    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \
+       -qAsmId GCA_021951015.1_HG002.mat.cur.20211005 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+        -chainMinScore=5000 -chainLinearGap=medium) > do.log 2>&1
+    grep -w real do.log | sed -e 's/^/    # /;'
+    # real	683m39.622s
+
+    sed -e 's/^/    # /;' fb.hg38.chainGCA_021951015.1Link.txt
+    # 3023153297 bases of 3272116950 (92.391%) in intersection
+    sed -e 's/^/    # /;' fb.hg38.chainSynGCA_021951015.1Link.txt
+    # 3019104161 bases of 3272116950 (92.268%) in intersection
+
+    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
+       \
+      -query2Bit="/hive/data/genomes/asmHubs/GCA/021/951/015/GCA_021951015.1/GCA_021951015.1.2bit" \
+-querySizes="/hive/data/genomes/asmHubs/GCA/021/951/015/GCA_021951015.1/GCA_021951015.1.chrom.sizes.txt" \
+        hg38 GCA_021951015.1) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real	80m40.540s
+
+    sed -e 's/^/    # /;' fb.hg38.chainRBest.GCA_021951015.1.txt
+    # 2838656141 bases of 3272116950 (86.753%) in intersection
+
+    ### and for the swap
+
+    cd /hive/data/genomes/asmHubs/allBuild/GCA/021/951/015/GCA_021951015.1_HG002.mat.cur.20211005/trackData/blastz.hg38.swap
+
+   time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \
+   -qAsmId GCA_021951015.1_HG002.mat.cur.20211005 /hive/data/genomes/hg38/bed/lastzGCA_021951015.1.2022-03-29/DEF -swapDir=`pwd` \
+  -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+    -chainMinScore=5000 -chainLinearGap=medium) > swap.log 2>&1
+
+    grep -w real swap.log | sed -e 's/^/    # /;'
+    # real	133m16.011s
+
+    sed -e 's/^/    # /;' fb.GCA_021951015.1.chainHg38Link.txt
+    # 2881437844 bases of 3061735012 (94.111%) in intersection
+    sed -e 's/^/    # /;' fb.GCA_021951015.1.chainSynHg38Link.txt
+    # 2872962839 bases of 3061735012 (93.834%) in intersection
+    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
+    \
+   -target2bit="/hive/data/genomes/asmHubs/GCA/021/951/015/GCA_021951015.1/GCA_021951015.1.2bit" \
+-targetSizes="/hive/data/genomes/asmHubs/GCA/021/951/015/GCA_021951015.1/GCA_021951015.1.chrom.sizes.txt" \
+   GCA_021951015.1 hg38) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real	84m40.905s
+
+    sed -e 's/^/    # /;' fb.GCA_021951015.1.chainRBest.Hg38.txt
+    # 2836030662 bases of 3061735012 (92.628%) in intersection
+
+##############################################################################
+# LASTZ Human Hg38 vs. human GCA_021950905.1
+#    (DONE - 2022-03-29 - hiram)
+
+    mkdir /hive/data/genomes/hg38/bed/lastzGCA_021950905.1.2022-03-29
+    cd /hive/data/genomes/hg38/bed/lastzGCA_021950905.1.2022-03-29
+
+    printf '# human GCA_021950905.1 vs. Human Hg38
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
+BLASTZ_T=2
+BLASTZ_O=600
+BLASTZ_E=150
+BLASTZ_M=254
+BLASTZ_K=4500
+BLASTZ_Y=15000
+BLASTZ_Q=/hive/data/staging/data/blastz/human_chimp.v2.q
+#       A     C     G     T
+# A    90  -330  -236  -356
+# C  -330   100  -318  -236
+# G  -236  -318   100  -330
+# T  -356  -236  -330    90
+
+# TARGET: Human Hg38
+SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
+SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+SEQ1_LIMIT=40
+
+# QUERY: human GCA_021950905.1
+SEQ2_DIR=/hive/data/genomes/asmHubs/GCA/021/950/905/GCA_021950905.1/GCA_021950905.1.2bit
+SEQ2_LEN=/hive/data/genomes/asmHubs/GCA/021/950/905/GCA_021950905.1/GCA_021950905.1.chrom.sizes.txt
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+SEQ2_LIMIT=100
+
+BASE=/hive/data/genomes/hg38/bed/lastzGCA_021950905.1.2022-03-29
+TMPDIR=/dev/shm
+
+' > DEF
+
+    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \
+       -qAsmId GCA_021950905.1_HG002.pat.cur.20211005 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+        -chainMinScore=5000 -chainLinearGap=medium) > do.log 2>&1
+    grep -w real do.log | sed -e 's/^/    # /;'
+    # real	588m18.846s
+
+    sed -e 's/^/    # /;' fb.hg38.chainGCA_021950905.1Link.txt
+    # 2895410388 bases of 3272116950 (88.487%) in intersection
+    sed -e 's/^/    # /;' fb.hg38.chainSynGCA_021950905.1Link.txt
+    # 2889961705 bases of 3272116950 (88.321%) in intersection
+
+    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
+       \
+      -query2Bit="/hive/data/genomes/asmHubs/GCA/021/950/905/GCA_021950905.1/GCA_021950905.1.2bit" \
+-querySizes="/hive/data/genomes/asmHubs/GCA/021/950/905/GCA_021950905.1/GCA_021950905.1.chrom.sizes.txt" \
+        hg38 GCA_021950905.1) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real	77m20.514s
+
+    sed -e 's/^/    # /;' fb.hg38.chainRBest.GCA_021950905.1.txt
+    # 2709013105 bases of 3272116950 (82.791%) in intersection
+
+    ### and for the swap
+
+    cd /hive/data/genomes/asmHubs/allBuild/GCA/021/950/905/GCA_021950905.1_HG002.pat.cur.20211005/trackData/blastz.hg38.swap
+
+   time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \
+   -qAsmId GCA_021950905.1_HG002.pat.cur.20211005 /hive/data/genomes/hg38/bed/lastzGCA_021950905.1.2022-03-29/DEF -swapDir=`pwd` \
+  -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+    -chainMinScore=5000 -chainLinearGap=medium) > swap.log 2>&1
+
+    grep -w real swap.log | sed -e 's/^/    # /;'
+    # real	128m39.988s
+
+    sed -e 's/^/    # /;' fb.GCA_021950905.1.chainHg38Link.txt
+    # 2752515526 bases of 2959277077 (93.013%) in intersection
+    sed -e 's/^/    # /;' fb.GCA_021950905.1.chainSynHg38Link.txt
+    # 2743116590 bases of 2959277077 (92.695%) in intersection
+    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
+    \
+   -target2bit="/hive/data/genomes/asmHubs/GCA/021/950/905/GCA_021950905.1/GCA_021950905.1.2bit" \
+-targetSizes="/hive/data/genomes/asmHubs/GCA/021/950/905/GCA_021950905.1/GCA_021950905.1.chrom.sizes.txt" \
+   GCA_021950905.1 hg38) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real	76m56.576s
+
+    sed -e 's/^/    # /;' fb.GCA_021950905.1.chainRBest.Hg38.txt
+    # 2708210994 bases of 2959277077 (91.516%) in intersection
+
+##############################################################################