271b05ba2f4fa9cc7e587faf606d9ad54e65e2f1
gperez2
  Tue Dec 6 21:33:28 2022 -0800
Updating and adding mm39 vs. mouse strains lastz/chain/net run for users, refs #30097 #30195

diff --git src/hg/makeDb/doc/mm39/lastzRuns.txt src/hg/makeDb/doc/mm39/lastzRuns.txt
index 14184eb..682cfdf 100644
--- src/hg/makeDb/doc/mm39/lastzRuns.txt
+++ src/hg/makeDb/doc/mm39/lastzRuns.txt
@@ -2318,30 +2318,34 @@
     sed -e 's/^/    # /;' fb.hg19.chainRBest.Mm39.txt
     # 892863094 bases of 2991710746 (29.845%) in intersection
 
 ##############################################################################
 # LASTZ Mouse Mm39 vs. Seba's short-tailed bat GCA_004027735.1 (DONE - 2022-10-10 - Gerardo)
 
 # should be able to run this from anywhere, this time it was run from:
     cd kent/src/hg/utils/automation
 
   time (~/kent/src/hg/utils/automation/pairLastz.sh \
 	mm39 GCA_004027735.1_CarPer_v1_BIUU mammal mammal) \
 	   > mm39.GCA_004027735.1_20221010.log 2>&1 &
   # check the total time
 grep -w real  mm39.GCA_004027735.1_20221010.log  | tail -1 | sed -e 's/^/    # /;'
     # real      4941m5.588s
+
+    # this command outputs this makeDoc text:
+
+    cat /hive/data/genomes/mm39/bed/lastzGCA_004027735.1.2022-10-10/makeDoc.txt
 ##############################################################################
 # LASTZ Mouse Mm39 vs. Seba's short-tailed bat GCA_004027735.1
 #    (DONE - 2022-10-10 - Gerardo)
 
     mkdir /hive/data/genomes/mm39/bed/lastzGCA_004027735.1.2022-10-10
     cd /hive/data/genomes/mm39/bed/lastzGCA_004027735.1.2022-10-10
 
     printf '# Seba's short-tailed bat GCA_004027735.1 vs. Mouse Mm39
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
 
 # TARGET: Mouse  mm39
 SEQ1_DIR=/hive/data/genomes/mm39/mm39.2bit
 SEQ1_LEN=/hive/data/genomes/mm39/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
@@ -2397,16 +2401,201 @@
     sed -e 's/^/    # /;' fb.GCA_004027735.1.chainMm39Link.txt
     # 687442098 bases of 2689411905 (25.561%) in intersection
     sed -e 's/^/    # /;' fb.GCA_004027735.1.chainSynMm39Link.txt
     # 261303649 bases of 2689411905 (9.716%) in intersection
 \    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
     \
    -target2bit="/hive/data/genomes/asmHubs/GCA/004/027/735/GCA_004027735.1/GCA_004027735.1.2bit" \
 -targetSizes="/hive/data/genomes/asmHubs/GCA/004/027/735/GCA_004027735.1/GCA_004027735.1.chrom.sizes.txt" \
    GCA_004027735.1 mm39) > rbest.log 2>&1
 
     grep -w real rbest.log | sed -e 's/^/    # /;'
     # real	1818m46.969s
 
     sed -e 's/^/    # /;' fb.GCA_004027735.1.chainRBest.Mm39.txt
     # 614276787 bases of 2689411905 (22.841%) in intersection
+##############################################################################
+# LASTZ Mouse Mm39 vs. eastern European house mouse GCA_001624775.1 (DONE - 2022-10-29 - Gerardo)
+
+  time (~/kent/src/hg/utils/automation/pairLastz.sh \
+	mm39 GCA_001624775.1_PWK_PhJ_v1 mammal mammal) \
+	   > mm39.GCA_001624775.1_20221029.log 2>&1 &
+  # check the total time
+grep -w real mm39.GCA_001624775.1_20221029.log | tail -1 | sed -e 's/^/    # /;'
+    # real     1337m18.429s
+
+    # this command outputs this makeDoc text:
+
+    cat /hive/data/genomes/mm39/bed/lastzGCA_001624775.1.2022-10-29/makeDoc.txt
+##############################################################################
+# LASTZ Mouse Mm39 vs. eastern European house mouse GCA_001624775.1
+#    (DONE - 2022-10-29 - Gerardo)
+
+    mkdir /hive/data/genomes/mm39/bed/lastzGCA_001624775.1.2022-10-29
+    cd /hive/data/genomes/mm39/bed/lastzGCA_001624775.1.2022-10-29
+
+    printf '# eastern European house mouse GCA_001624775.1 vs. Mouse Mm39
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
+
+# TARGET: Mouse  mm39
+SEQ1_DIR=/hive/data/genomes/mm39/mm39.2bit
+SEQ1_LEN=/hive/data/genomes/mm39/chrom.sizes
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+SEQ1_LIMIT=40
+
+# QUERY: eastern European house mouse 2016-04-26 GCA_001624775.1_PWK_PhJ_v1
+SEQ2_DIR=/hive/data/genomes/asmHubs/GCA/001/624/775/GCA_001624775.1/GCA_001624775.1.2bit
+SEQ2_LEN=/hive/data/genomes/asmHubs/GCA/001/624/775/GCA_001624775.1/GCA_001624775.1.chrom.sizes.txt
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+SEQ2_LIMIT=100
+
+BASE=/hive/data/genomes/mm39/bed/lastzGCA_001624775.1.2022-10-29
+TMPDIR=/dev/shm
+
+' > DEF
+
+    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \
+       -qAsmId GCA_001624775.1_PWK_PhJ_v1 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+        -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
+    grep -w real do.log | sed -e 's/^/    # /;'
+    # real	796m20.050s
+
+    sed -e 's/^/    # /;' fb.mm39.chainGCA_001624775.1Link.txt
+    # 2352701230 bases of 2728222451 (86.236%) in intersection
+    sed -e 's/^/    # /;' fb.mm39.chainSynGCA_001624775.1Link.txt
+    # 2250688932 bases of 2728222451 (82.497%) in intersection
+
+    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
+       \
+      -query2Bit="/hive/data/genomes/asmHubs/GCA/001/624/775/GCA_001624775.1/GCA_001624775.1.2bit" \
+-querySizes="/hive/data/genomes/asmHubs/GCA/001/624/775/GCA_001624775.1/GCA_001624775.1.chrom.sizes.txt" \
+        mm39 GCA_001624775.1) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real	140m24.553s
+
+    sed -e 's/^/    # /;' fb.mm39.chainRBest.GCA_001624775.1.txt
+    # 2217435105 bases of 2728222451 (81.278%) in intersection
+
+    ### and for the swap
+
+    cd /hive/data/genomes/asmHubs/allBuild/GCA/001/624/775/GCA_001624775.1_PWK_PhJ_v1/trackData/blastz.mm39.swap
+
+   time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \
+   -qAsmId GCA_001624775.1_PWK_PhJ_v1 /hive/data/genomes/mm39/bed/lastzGCA_001624775.1.2022-10-29/DEF -swapDir=`pwd` \
+  -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+    -chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
+
+    grep -w real swap.log | sed -e 's/^/    # /;'
+    # real	272m39.311s
+
+    sed -e 's/^/    # /;' fb.GCA_001624775.1.chainMm39Link.txt
+    # 2243083403 bases of 2559987392 (87.621%) in intersection
+    sed -e 's/^/    # /;' fb.GCA_001624775.1.chainSynMm39Link.txt
+    # 2222161260 bases of 2559987392 (86.804%) in intersection
+    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
+    \
+   -target2bit="/hive/data/genomes/asmHubs/GCA/001/624/775/GCA_001624775.1/GCA_001624775.1.2bit" \
+-targetSizes="/hive/data/genomes/asmHubs/GCA/001/624/775/GCA_001624775.1/GCA_001624775.1.chrom.sizes.txt" \
+   GCA_001624775.1 mm39) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real	127m54.093s
+
+    sed -e 's/^/    # /;' fb.GCA_001624775.1.chainRBest.Mm39.txt
+    # 2221859352 bases of 2559987392 (86.792%) in intersection
+##############################################################################
+# LASTZ Mouse Mm39 vs. western European house mouse GCA_001624835.1 (DONE - 2022-10-29 - Gerardo)
+
+  time (~/kent/src/hg/utils/automation/pairLastz.sh \
+	mm39 GCA_001624835.1_WSB_EiJ_v1 mammal mammal) \
+	   > mm39.GCA_001624835.1_20221029.log 2>&1 &
+  # check the total time
+grep -w real  mm39.GCA_001624835.1_20221029.log  | tail -1 | sed -e 's/^/    # /;'
+    # real      1435m16.301s
+
+    # this command outputs this makeDoc text:
+
+    cat /hive/data/genomes/mm39/bed/lastzGCA_001624835.1.2022-10-29/makeDoc.txt
+##############################################################################
+# LASTZ Mouse Mm39 vs. western European house mouse GCA_001624835.1
+#    (DONE - 2022-10-29 - Gerardo)
+
+    mkdir /hive/data/genomes/mm39/bed/lastzGCA_001624835.1.2022-10-29
+    cd /hive/data/genomes/mm39/bed/lastzGCA_001624835.1.2022-10-29
+
+    printf '# western European house mouse GCA_001624835.1 vs. Mouse Mm39
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
+
+# TARGET: Mouse  mm39
+SEQ1_DIR=/hive/data/genomes/mm39/mm39.2bit
+SEQ1_LEN=/hive/data/genomes/mm39/chrom.sizes
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+SEQ1_LIMIT=40
+
+# QUERY: western European house mouse 2016-04-26 GCA_001624835.1_WSB_EiJ_v1
+SEQ2_DIR=/hive/data/genomes/asmHubs/GCA/001/624/835/GCA_001624835.1/GCA_001624835.1.2bit
+SEQ2_LEN=/hive/data/genomes/asmHubs/GCA/001/624/835/GCA_001624835.1/GCA_001624835.1.chrom.sizes.txt
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+SEQ2_LIMIT=100
+
+BASE=/hive/data/genomes/mm39/bed/lastzGCA_001624835.1.2022-10-29
+TMPDIR=/dev/shm
+
+' > DEF
+
+    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \
+       -qAsmId GCA_001624835.1_WSB_EiJ_v1 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+        -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
+    grep -w real do.log | sed -e 's/^/    # /;'
+    # real	1043m35.512s
+
+    sed -e 's/^/    # /;' fb.mm39.chainGCA_001624835.1Link.txt
+    # 2340951592 bases of 2728222451 (85.805%) in intersection
+    sed -e 's/^/    # /;' fb.mm39.chainSynGCA_001624835.1Link.txt
+    # 2248126645 bases of 2728222451 (82.403%) in intersection
+
+    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
+       \
+      -query2Bit="/hive/data/genomes/asmHubs/GCA/001/624/835/GCA_001624835.1/GCA_001624835.1.2bit" \
+-querySizes="/hive/data/genomes/asmHubs/GCA/001/624/835/GCA_001624835.1/GCA_001624835.1.chrom.sizes.txt" \
+        mm39 GCA_001624835.1) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real	83m41.863s
+
+    sed -e 's/^/    # /;' fb.mm39.chainRBest.GCA_001624835.1.txt
+    # 2216500165 bases of 2728222451 (81.243%) in intersection
+
+    ### and for the swap
+
+    cd /hive/data/genomes/asmHubs/allBuild/GCA/001/624/835/GCA_001624835.1_WSB_EiJ_v1/trackData/blastz.mm39.swap
+
+   time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \
+   -qAsmId GCA_001624835.1_WSB_EiJ_v1 /hive/data/genomes/mm39/bed/lastzGCA_001624835.1.2022-10-29/DEF -swapDir=`pwd` \
+  -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+    -chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
+
+    grep -w real swap.log | sed -e 's/^/    # /;'
+    # real	245m21.222s
+
+    sed -e 's/^/    # /;' fb.GCA_001624835.1.chainMm39Link.txt
+    # 2231272516 bases of 2689657557 (82.957%) in intersection
+    sed -e 's/^/    # /;' fb.GCA_001624835.1.chainSynMm39Link.txt
+    # 2222181450 bases of 2689657557 (82.619%) in intersection
+    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
+    \
+   -target2bit="/hive/data/genomes/asmHubs/GCA/001/624/835/GCA_001624835.1/GCA_001624835.1.2bit" \
+-targetSizes="/hive/data/genomes/asmHubs/GCA/001/624/835/GCA_001624835.1/GCA_001624835.1.chrom.sizes.txt" \
+   GCA_001624835.1 mm39) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real	62m37.202s
+
+    sed -e 's/^/    # /;' fb.GCA_001624835.1.chainRBest.Mm39.txt
+    # 2218790179 bases of 2689657557 (82.493%) in intersection
+