c5a0639c12640fb8d4688ccb7acbd1298713ee33
lrnassar
  Wed Aug 14 17:56:58 2024 -0700
Running some liftOver chains requested by 2 MLQs.

diff --git src/hg/makeDb/doc/susScr11/lastzRuns.txt src/hg/makeDb/doc/susScr11/lastzRuns.txt
index 8a7c9f4..8e5fa17 100644
--- src/hg/makeDb/doc/susScr11/lastzRuns.txt
+++ src/hg/makeDb/doc/susScr11/lastzRuns.txt
@@ -1,102 +1,260 @@
 ##############################################################################
 # LASTZ sheep GCA_002844635.1 (DONE - 2021-04-20 - Hiram)
     mkdir /hive/data/genomes/susScr11/bed/lastzGCA_002844635.1.2021-04-20
     cd /hive/data/genomes/susScr11/bed/lastzGCA_002844635.1.2021-04-20
 
     printf '# GCA_002844635.1 vs hg38
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
 BLASTZ_T=2
 BLASTZ_O=400
 BLASTZ_E=30
 BLASTZ_M=254
 # default BLASTZ_Q score matrix:
 #       A     C     G     T
 # A    91  -114   -31  -123
 # C  -114   100  -125   -31
 # G   -31  -125   100  -114
 # T  -123   -31  -114    91
 
 # TARGET: Human Hg38
 SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
 SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: sheep GCA_002844635.1
 SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/002/742/125/GCA_002844635.1/GCA_002844635.1.2bit
 SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/002/742/125/GCA_002844635.1/GCA_002844635.1.chrom.sizes.txt
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=100
 
 BASE=/hive/data/genomes/hg38/bed/lastzGCA_002844635.1.2021-04-20
 TMPDIR=/dev/shm
 ' > DEF
 
 export targetDb="hg38"
 export asmId="GCA_002844635.1"
 export gcPath="GCA/002/844/635"
 cd /hive/data/genomes/$targetDb/bed/lastz${asmId}.2021-04-20
 time (doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF \
    -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
      -syntenicNet -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
 cat fb.${targetDb}.chain.${asmId}Link.txt
 cat fb.${targetDb}.chainSyn.${asmId}Link.txt
 
 grep -w real do.log | sed -e 's/^/    # /;'
     # real      318m24.582s
 
 sed -e 's/^/    # /;' fb.$targetDb.chain.${asmId}Link.txt
     # 2431952924 bases of 2501912388 (97.204%) in intersection
 sed -e 's/^/    # /;' fb.$targetDb.chainSyn.${asmId}Link.txt
     # 2409981908 bases of 2501912388 (96.326%) in intersection
 
 time (doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
 -query2Bit="/hive/data/genomes/asmHubs/$gcPath/${asmId}/${asmId}.2bit" \
 -querySizes="/hive/data/genomes/asmHubs/$gcPath/${asmId}/${asmId}.chrom.sizes.txt" \
 $targetDb ${asmId}) >> rbest.log 2>&1
 grep -w real rbest.log | sed -e 's/^/    # /;'
     # real      93m2.731s
 
 sed -e 's/^/    # /;' fb.$targetDb.chainRBest.$asmId.txt
     # 2381230203 bases of 2501912388 (95.176%) in intersection
 
 ## Overall time for the above:
 # real    411m27.346s
 
 ### the swap to the assembly hub
 export target="susScr11"
 export Target="SusScr11"
 export query="GCA_002844635.1"
 export asmId="GCA_002844635.1_USMARCv1.0"
 export gcPath="GCA/002/844/635"
 
 mkdir -p /hive/data/genomes/asmHubs/refseqBuild/$gcPath/$asmId/trackData/blastz.$target.swap
 cd /hive/data/genomes/asmHubs/refseqBuild/$gcPath/$asmId/trackData/blastz.$target.swap
 
 time (doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 -swapDir=`pwd` \
     /hive/data/genomes/${target}/bed/lastz.${query}/DEF -syntenicNet \
   -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
     -swap -chainMinScore=3000 -chainLinearGap=medium) >> swap.log 2>&1
 grep -w real swap.log | sed -e 's/^/    # /;'
     # real      90m38.556s
 
 sed -e 's/^/    # /;' fb.${query}.chain.${Target}Link.txt
     # 2543057365 bases of 2755438182 (92.292%) in intersection
 sed -e 's/^/    # /;' fb.${query}.chainSyn.${Target}Link.txt
     # 2514166037 bases of 2755438182 (91.244%) in intersection
 
 time (doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
 -target2Bit="/hive/data/genomes/asmHubs/$gcPath/${query}/${query}.2bit" \
 -targetSizes="/hive/data/genomes/asmHubs/$gcPath/${query}/${query}.chrom.sizes.txt" \
 $query $target) >> rbest.log 2>&1
 grep -w real rbest.log | sed -e 's/^/    # /;'
     # real      86m43.960s
 
 sed -e 's/^/    # /;' fb.${query}.chainRBest.${Target}.txt
     # 2388751543 bases of 2755438182 (86.692%) in intersection
 
 # Complete run time for all the swap operation:
     # real    285m1.340s
 
 ##############################################################################
+# LASTZ Pig SusScr11 vs. Cow BosTau9
+#    (DONE - 2024-08-09 - lrnassar)
+
+    mkdir /hive/data/genomes/susScr11/bed/lastzBosTau9.2024-08-09
+    cd /hive/data/genomes/susScr11/bed/lastzBosTau9.2024-08-09
+
+    printf '# Cow BosTau9 vs. Pig SusScr11
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
+
+# TARGET: Pig  susScr11
+SEQ1_DIR=/hive/data/genomes/susScr11/susScr11.2bit
+SEQ1_LEN=/hive/data/genomes/susScr11/chrom.sizes
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+SEQ1_LIMIT=40
+
+# QUERY: Cow  bosTau9
+SEQ2_DIR=/hive/data/genomes/bosTau9/bosTau9.2bit
+SEQ2_LEN=/hive/data/genomes/bosTau9/chrom.sizes
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+SEQ2_LIMIT=100
+
+BASE=/hive/data/genomes/susScr11/bed/lastzBosTau9.2024-08-09
+TMPDIR=/dev/shm
+
+' > DEF
+
+    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl  -verbose=2 `pwd`/DEF -syntenicNet \
+        -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+        -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
+    grep -w real do.log | sed -e 's/^/    # /;'
+    # real      844m2.282s
+
+    sed -e 's/^/    # /;' fb.susScr11.chainBosTau9Link.txt
+    # 1542224987 bases of 2472073034 (62.386%) in intersection
+    sed -e 's/^/    # /;' fb.susScr11.chainSynBosTau9Link.txt
+    # 1458188575 bases of 2472073034 (58.986%) in intersection
+
+    time (~/kent/src/hg/utils/automation/doRecipBest.pl  -load -workhorse=hgwdev -buildDir=`pwd` \
+       \
+       \
+        susScr11 bosTau9) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real      379m32.380s
+
+    sed -e 's/^/    # /;' fb.susScr11.chainRBest.BosTau9.txt
+    # 1451300013 bases of 2472073034 (58.708%) in intersection
+
+    ### and for the swap
+
+    cd /hive/data/genomes/bosTau9/bed/blastz.susScr11.swap
+
+   time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl  -swap -verbose=2 \
+    /hive/data/genomes/susScr11/bed/lastzBosTau9.2024-08-09/DEF -swapDir=`pwd` \
+  -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+    -chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
+
+    grep -w real swap.log | sed -e 's/^/    # /;'
+    # real      237m58.241s
+
+    sed -e 's/^/    # /;' fb.bosTau9.chainSusScr11Link.txt
+    # 1542795700 bases of 2715853792 (56.807%) in intersection
+    sed -e 's/^/    # /;' fb.bosTau9.chainSynSusScr11Link.txt
+    # 1464525591 bases of 2715853792 (53.925%) in intersection
+    time (~/kent/src/hg/utils/automation/doRecipBest.pl  -load -workhorse=hgwdev -buildDir=`pwd` \
+    \
+    \
+   bosTau9 susScr11) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real      418m4.992s
+
+    sed -e 's/^/    # /;' fb.bosTau9.chainRBest.SusScr11.txt
+    # 1452583914 bases of 2715853792 (53.485%) in intersection
+
+##############################################################################
+# LASTZ Pig SusScr11 vs. goat GCF_001704415.2
+#    (DONE - 2024-08-12 - lrnassar)
+
+    mkdir /hive/data/genomes/susScr11/bed/lastzGCF_001704415.2.2024-08-12
+    cd /hive/data/genomes/susScr11/bed/lastzGCF_001704415.2.2024-08-12
+
+    printf '# goat GCF_001704415.2 vs. Pig SusScr11
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
+
+# TARGET: Pig  susScr11
+SEQ1_DIR=/hive/data/genomes/susScr11/susScr11.2bit
+SEQ1_LEN=/hive/data/genomes/susScr11/chrom.sizes
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+SEQ1_LIMIT=40
+
+# QUERY: goat 2016-08-24 GCF_001704415.2_ARS1.2
+SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/001/704/415/GCF_001704415.2/GCF_001704415.2.2bit
+SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/001/704/415/GCF_001704415.2/GCF_001704415.2.chrom.sizes.txt
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+SEQ2_LIMIT=100
+
+BASE=/hive/data/genomes/susScr11/bed/lastzGCF_001704415.2.2024-08-12
+TMPDIR=/dev/shm
+
+' > DEF
+
+    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \
+       -qAsmId GCF_001704415.2_ARS1.2 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+        -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
+    grep -w real do.log | sed -e 's/^/    # /;'
+    # real      568m2.570s
+
+    sed -e 's/^/    # /;' fb.susScr11.chainGCF_001704415.2Link.txt
+    # 1532623567 bases of 2501912388 (61.258%) in intersection
+    sed -e 's/^/    # /;' fb.susScr11.chainSynGCF_001704415.2Link.txt
+    # 1448119731 bases of 2501912388 (57.881%) in intersection
+
+    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
+       \
+      -query2Bit="/hive/data/genomes/asmHubs/GCF/001/704/415/GCF_001704415.2/GCF_001704415.2.2bit" \
+-querySizes="/hive/data/genomes/asmHubs/GCF/001/704/415/GCF_001704415.2/GCF_001704415.2.chrom.sizes.txt" \
+        susScr11 GCF_001704415.2) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real      495m38.221s
+
+    sed -e 's/^/    # /;' fb.susScr11.chainRBest.GCF_001704415.2.txt
+    # 1444668348 bases of 2501912388 (57.743%) in intersection
+
+    ### and for the swap
+
+    cd /hive/data/genomes/asmHubs/allBuild/GCF/001/704/415/GCF_001704415.2_ARS1.2/trackData/blastz.susScr11.swap
+
+   time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \
+   -qAsmId GCF_001704415.2_ARS1.2 /hive/data/genomes/susScr11/bed/lastzGCF_001704415.2.2024-08-12/DEF -swapDir=`pwd` \
+  -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+    -chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
+
+    grep -w real swap.log | sed -e 's/^/    # /;'
+    # real      413m9.780s
+
+    sed -e 's/^/    # /;' fb.GCF_001704415.2.chainSusScr11Link.txt
+    # 1557596633 bases of 2922617086 (53.295%) in intersection
+    sed -e 's/^/    # /;' fb.GCF_001704415.2.chainSynSusScr11Link.txt
+    # 1455695959 bases of 2922617086 (49.808%) in intersection
+    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
+    \
+   -target2Bit="/hive/data/genomes/asmHubs/GCF/001/704/415/GCF_001704415.2/GCF_001704415.2.2bit" \
+-targetSizes="/hive/data/genomes/asmHubs/GCF/001/704/415/GCF_001704415.2/GCF_001704415.2.chrom.sizes.txt" \
+   GCF_001704415.2 susScr11) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real      578m18.029s
+
+    sed -e 's/^/    # /;' fb.GCF_001704415.2.chainRBest.SusScr11.txt
+    # 1446482758 bases of 2922617086 (49.493%) in intersection
+
+##############################################################################