80a6b42f5687642d83715afc58c46a095610c2dc
hiram
  Tue Apr 27 08:10:49 2021 -0700
liftOver chainNet to GCF_900094665.1 per user request and cleaning hive of crispr results refs #27344

diff --git src/hg/makeDb/doc/mm10.txt src/hg/makeDb/doc/mm10.txt
index 9304e86..8296bb1 100644
--- src/hg/makeDb/doc/mm10.txt
+++ src/hg/makeDb/doc/mm10.txt
@@ -14941,30 +14941,35 @@
 
     ~/kent/src/hg/utils/automation/doCrispr.pl -continue=offTargets \
       -stop=offTargets -buildDir=`pwd` mm10 ensGene
 # Completed: 77942 of 77942 jobs
 # CPU time in finished jobs:    1397706s   23295.10m   388.25h   16.18d  0.044 y
 # IO & Wait Time:                313616s    5226.94m    87.12h    3.63d  0.010 y
 # Average job time:                  22s       0.37m     0.01h    0.00d
 # Longest finished job:              35s       0.58m     0.01h    0.00d
 # Submission to last job:          9239s     153.98m     2.57h    0.11d
 
 
     ~/kent/src/hg/utils/automation/doCrispr.pl -continue=load \
       -stop=load -buildDir=`pwd` mm10 ensGene
     # real    235m41.378s
 
+    time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 \
+       -continue=cleanup mm10 -fileServer=hgwdev -buildDir=`pwd` \
+         -smallClusterHub=hgwdev -bigClusterHub=ku \
+      -workhorse=hgwdev) > cleanup.log 2>&1
+    # real    100m50.151s
     ##########################################################################
     # FIXUP broken files (working - Max and Hiram - 2018-04,05)
 
     # Max generated a new specScores.tab, add in the chrM specScores
     # and make a unique set in a new specScores.tab file
 
     cd /hive/data/genomes/mm10/bed/crispr.10K/uniqSpecScores
 
     printf "targetSeq\tmitSpecScore\tofftargetCount\ttargetGenomeGeneLocus\n" \
 	> max.withChrM.specScores.tab
 
     grep -h -v targetSeq ../specScores.max.tab ../addChrM/specScores.tab \
 	| $HOME/bin/x86_64/gnusort -S100G --parallel=32 -u \
 	>> max.withChrM.specScores.tab
     # real    1m39.468s
@@ -17470,30 +17475,37 @@
 # effScores:
 Completed: 27697 of 27697 jobs
 CPU time in finished jobs:   14348277s  239137.94m  3985.63h  166.07d  0.455 y
 IO & Wait Time:                150120s    2502.01m    41.70h    1.74d  0.005 y
 Average job time:                 523s       8.72m     0.15h    0.01d
 Longest finished job:            1966s      32.77m     0.55h    0.02d
 Submission to last job:         15067s     251.12m     4.19h    0.17d
 
 # offTargets:
 Completed: 147394 of 147394 jobs
 CPU time in finished jobs:    2213680s   36894.66m   614.91h   25.62d  0.070 y
 IO & Wait Time:               2663355s   44389.25m   739.82h   30.83d  0.084 y
 Average job time:                  33s       0.55m     0.01h    0.00d
 Longest finished job:              68s       1.13m     0.02h    0.00d
 
+    # cleaning up 2021-04-24 - Hiram
+    time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 \
+       -continue=cleanup mm10 -tableName=crisprAll -fileServer=hgwdev \
+    -buildDir=`pwd` -smallClusterHub=hgwdev -bigClusterHub=ku \
+      -workhorse=hgwdev) > cleanup.log 2>&1
+    # real    430m18.499s
+
 #########################################################################
 
 # For ENCODE 3 tracks, see doc/encode3/mouse.txt
 
 ##############################################################################
 # LASTZ Gorilla gorGor6 (DONE - 2019-11-20 - Hiram)
     #	establish a screen to control this job
     screen -S mm10gorGor6
     mkdir /hive/data/genomes/mm10/bed/lastzGorGor6.2019-11-20
     cd /hive/data/genomes/mm10/bed/lastzGorGor6.2019-11-20
 
     printf '# mouse vs. gorilla
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
 
 # TARGET: Mouse Mm10
@@ -18248,16 +18260,119 @@
 	> swap.log 2>&1 &
     #	real    24m33.940s
 
     sed -e 's/^/    # /;' fb.xenTro10.chainMm10Link.txt
     # 121679610 bases of 1448461978 (8.401%) in intersection
     sed -e 's/^/    # /;' fb.xenTro10.chainSynMm10Link.txt
     # 35210769 bases of 1448461978 (2.431%) in intersection
 
   time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` xenTro10 mm10) \
          > rbest.log 2>&1 &
     # real    372m38.637s
 
     sed -e 's/^/    # /;' fb.xenTro10.chainRBest.Mm10.txt
     # 58901471 bases of 1448461978 (4.066%) in intersection
 
-#########################################################################
+##############################################################################
+# LASTZ Ryukyu mouse GCF_900094665.1 (DONE - 2021-04-26 - Hiram)
+    mkdir /hive/data/genomes/mm10/bed/lastzGCF_900094665.1.2021-04-26
+    cd /hive/data/genomes/mm10/bed/lastzGCF_900094665.1.2021-04-26
+
+    printf '# mm10 vs GCF_900094665.1 Mus caroli (Ryukyu mouse)
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
+BLASTZ_T=2
+BLASTZ_O=400
+BLASTZ_E=30
+BLASTZ_M=254
+# default BLASTZ_Q score matrix:
+#       A     C     G     T
+# A    91  -114   -31  -123
+# C  -114   100  -125   -31
+# G   -31  -125   100  -114
+# T  -123   -31  -114    91
+
+# TARGET: Mouse Mm10
+SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
+SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+SEQ1_LIMIT=10
+
+# QUERY: Mus caroli - Ryukyu mouse GCF_900094665.1
+SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/900/094/665/GCF_900094665.1/GCF_900094665.1.2bit
+SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/900/094/665/GCF_900094665.1/GCF_900094665.1.chrom.sizes.txt
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+SEQ2_LIMIT=100
+
+BASE=/hive/data/genomes/mm10/bed/lastzGCF_900094665.1.2021-04-26
+TMPDIR=/dev/shm
+' > DEF
+
+export targetDb="mm10"
+export asmId="GCF_900094665.1"
+export gcPath="GCF/900/094/665"
+cd /hive/data/genomes/$targetDb/bed/lastz${asmId}.2021-04-26
+time (doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF \
+   -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+     -syntenicNet -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
+cat fb.${targetDb}.chain.${asmId}Link.txt
+cat fb.${targetDb}.chainSyn.${asmId}Link.txt
+
+grep -w real do.log | sed -e 's/^/    # /;'
+    # real      207m59.745s
+
+sed -e 's/^/    # /;' fb.$targetDb.chain.${asmId}Link.txt
+    # 2303277151 bases of 2818974548 (81.706%) in intersection
+sed -e 's/^/    # /;' fb.$targetDb.chainSyn.${asmId}Link.txt
+    # 2187910131 bases of 2818974548 (77.614%) in intersection
+
+time (doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
+-query2Bit="/hive/data/genomes/asmHubs/$gcPath/${asmId}/${asmId}.2bit" \
+-querySizes="/hive/data/genomes/asmHubs/$gcPath/${asmId}/${asmId}.chrom.sizes.txt" \
+$targetDb ${asmId}) >> rbest.log 2>&1
+grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real      274m57.907s
+
+sed -e 's/^/    # /;' fb.$targetDb.chainRBest.$asmId.txt
+    # 2074680070 bases of 2818974548 (73.597%) in intersection
+
+# total time for all the above:
+    # real    482m57.733s
+
+#######################################
+### the swap to the assembly hub
+export target="mm10"
+export Target="Mm10"
+export query="GCF_900094665.1"
+export asmId="GCF_900094665.1_CAROLI_EIJ_v1.1"
+export gcPath="GCF/900/094/665"
+
+mkdir -p /hive/data/genomes/asmHubs/refseqBuild/$gcPath/$asmId/trackData/blastz.$target.swap
+cd /hive/data/genomes/asmHubs/refseqBuild/$gcPath/$asmId/trackData/blastz.$target.swap
+
+time (doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 -swapDir=`pwd` \
+    /hive/data/genomes/${target}/bed/lastz.${query}/DEF -syntenicNet \
+  -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+    -swap -chainMinScore=3000 -chainLinearGap=medium) >> swap.log 2>&1
+grep -w real swap.log | sed -e 's/^/    # /;'
+    # real    554m2.489s
+
+sed -e 's/^/    # /;' fb.${query}.chain.${Target}Link.txt
+    # 2116460904 bases of 2553121441 (82.897%) in intersection
+sed -e 's/^/    # /;' fb.${query}.chainSyn.${Target}Link.txt
+    # 2081173211 bases of 2553121441 (81.515%) in intersection
+
+time (doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
+-target2Bit="/hive/data/genomes/asmHubs/$gcPath/${query}/${query}.2bit" \
+-targetSizes="/hive/data/genomes/asmHubs/$gcPath/${query}/${query}.chrom.sizes.txt" \
+$query $target) >> rbest.log 2>&1
+grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real      246m55.342s
+
+sed -e 's/^/    # /;' fb.${query}.chainRBest.${Target}.txt
+    # 2078102689 bases of 2553121441 (81.395%) in intersection
+
+# Complete run time for the entire swap operation:
+    # real    367m14.987s
+
+##############################################################################