0f52e6e2738f1a2a84e178d24e144efdeeb81a36
hiram
  Tue Apr 27 14:14:21 2021 -0700
running cleanup step on crispr tracks, no redmine

diff --git src/hg/makeDb/doc/hg38/crispr.txt src/hg/makeDb/doc/hg38/crispr.txt
index 48440ee..f381ece 100644
--- src/hg/makeDb/doc/hg38/crispr.txt
+++ src/hg/makeDb/doc/hg38/crispr.txt
@@ -1,56 +1,62 @@
 # See ../../crisprTrack/README.txt (Done. 2016-09-15 max)
 doCrispr.sh hg38 knownGene
 
 ###############################################################################
 # with 10K shoulders (working - 2018-06-06 - Hiram)
 
 mkdir /hive/data/genomes/hg38/bed/crispr.10K
 cd /hive/data/genomes/hg38/bed/crispr.10K
 
 time (/cluster/home/hiram/kent/src/hg/utils/automation/doCrispr.pl \
    -buildDir=`pwd` -bigClusterHub=ku -smallClusterHub=ku \
         -workhorse=hgwdev -stop=ranges hg38 knownGene) > ranges.log 2>&1
 # real    0m52.339s
 
 time (/cluster/home/hiram/kent/src/hg/utils/automation/doCrispr.pl \
    -buildDir=`pwd` -bigClusterHub=ku -smallClusterHub=ku \
         -continue=guides -stop=guides -workhorse=hgwdev hg38 knownGene) \
 	> guides.log 2>&1
 # real    12m40.910s
 
 time (/cluster/home/hiram/kent/src/hg/utils/automation/doCrispr.pl \
    -buildDir=`pwd` -bigClusterHub=ku -smallClusterHub=ku \
         -continue=specScores -stop=specScores -workhorse=hgwdev hg38 \
 	knownGene) > specScores.log 2>&1
 # this failed when trying to run on ku:
 # Traceback (most recent call last):
 #   File "/hive/data/outside/crisprTrack/scripts/splitGuidesSpecScore.py", line 39, in <module>
 #     guideCounts[guide[:20]]+=1
 # MemoryError
 # Command failed:
 # ssh -x -o 'StrictHostKeyChecking = no' -o 'BatchMode = yes' ku nice /hive/data/genomes/hg38/bed/crispr.10K/specScores/runSpecScores.bash
 
 # real    22m46.921s
 
     # run this command on hgwdev instead, since it hit the MemoryError on ku
     time /cluster/software/bin/python /hive/data/outside/crisprTrack/scripts/splitGuidesSpecScore.py ../allGuides.txt tmp/inFa jobNames.txt
 
 Writing tmp/inFa/1781201.fa
 Writing tmp/inFa/1781202.fa
 124684218 sequences written
 3368831 sequences removed as they are non-unique
 0 sequences removed as they were already done before
 
 real    35m17.468s
 
     # then, continuing the kluster run:
     ssh ku
     cd /hive/data/genomes/hg38/bed/crispr.10K/specScores
     gensub2 jobNames.txt single gsub jobList
     time para create jobList
 
 #     1781204 jobs written to /hive/data/genomes/hg38/bed/crispr.10K/specScores/batch
 
 # real    15m24.369s
 
+    # hive cleaning 2021-04-27: resume doCrispr.pl at its cleanup step
+    time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 \
+       -continue=cleanup hg38 -fileServer=hgwdev -buildDir=`pwd` \
+         -smallClusterHub=hgwdev -bigClusterHub=ku -workhorse=hgwdev) \
+             > cleanup.log 2>&1
+
 ###############################################################################