0f52e6e2738f1a2a84e178d24e144efdeeb81a36 hiram Tue Apr 27 14:14:21 2021 -0700 running cleanup step on crispr tracks no redmine diff --git src/hg/makeDb/doc/hg38/crispr.txt src/hg/makeDb/doc/hg38/crispr.txt index 48440ee..f381ece 100644 --- src/hg/makeDb/doc/hg38/crispr.txt +++ src/hg/makeDb/doc/hg38/crispr.txt @@ -1,56 +1,62 @@ # See ../../crisprTrack/README.txt (Done. 2016-09-15 max) doCrispr.sh hg38 knownGene ############################################################################### # with 10K shoulders (working - 2018-06-06 - Hiram) mkdir /hive/data/genomes/hg38/bed/crispr.10K cd /hive/data/genomes/hg38/bed/crispr.10K time (/cluster/home/hiram/kent/src/hg/utils/automation/doCrispr.pl \ -buildDir=`pwd` -bigClusterHub=ku -smallClusterHub=ku \ -workhorse=hgwdev -stop=ranges hg38 knownGene) > ranges.log 2>&1 # real 0m52.339s time (/cluster/home/hiram/kent/src/hg/utils/automation/doCrispr.pl \ -buildDir=`pwd` -bigClusterHub=ku -smallClusterHub=ku \ -continue=guides -stop=guides -workhorse=hgwdev hg38 knownGene) \ > guides.log 2>&1 # real 12m40.910s time (/cluster/home/hiram/kent/src/hg/utils/automation/doCrispr.pl \ -buildDir=`pwd` -bigClusterHub=ku -smallClusterHub=ku \ -continue=specScores -stop=specScores -workhorse=hgwdev hg38 \ knownGene) > specScores.log 2>&1 # this failed when trying to run on ku: # Traceback (most recent call last): # File "/hive/data/outside/crisprTrack/scripts/splitGuidesSpecScore.py", line 39, in <module> # guideCounts[guide[:20]]+=1 # MemoryError # Command failed: # ssh -x -o 'StrictHostKeyChecking = no' -o 'BatchMode = yes' ku nice /hive/data/genomes/hg38/bed/crispr.10K/specScores/runSpecScores.bash # real 22m46.921s # run this command on hgwdev time /cluster/software/bin/python /hive/data/outside/crisprTrack/scripts/splitGuidesSpecScore.py ../allGuides.txt tmp/inFa jobNames.txt Writing tmp/inFa/1781201.fa Writing tmp/inFa/1781202.fa 124684218 sequences written 3368831 sequences removed as they are non-unique 0 sequences removed as they were already done before real 35m17.468s # then, continuing the kluster run: ssh ku cd /hive/data/genomes/hg38/bed/crispr.10K/specScores gensub2 jobNames.txt single gsub jobList time para create jobList # 1781204 jobs written to /hive/data/genomes/hg38/bed/crispr.10K/specScores/batch # real 15m24.369s + # hive cleaning 2021-04-27 + time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 \ + -continue=cleanup hg38 -fileServer=hgwdev -buildDir=`pwd` \ + -smallClusterHub=hgwdev -bigClusterHub=ku -workhorse=hgwdev) \ + > cleanup.log 2>&1 + ###############################################################################