e20e93ca2a36c232d2fe3b2d918d03674f89d5d4
hiram
  Fri Jul 7 08:56:14 2017 -0700
the linked lists are too slow, maybe try arrays refs #18969

diff --git src/utils/crisprKmers/crisprKmers.c src/utils/crisprKmers/crisprKmers.c
index 8d2d122..a1e0304 100644
--- src/utils/crisprKmers/crisprKmers.c
+++ src/utils/crisprKmers/crisprKmers.c
@@ -1,83 +1,93 @@
 /* crisprKmers - find and annotate crispr sequences. */
 #include <popcntintrin.h>
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "dnautil.h"
 #include "dnaseq.h"
 #include "dnaLoad.h"
 #include "portable.h"
 #include "basicBed.h"
 #include "sqlNum.h"
 #include "binRange.h"
+#include "obscure.h"
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "crisprKmers - annotate crispr sequences\n"
   "usage:\n"
   "   crisprKmers <sequence>\n"
   "options:\n"
   "   where <sequence> is a .2bit file or .fa fasta sequence\n"
   "   -verbose=N - control processing steps with level of verbose:\n"
   "                default N=1 - only find crisprs, set up listings\n"
   "                N=2 - empty process crispr list into new list\n"
   "                N=3 - only count identicals during processing\n"
   "                N=4 - attempt measure all off targets and\n"
   "                      print out all crisprs as bed format\n"
   "   -bed=<file> - output kmers to given bed9+ file\n"
   "   -ranges=<file> - use specified bed3 file to limit which crisprs are\n"
-  "                  - measured, only those within bed items."
+  "                  - measured, only those with any overlap to these bed items.\n"
+  "   -dumpKmers=<file> - after scan of sequence, output kmers to file\n"
+  "                     - process will exit after this, use -loadKmers to continue\n"
+  "   -loadKmers=<file> - load kmers from previous scan of sequence from -dumpKmers\n"
+  "   NOTE: It is faster to simply scan the sequence to get the system ready\n"
+  "         to go.  Reading in the kmer file takes longer than scanning."
   );
 }
 
 static char *bedFileOut = NULL;	/* set with -bed=<file> argument */
 static char *ranges = NULL;	/* set with -ranges=<file> argument */
 static struct hash *rangesHash = NULL;	/* ranges into hash + binkeeper */
+static char *dumpKmers = NULL;	/* file name to write out kmers from scan */
+static char *loadKmers = NULL;	/* file name to read in kmers from previous scan */
 
 /* Command line validation table. */
 static struct optionSpec options[] = {
    {"bed", OPTION_STRING},
    {"ranges", OPTION_STRING},
+   {"dumpKmers", OPTION_STRING},
+   {"loadKmers", OPTION_STRING},
    {NULL, 0},
 };
 
 struct crispr
 /* one chromosome set of crisprs */
     {
     struct crispr *next;		/* Next in list. */
     long long sequence;	/* sequence value in 2bit format */
-    unsigned long long start;		/* chromosome start 0-relative */
+    long long start;		/* chromosome start 0-relative */
     char strand;			/* strand: + or - */
     int offBy0;		/* counting number of off targets, 0 mismatch */
     int offBy1;		/* counting number of off targets, 1 mismatch */
     int offBy2;		/* counting number of off targets, 2 mismatch */
     int offBy3;		/* counting number of off targets, 3 mismatch */
     int offBy4;		/* counting number of off targets, 4 mismatch */
     };
 
 struct crisprList
 /* all chromosome sets of crisprs */
     {
     struct crisprList *next;	/* Next in list. */
     char *chrom;		/* chrom name */
     int size;			/* chrom size */
     struct crispr *chromCrisprs;	/* all the crisprs on this chrom */
-    unsigned long long crisprCount;	/* number of crisprs on this chrom */
+    long long crisprCount;	/* number of crisprs on this chrom */
     };
 
 #define A_BASE	0
 #define C_BASE	1
 #define G_BASE	2
 #define T_BASE	3
 #define U_BASE	3
 static int orderedNtVal[256];	/* values in alpha order: ACGT 00 01 10 11 */
 				/* for easier sorting and complementing */
 static char bases[4];  /* for binary to ascii conversion */
 
 #define fortySixBits	0x3fffffffffff
 #define fortyEixhtBits	0xffffffffffff
 #define high32bits	0xffffffff00000000
 #define low32bits	0x00000000ffffffff
@@ -95,30 +105,41 @@
 {
 int i;
 for (i=0; i<ArraySize(orderedNtVal); i++)
     orderedNtVal[i] = -1;
 orderedNtVal['A'] = orderedNtVal['a'] = A_BASE;
 orderedNtVal['C'] = orderedNtVal['c'] = C_BASE;
 orderedNtVal['G'] = orderedNtVal['g'] = G_BASE;
 orderedNtVal['T'] = orderedNtVal['t'] = T_BASE;
 orderedNtVal['U'] = orderedNtVal['u'] = T_BASE;
 bases[A_BASE] = 'A';
 bases[C_BASE] = 'C';
 bases[G_BASE] = 'G';
 bases[T_BASE] = 'T';
 }	//	static void initOrderedNtVal()
 
+static void timingMessage(char *prefix, long long count, char *message,
+    long ms, char *units)
+{
+double perSecond = 0.0;
+if (ms > 0)
+    perSecond = 1000.0 * count / ms;
+
+verbose(1, "# %s: %lld %s @ %ld ms -> %.2f %s\n", prefix, count, message, ms, perSecond, units);
+}
+
+
 static struct hash *readRanges(char *bedFile)
 /* Read bed and return it as a hash keyed by chromName
  * with binKeeper values.  (from: src/hg/bedIntersect/bedIntersec.c) */
 {
 struct hash *hash = newHash(0);	/* key is chromName, value is binkeeper data */
 struct lineFile *lf = lineFileOpen(bedFile, TRUE);
 char *row[3];
 
 while (lineFileNextRow(lf, row, 3))
     {
     struct binKeeper *bk;
     struct bed3 *item;
     struct hashEl *hel = hashLookup(hash, row[0]);
     if (hel == NULL)
        {
@@ -209,41 +230,41 @@
 v = ((v & high8bits) >> 8) | ((v & low8bits) << 8);
 v = ((v & high4bits) >> 4) | ((v & low4bits) << 4);
 v = ((v & high2bits) >> 2) | ((v & low2bits) << 2);
 return v;
 }	//	static long long revComp(long long val)
 
 static struct crisprList *generateKmers(struct dnaSeq *seq)
 {
 struct crispr *crisprSet = NULL;
 struct crisprList *returnList = NULL;
 AllocVar(returnList);
 returnList->chrom = cloneString(seq->name);
 returnList->size = seq->size;
 int i;
 DNA *dna;
-unsigned long long chromPosition = 0;
-unsigned long long startGap = 0;
-unsigned long long gapCount = 0;
+long long chromPosition = 0;
+long long startGap = 0;
+long long gapCount = 0;
 int kmerLength = 0;
 long long kmerVal = 0;
 long long endsGG = (G_BASE << 2) | G_BASE;
 long long beginsCC = (long long)((C_BASE << 2) | C_BASE) << 42;
 long long reverseMask = (long long)0xf << 42;
 verbose(4, "#   endsGG: %032llx\n", endsGG);
 verbose(4, "# beginsCC: %032llx\n", beginsCC);
-verbose(4, "#  46 bits: %032llx\n", (unsigned long long) fortySixBits);
+verbose(4, "#  46 bits: %032llx\n", (long long) fortySixBits);
 
 dna=seq->dna;
 for (i=0; i < seq->size; ++i)
     {
     int val = orderedNtVal[(int)dna[i]];
     if (val >= 0)
 	{
         kmerVal = fortySixBits & ((kmerVal << 2) | val);
         ++kmerLength;
 	if (kmerLength > 22)
 	    {
 	    if (endsGG == (kmerVal & 0xf))	/* check positive strand */
                 {
                 struct crispr *oneCrispr = NULL;
                 AllocVar(oneCrispr);
@@ -262,105 +283,104 @@
                 slAddHead(&crisprSet, oneCrispr);
                 }
 	    }
         }	// if (val >= 0)
         else
             {
             ++gapCount;
             startGap = chromPosition;
             /* skip all N's == any value not = 0,1,2,3 */
             while ( ((i+1) < seq->size) && (orderedNtVal[(int)dna[i+1]] < 0))
                 {
                 ++chromPosition;
                 ++i;
                 }
             if (startGap != chromPosition)
-                verbose(4, "#GAP %s\t%llu\t%llu\t%llu\t%llu\t%s\n", seq->name, startGap, chromPosition, gapCount, chromPosition-startGap, "+");
+                verbose(4, "#GAP %s\t%lld\t%lld\t%lld\t%lld\t%s\n", seq->name, startGap, chromPosition, gapCount, chromPosition-startGap, "+");
             else
-                verbose(4, "#GAP %s\t%llu\t%llu\t%llu\t%llu\t%s\n", seq->name, startGap, chromPosition+1, gapCount, 1+chromPosition-startGap, "+");
+                verbose(4, "#GAP %s\t%lld\t%lld\t%lld\t%lld\t%s\n", seq->name, startGap, chromPosition+1, gapCount, 1+chromPosition-startGap, "+");
             kmerLength = 0;  /* reset, start over */
             kmerVal = 0;
             }	// else if (val >= 0)
     ++chromPosition;
     }	// for (i=0; i < seq->size; ++i)
 // slReverse(&crisprSet);	// save time, order not important at this time
 returnList->chromCrisprs = crisprSet;
 returnList->crisprCount = slCount(crisprSet);
 return returnList;
 }	// static struct crisprList *generateKmers(struct dnaSeq *seq)
 
 static void showCounts(struct crisprList *all)
 /* everything has been scanned and counted, print out all the data */
 {
 FILE *bedFH = NULL;
 if (bedFileOut)
     bedFH = mustOpen(bedFileOut, "w");
 
 verbose(1, "#\tprint out all data\n");
 struct crisprList *list;
-unsigned long long totalChecked = 0;
+long long totalChecked = 0;
 verbose(1, "# %d number of chromosomes to display\n", slCount(all));
 for (list = all; list; list = list->next)
     {
     struct crispr *c = NULL;
     struct crispr *next = NULL;
-    verbose(1, "# crisprs count %llu on chrom %s\n", list->crisprCount, list->chrom);
+    verbose(1, "# crisprs count %lld on chrom %s\n", list->crisprCount, list->chrom);
     for (c = list->chromCrisprs; c; c = next)
 	{
 	int negativeOffset = 0;
         if (c->strand == '-')
 	    negativeOffset = 3;
-	unsigned long long txStart = c->start + negativeOffset;
-	unsigned long long txEnd = c->start+20 + negativeOffset;
+	long long txStart = c->start + negativeOffset;
+	long long txEnd = c->start+20 + negativeOffset;
 	if (bedFH)
-	    fprintf(bedFH, "%s\t%llu\t%llu\t%s\t%d\t%c\t%llu\t%llu\t%s\t%d\t%d\t%d\t%d\n", list->chrom, c->start, c->start+23, kmerValToString(c, 3), c->offBy0, c->strand, txStart, txEnd, kmerPAMString(c), c->offBy1, c->offBy2, c->offBy3, c->offBy4);
-
-	verbose(4, "%s\t%llu\t%llu\t%s\t%d\t%c\t%llu\t%llu\t%d\t%d\t%d\t%d\n", list->chrom, c->start, c->start+23, kmerValToString(c, 3), c->offBy0, c->strand, txStart, txEnd, c->offBy1, c->offBy2, c->offBy3, c->offBy4);
+	    fprintf(bedFH, "%s\t%lld\t%lld\t%s\t%d\t%c\t%lld\t%lld\t%s\t%d\t%d\t%d\t%d\n", list->chrom, c->start, c->start+23, kmerValToString(c, 3), c->offBy0, c->strand, txStart, txEnd, kmerPAMString(c), c->offBy1, c->offBy2, c->offBy3, c->offBy4);
 
         if (c->offBy0)
-           verbose(3, "# identical: %d %s:%llu %c\t%s\n", c->offBy0, list->chrom, c->start, c->strand, kmerValToString(c, 3));
+           verbose(3, "# identical: %d %s:%lld %c\t%s\n", c->offBy0, list->chrom, c->start, c->strand, kmerValToString(c, 3));
 	next = c->next;
 	++totalChecked;
 	}
     }
-verbose(1, "# total crisprs displayed: %llu\n", totalChecked);
+verbose(1, "# total crisprs displayed: %lld\n", totalChecked);
+if (bedFH)
+    carefulClose(&bedFH);
 }	// static void showCounts(struct crisprList *all)
 
-static unsigned long long countOffTargets(char *chrom, struct crispr *c, struct crisprList *all)
+static long long countOffTargets(char *chrom, struct crispr *c, struct crisprList *all)
 /* given one crispr c, scan against all others to find off targets */
 {
 struct crisprList *listPtr;
-unsigned long long totalCompares = 0;
+long long totalCompares = 0;
 for (listPtr = all; listPtr; listPtr = listPtr->next)
     {
     struct crispr *crisprPtr = NULL;
     struct crispr *next = NULL;
     for (crisprPtr = listPtr->chromCrisprs; crisprPtr; crisprPtr = next)
 	{
 	/* the XOR will determine differences in two sequences
 	 *  the shift right 6 removes the PAM sequence
 	 */
-        unsigned long long misMatch = (c->sequence ^ crisprPtr->sequence) >> 6;
+        long long misMatch = (c->sequence ^ crisprPtr->sequence) >> 6;
         if (! misMatch)	/* no misMatch, identical crisprs */
 	    {
             c->offBy0 += 1;
             crisprPtr->offBy0 += 1;
-//            verbose(4, "# identical: %d %s:%llu %c == %s:%llu %c\t%s == %s\n", c->offBy0, chrom, c->start, c->strand, listPtr->chrom, crisprPtr->start, crisprPtr->strand, kmerValToString(c, 0), kmerValToString(crisprPtr, 0));
 	    }
         else
 	    {
-            if (verboseLevel() > 3)
+            if (verboseLevel() > 2)
 		{
                 /* possible misMatch bit values: 01 10 11
 		 *  turn those three values into just: 01
 		 */
 		misMatch = (misMatch | (misMatch > 1)) & 0x5555555555;
 		int bitsOn = _mm_popcnt_u64(misMatch);
                 switch(bitsOn)
                     {
                     case 1:
                         c->offBy1 += 1;
                         crisprPtr->offBy1 += 1;
                         break;
                     case 2:
                         c->offBy2 += 1;
                         crisprPtr->offBy2 += 1;
@@ -369,83 +389,83 @@
                         c->offBy3 += 1;
                         crisprPtr->offBy3 += 1;
                         break;
                     case 4:
                         c->offBy4 += 1;
                         crisprPtr->offBy4 += 1;
                         break;
                     default: break;
                     }
                 }
 	    }
         ++totalCompares;
 	next = crisprPtr->next;
 	}
     }
-// verbose(3,"#\ttotal comparisons %llu\n", totalCompares);
 return totalCompares;
 }	// static void countOffTargets( . . . )
 
 static struct crisprList *scanSequence(char *inFile)
 /* scan the given file, return list of crisprs */
 {
 verbose(1, "#\tscanning sequence file: %s\n", inFile);
 dnaUtilOpen();
 struct dnaLoad *dl = dnaLoadOpen(inFile);
 struct dnaSeq *seq;
 struct crisprList *listReturn = NULL;
 
-double perSecond = 0.0;
 long elapsedMs = 0;
 long scanStart = clock1000();
-unsigned long long totalCrisprs = 0;
+long long totalCrisprs = 0;
 
 /* scanning all sequences, setting up crisprs on the listReturn */
 while ((seq = dnaLoadNext(dl)) != NULL)
     {
+    if (startsWithNoCase("chrUn", seq->name) ||
+         rStringIn("hap", seq->name) || rStringIn("_alt", seq->name) )
+	{
+	verbose(1, "# skip chrom: %s\n", seq->name);
+	continue;
+	}
+
     long startTime = clock1000();
     struct crisprList *oneList = generateKmers(seq);
     slAddHead(&listReturn, oneList);
-    elapsedMs = clock1000() - startTime;
     totalCrisprs += oneList->crisprCount;
-    perSecond = 0.0;
-    if (elapsedMs > 0)
-	perSecond = 1000.0 * oneList->crisprCount / elapsedMs;
-    verbose(1, "# %s, %d bases, %llu crisprs @ %ld ms -> %.2f crisprs/sec\n",
-	seq->name, seq->size, oneList->crisprCount, elapsedMs, perSecond);
+    elapsedMs = clock1000() - startTime;
+    timingMessage(seq->name, oneList->crisprCount, "crisprs", elapsedMs, "crisprs/sec");
     }
 
 elapsedMs = clock1000() - scanStart;
-perSecond = 0.0;
-if (elapsedMs > 0)
-    perSecond = 1000.0 * totalCrisprs / elapsedMs;
-verbose(1, "# %llu total crisprs @ %ld ms -> %.2f crisprs/sec\n", totalCrisprs, elapsedMs, perSecond);
-
+timingMessage("scanSequence", totalCrisprs, "total crisprs", elapsedMs, "crisprs/sec");
 return listReturn;
 }	/*	static crisprList *scanSequence(char *inFile)	*/
 
 static struct crisprList *rangeExtraction(struct crisprList *all)
 /* given ranges in global rangesHash, construct new list of crisprs that
  *     have any type of overlap with the ranges, also extract those items from
  *         the all list.  Returns new list.
  */
 {
 struct crisprList *listReturn = NULL;
 struct crisprList *listPtr = NULL;
 int inputChromCount = slCount(all);
-unsigned long long returnListCrisprCount = 0;
-unsigned long long examinedCrisprCount = 0;
+long long returnListCrisprCount = 0;
+long long examinedCrisprCount = 0;
+
+long elapsedMs = 0;
+long scanStart = clock1000();
 
 for (listPtr = all; listPtr; listPtr = listPtr->next)
     {
     struct crispr *c = NULL;
     struct binKeeper *bk = hashFindVal(rangesHash, listPtr->chrom);
     struct crispr *newCrispr = NULL;
     if (bk != NULL)
         {
 	struct crispr *prevCrispr = NULL;
 	struct crispr *next = NULL;
         for (c = listPtr->chromCrisprs; c; c = next)
             {
 	    ++examinedCrisprCount;
             struct binElement *hitList = NULL;
 	    next = c->next;	// remember before perhaps lost
@@ -468,193 +488,303 @@
 	    slFreeList(&hitList);
             }
 	}
     if (newCrispr)
 	{
 	struct crisprList *newItem = NULL;
 	AllocVar(newItem);
 	newItem->chrom = cloneString(listPtr->chrom);
 	newItem->size = listPtr->size;
 	newItem->chromCrisprs = newCrispr;
 	newItem->crisprCount = slCount(newCrispr);
 	slAddHead(&listReturn, newItem);
 	}
     }	//	for (listPtr = all; listPtr; listPtr = listPtr->next)
 
-verbose(1, "# range limits, input %d chromosomes, return %d chromosomes\n",
+elapsedMs = clock1000() - scanStart;
+verbose(1, "# range scan, input %d chromosomes, return %d chromosomes\n",
     inputChromCount, slCount(listReturn));
-verbose(1, "# range limits, input %llu crisprs, return %llu crisprs\n",
-    examinedCrisprCount, returnListCrisprCount);
+timingMessage("range scan", examinedCrisprCount, "examined crisprs", elapsedMs, "crisprs/sec");
+timingMessage("range scan", returnListCrisprCount, "returned crisprs", elapsedMs, "crisprs/sec");
 return listReturn;
 }	//	static crisprList *rangeExtraction(crisprList *all)
 
 static struct crisprList *queryVsAll(struct crisprList *query,
     struct crisprList *target)
 /* run the query crisprs list against the target list */
 {
 struct crisprList *listReturn = NULL;
 
-unsigned long long totalCrisprsQuery = 0;
-unsigned long long totalCompares = 0;
+long long totalCrisprsQuery = 0;
+long long totalCompares = 0;
 int targetChrCount = slCount(target);
 int queryChrCount = slCount(query);
 verbose(1, "# queryVsAll: target %d chromosomes vs. query %d chromosomes\n",
     targetChrCount, queryChrCount);
 
 struct crisprList *listPtr = NULL;
 long processStart = clock1000();
 long elapsedMs = 0;
-double perSecond = 0.0;
 
 for (listPtr = query; listPtr; listPtr = listPtr->next)
     {
     struct crispr *c = NULL;
     totalCrisprsQuery += listPtr->crisprCount;
-    verbose(1, "# queryVsAll: crispr count: %llu on %s\n", listPtr->crisprCount, listPtr->chrom);
-    int crisprsDone = 0;
+    verbose(1, "# queryVsAll: crispr count: %lld on %s\n", listPtr->crisprCount, listPtr->chrom);
+    long long crisprsDone = 0;
     for (c = listPtr->chromCrisprs; c; c = c->next)
         {
-        verbose(4, "%s\t%s\t%llu\t%c\n", kmerValToString(c, 0), listPtr->chrom, c->start, c->strand);
+        verbose(4, "%s\t%s\t%lld\t%c\n", kmerValToString(c, 0), listPtr->chrom, c->start, c->strand);
         totalCompares += countOffTargets(listPtr->chrom, c, target);
         ++crisprsDone;
+	if (crisprsDone < 9)
+	    {
 	    elapsedMs = clock1000() - processStart;
-perSecond = 0.0;
-if (elapsedMs > 0)
-perSecond = 1000.0 * crisprsDone / elapsedMs;
-verbose(1, "# queryVsAll: processed %d crisprs @ %ld ms -> %.2f crisprs/sec\n", crisprsDone, elapsedMs, perSecond);
-verbose(1, "# %d %s:%llu %c %s, offBy0: %d\n", crisprsDone, listPtr->chrom, c->start, c->strand, kmerValToString(c, 0), c->offBy0);
-perSecond = 0.0;
-if (elapsedMs > 0)
-perSecond = 1000.0 * totalCompares / elapsedMs;
-verbose(1, "# queryVsAll: processed %d crisprs total compares %llu @ %ld ms -> %.2f crisprs/sec\n", crisprsDone, totalCompares, elapsedMs, perSecond);
+	    timingMessage("queryVsAll", totalCompares, "total comparisons", elapsedMs, "compares/sec");
+	    }
         }
     }	// for (listPtr = target; listPtr; listPtr = nextChr)
 
 elapsedMs = clock1000() - processStart;
-perSecond = 0.0;
-if (elapsedMs > 0)
-    perSecond = 1000.0 * totalCrisprsQuery / elapsedMs;
-verbose(1, "# queryVsAll: %llu total crisprs processed @ %ld ms -> %.2f crisprs/sec\n", totalCrisprsQuery, elapsedMs, perSecond);
-perSecond = 0.0;
-if (elapsedMs > 0)
-    perSecond = 1000.0 * totalCompares / elapsedMs;
-verbose(1, "# queryVsAll: %llu total comparisons @ %ld ms -> %.2f crisprs/sec\n", totalCompares, elapsedMs, perSecond);
+timingMessage("queryVsAll", totalCrisprsQuery, "crisprs processed", elapsedMs, "crisprs/sec");
+timingMessage("queryVsAll", totalCompares, "total comparisons", elapsedMs, "compares/sec");
 
 return listReturn;
 }
 
 static struct crisprList *allVsAll(struct crisprList *all,
     struct crisprList *addTo)
 /* run the all list against itself, add to 'addTo' if given */
 {
 struct crisprList *listReturn = NULL;
 if (addTo)
     listReturn = addTo;
 
-unsigned long long totalCrisprsIn = 0;
-unsigned long long totalCrisprsOut = 0;
-unsigned long long totalCompares = 0;
+long long totalCrisprsIn = 0;
+long long totalCrisprsOut = 0;
+long long totalCompares = 0;
 int chrCount = slCount(all);
 verbose(1, "# allVsAll: crispr list, scanning %d chromosomes, now processing . . .\n", chrCount);
 struct crisprList *listPtr = NULL;
 struct crisprList *nextChr = NULL;
 long processStart = clock1000();
 double perSecond = 0.0;
 long elapsedMs = 0;
 
 for (listPtr = all; listPtr; listPtr = nextChr)
     {
     struct crispr *newCrispr = NULL;
     struct crispr *c = NULL;
     totalCrisprsIn += listPtr->crisprCount;	// to verify same in and out
-    verbose(1, "# allVsAll: crispr count: %llu on %s\n", listPtr->crisprCount, listPtr->chrom);
+    verbose(1, "# allVsAll: crispr count: %lld on %s\n", listPtr->crisprCount, listPtr->chrom);
     struct crispr *next;
     int crisprsDone = 0;
     for (c = listPtr->chromCrisprs; c; c = next)
         {
-        verbose(4, "%s\t%s\t%llu\t%c\n", kmerValToString(c, 0), listPtr->chrom, c->start, c->strand);
+        verbose(4, "%s\t%s\t%lld\t%c\n", kmerValToString(c, 0), listPtr->chrom, c->start, c->strand);
         next = c->next;	// remember before lost
         c->next = NULL;	// taking this one out of list
         listPtr->chromCrisprs = next; // this first item removed from list
         if ((verboseLevel() > 2) && next)
             totalCompares += countOffTargets(listPtr->chrom, c, all);
         slAddHead(&newCrispr, c);	// constructing new list
         ++crisprsDone;
 elapsedMs = clock1000() - processStart;
 perSecond = 0.0;
 if (elapsedMs > 0)
 perSecond = 1000.0 * crisprsDone / elapsedMs;
 verbose(1, "# allVsAll: processed %d crisprs @ %ld ms -> %.2f crisprs/sec\n", crisprsDone, elapsedMs, perSecond);
-verbose(1, "# %d %s:%llu %c %s, offBy0: %d\n", crisprsDone, listPtr->chrom, c->start, c->strand, kmerValToString(c, 0), c->offBy0);
+verbose(1, "# %d %s:%lld %c %s, offBy0: %d\n", crisprsDone, listPtr->chrom, c->start, c->strand, kmerValToString(c, 0), c->offBy0);
 perSecond = 0.0;
 if (elapsedMs > 0)
 perSecond = 1000.0 * totalCompares / elapsedMs;
-verbose(1, "# allVsAll: processed %d crisprs total compares %llu @ %ld ms -> %.2f crisprs/sec\n", crisprsDone, totalCompares, elapsedMs, perSecond);
+verbose(1, "# allVsAll: processed %d crisprs total compares %lld @ %ld ms -> %.2f crisprs/sec\n", crisprsDone, totalCompares, elapsedMs, perSecond);
         }
     nextChr = listPtr->next;	// remember before lost
     listPtr->next = NULL;		// taking out of list
     listPtr->chromCrisprs = newCrispr;	// the new crispr list
     listPtr->crisprCount = slCount(newCrispr);	// the new crispr list
     totalCrisprsOut += listPtr->crisprCount;	// to verify correct
     all = nextChr;		// removes this one from list
     slAddHead(&listReturn, listPtr);
     }	// for (listPtr = all; listPtr; listPtr = nextChr)
 
 if (slCount(listReturn) != chrCount)
     verbose(1, "#\tERROR: transferred crispr list chrom count %d != beginning count %d\n", slCount(listReturn), chrCount);
 if (totalCrisprsIn != totalCrisprsOut)
-    verbose(1, "#\tERROR: initial crispr list count %llu != output count %llu\n", totalCrisprsIn, totalCrisprsOut);
+    verbose(1, "#\tERROR: initial crispr list count %lld != output count %lld\n", totalCrisprsIn, totalCrisprsOut);
 elapsedMs = clock1000() - processStart;
 perSecond = 0.0;
 if (elapsedMs > 0)
     perSecond = 1000.0 * totalCrisprsIn / elapsedMs;
-verbose(1, "# %llu total crisprs processed @ %ld ms -> %.2f crisprs/sec\n", totalCrisprsIn, elapsedMs, perSecond);
+verbose(1, "# %lld total crisprs processed @ %ld ms -> %.2f crisprs/sec\n", totalCrisprsIn, elapsedMs, perSecond);
 perSecond = 0.0;
 if (elapsedMs > 0)
     perSecond = 1000.0 * totalCompares / elapsedMs;
-verbose(1, "# %llu total comparisons @ %ld ms -> %.2f crisprs/sec\n", totalCompares, elapsedMs, perSecond);
+verbose(1, "# %lld total comparisons @ %ld ms -> %.2f crisprs/sec\n", totalCompares, elapsedMs, perSecond);
 
 return listReturn;
+}	//	static struct crisprList *allVsAll(struct crisprList *all,
+			// struct crisprList *addTo)
+
+static struct crisprList *readKmers(char *fileIn)
+/* read in kmer list from 'fileIn', return list structure */
+{
+verbose(1, "# reading crisprs from: %s\n", fileIn);
+struct crisprList *listReturn = NULL;
+struct lineFile *lf = lineFileOpen(fileIn, TRUE);
+char *row[10];
+int wordCount = 0;
+long long crisprsInput = 0;
+
+long startMs = clock1000();
+
+while (0 < (wordCount = lineFileChopNextTab(lf, row, ArraySize(row))) )
+    {
+    if (3 != wordCount)
+	errAbort("expecing three words at line %d in file %s, found: %d",
+	    lf->lineIx, fileIn, wordCount);
+    struct crisprList *newItem = NULL;
+    AllocVar(newItem);
+    newItem->chrom = cloneString(row[0]);
+    newItem->crisprCount = sqlLongLong(row[1]);
+    newItem->size = sqlLongLong(row[2]);
+    newItem->chromCrisprs = NULL;
+    slAddHead(&listReturn, newItem);
+    long long verifyCount = 0;
+    while ( (verifyCount < newItem->crisprCount) &&  (0 < (wordCount = lineFileChopNextTab(lf, row, ArraySize(row)))) )
+	{
+        if (3 != wordCount)
+	    errAbort("expecing three words at line %d in file %s, found: %d",
+		lf->lineIx, fileIn, wordCount);
+        ++verifyCount;
+        struct crispr *oneCrispr = NULL;
+        AllocVar(oneCrispr);
+        oneCrispr->sequence = sqlLongLong(row[0]);
+        oneCrispr->start = sqlLongLong(row[1]);
+        oneCrispr->strand = row[2][0];
+        oneCrispr->offBy0 = 0;
+        oneCrispr->offBy1 = 0;
+        oneCrispr->offBy2 = 0;
+        oneCrispr->offBy3 = 0;
+        oneCrispr->offBy4 = 0;
+        slAddHead(&newItem->chromCrisprs, oneCrispr);
+	}
+    if (verifyCount != newItem->crisprCount)
+	errAbort("expecting %lld kmer items at line %d in file %s, found: %lld",
+	    newItem->crisprCount, lf->lineIx, fileIn, verifyCount);
+    crisprsInput += verifyCount;
+    }
+
+lineFileClose(&lf);
+
+long elapsedMs = clock1000() - startMs;
+timingMessage("readKmers", crisprsInput, "crisprs read in", elapsedMs, "crisprs/sec");
+
+return listReturn;
+}	//	static struct crisprList *readKmers(char *fileIn)
+
+static void writeKmers(struct crisprList *all, char *fileOut)
+/* write kmer list 'all' to 'fileOut' */
+{
+FILE *fh = mustOpen(fileOut, "w");
+struct crisprList *list = NULL;
+long long crisprsWritten = 0;
+
+long startMs = clock1000();
+
+slReverse(&all);
+for (list = all; list; list = list->next)
+    {
+    fprintf(fh, "%s\t%lld\t%d\n", list->chrom, list->crisprCount, list->size);
+    struct crispr *c = NULL;
+    slReverse(&list->chromCrisprs);
+    for (c = list->chromCrisprs; c; c = c->next)
+	{
+	fprintf(fh, "%lld\t%lld\t%c\n", c->sequence,
+	    c->start, c->strand);
+	++crisprsWritten;
+	}
     }
+carefulClose(&fh);
+
+long elapsedMs = clock1000() - startMs;
+timingMessage("writeKmers", crisprsWritten, "crisprs written", elapsedMs, "crisprs/sec");
+}	//	static void writeKmers(struct crisprList *all, char *fileOut)
 
 static void crisprKmers(char *sequence)
 /* crisprKmers - find and annotate crispr sequences. */
 {
-struct crisprList *allCrisprs = scanSequence(sequence);
 struct crisprList *queryCrisprs = NULL;
 struct crisprList *countedCrisprs = NULL;
+struct crisprList *allCrisprs = NULL;
+
+
+if (loadKmers)
+    allCrisprs = readKmers(loadKmers);
+else
+    allCrisprs = scanSequence(sequence);
+
+#ifdef NOT
+// timing walking through the linked lists
+long startMs = clock1000();
+long long kmerCount = 0;
+struct crisprList *list = NULL;
+for (list = allCrisprs; list; list = list->next)
+    kmerCount += slCount(list->chromCrisprs);
+long elapsedMs = clock1000() - startMs;
+timingMessage("slCount", kmerCount, "kmers counted", elapsedMs, "kmers/sec");
+startMs = clock1000();
+slReverse(&allCrisprs);
+for (list = allCrisprs; list; list = list->next)
+    slReverse(&list->chromCrisprs);
+elapsedMs = clock1000() - startMs;
+timingMessage("slReverse", kmerCount, "kmers reversed", elapsedMs, "kmers/sec");
+
+return;
+#endif
+
+if (dumpKmers)
+    {
+    writeKmers(allCrisprs, dumpKmers);
+    return;
+    }
 
 /* processing those crisprs */
 if (verboseLevel() > 1)
     {
     /* if ranges have been specified, construct queryList of kmers to measure */
     if (rangesHash)
         {
         queryCrisprs = rangeExtraction(allCrisprs);
         /* first run up query vs. all, no list mucking about */
         countedCrisprs = queryVsAll(queryCrisprs, allCrisprs);
         /* then run up query vs. itself, add to countedCrisprs */
         countedCrisprs = allVsAll(queryCrisprs, countedCrisprs);
         }
     else
         countedCrisprs = allVsAll(allCrisprs, NULL);
 
     showCounts(countedCrisprs);
     }
 }	// static void crisprKmers(char *sequence)
 
 int main(int argc, char *argv[])
 /* Process command line, initialize translation arrays and call the process */
 {
 optionInit(&argc, argv, options);
 if (argc != 2)
     usage();
 
 bedFileOut = optionVal("bed", bedFileOut);
+dumpKmers = optionVal("dumpKmers", dumpKmers);
+loadKmers = optionVal("loadKmers", loadKmers);
 ranges = optionVal("ranges", ranges);
 if (ranges)
     rangesHash = readRanges(ranges);
 
 initOrderedNtVal();	/* set up orderedNtVal[] */
 crisprKmers(argv[1]);
+if (verboseLevel() > 1)
+    printVmPeak();
 return 0;
 }