3f6442f9cbf277adf44d00d78a5aaf53e5b0e4c7
hiram
  Thu Jul 20 12:47:20 2017 -0700
failed experiment to utilize CPU caching, does not help, just for the record checked in, refs #18969

diff --git src/utils/crisprKmers/crisprKmers.c src/utils/crisprKmers/crisprKmers.c
index 7460eac..0f6a72c 100644
--- src/utils/crisprKmers/crisprKmers.c
+++ src/utils/crisprKmers/crisprKmers.c
@@ -60,58 +60,66 @@
 #include "sqlNum.h"
 #include "binRange.h"
 #include "obscure.h"
 #include "memalloc.h"
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "crisprKmers - annotate crispr guide sequences\n"
   "usage:\n"
   "   crisprKmers <sequence>\n"
   "options:\n"
   "   where <sequence> is a .2bit file or .fa fasta sequence\n"
   "   -verbose=N - for debugging control processing steps with level of verbose:\n"
+  "              - verbose < 2 - scan sequence only, no comparisons\n"
+  "              - verbose > 1 - run comparisons\n"
   "   -bed=<file> - output results to given bed9+ file\n"
   "   -offTargets=<file> - output off target data to given file\n"
   "   -ranges=<file> - use specified bed3 file to limit which guides are\n"
   "                  - measured, only those with any overlap to these bed items.\n"
   "   -memLimit=N - N number of gigabytes for each thread, default: no threading\n"
   "               when memory size goes beyond N gB program will thread gB/N times\n"
+  "   -cacheCount=N - number of guides to check at one time vs. targets,\n"
+  "                 - to take advantage of CPU cache, e.g. 1Mb of cache\n"
+  "                 - could hold 16,384 64bit words.\n"
   "   -dumpKmers=<file> - NOT VALID after scan of sequence, output kmers to file\n"
   "                     - process will exit after this, use -loadKmers to continue\n"
   "   -loadKmers=<file> - NOT VALID load kmers from previous scan of sequence from -dumpKmers"
   );
 }
 
 static char *bedFileOut = NULL;	/* set with -bed=<file> argument, kmer output */
 static char *offTargets = NULL;	/* write offTargets to <file> */
 static FILE *offFile = NULL;	/* file handle to write off targets */
 static char *ranges = NULL;	/* use ranges <file> to limit scanning */
 static struct hash *rangesHash = NULL;	/* ranges into hash + binkeeper */
 static char *dumpKmers = NULL;	/* file name to write out kmers from scan */
 static char *loadKmers = NULL;	/* file name to read in kmers from previous scan */
 static int memLimit = 0;	/* gB limit before going into thread mode */
+static int cacheCount = 0;	/* number of query guides to run through
+				 * for each target to utilize CPU cache */
 static int threadCount = 1;	/* will be adjusted depending upon vmPeak */
 
 /* Command line validation table. */
 static struct optionSpec options[] = {
    {"bed", OPTION_STRING},
    {"offTargets", OPTION_STRING},
    {"ranges", OPTION_STRING},
    {"memLimit", OPTION_INT},
+   {"cacheCount", OPTION_INT},
    {"dumpKmers", OPTION_STRING},
    {"loadKmers", OPTION_STRING},
    {NULL, 0},
 };
 
 #define	guideSize	20	// 20 bases
 #define	pamSize		3	//  3 bases
 #define negativeStrand	0x0000800000000000
 
 #define fortyEightBits	0xffffffffffff
 #define fortySixBits	0x3fffffffffff
 #define fortyBits	0xffffffffff
 #define high32bits	0xffffffff00000000
 #define low32bits	0x00000000ffffffff
 #define high16bits	0xffff0000ffff0000
@@ -199,41 +207,43 @@
 int i;
 for (i=0; i<ArraySize(orderedNtVal); i++)
     orderedNtVal[i] = -1;
 orderedNtVal['A'] = orderedNtVal['a'] = A_BASE;
 orderedNtVal['C'] = orderedNtVal['c'] = C_BASE;
 orderedNtVal['G'] = orderedNtVal['g'] = G_BASE;
 orderedNtVal['T'] = orderedNtVal['t'] = T_BASE;
 orderedNtVal['U'] = orderedNtVal['u'] = T_BASE;
 bases[A_BASE] = 'A';
 bases[C_BASE] = 'C';
 bases[G_BASE] = 'G';
 bases[T_BASE] = 'T';
 }	//	static void initOrderedNtVal()
 
 static void timingMessage(char *prefix, long long count, char *message,
-    long ms, char *units, char *invUnits)
+    long startMs, char *units, char *invUnits)
 {
+long elapsedMs = clock1000() - startMs;
 double perSecond = 0.0;
 double inverse = 0.0;
-if ((ms > 0) && (count > 0))
+if ((elapsedMs > 0) && (count > 0))
     {
-    perSecond = 1000.0 * count / ms;
+    perSecond = 1000.0 * count / elapsedMs;
     inverse = 1.0 / perSecond;
     }
 
-verbose(1, "# %s: %lld %s @ %ld ms\n#\t%.2f %s == %g %s\n", prefix, count, message, ms, perSecond, units, inverse, invUnits);
+verbose(1, "# %s: %lld %s @ %ld ms\n#\t%.2f %s == %g %s\n", prefix, count,
+    message, elapsedMs, perSecond, units, inverse, invUnits);
 }
 
 static void setLoopEnds(struct loopControl *control, long long listSize,
     int partCount, int partNumber)
 /* for multiple thread processing, given a list of listSize,
  #    a partCount and a partNumber
  *    return listStart, listEnd indexes in control structure
  */
 {
 long long partSize = 1 + listSize / partCount;
 long long listStart = partSize * partNumber;
 long long listEnd = partSize * (partNumber + 1);
 if (listEnd > listSize)
     listEnd = listSize;
 control->listStart = listStart;
@@ -339,78 +349,79 @@
  *     masking does the reversing.
  */
 register long long v = (val ^ fortySixBits) << 18;
 v = ((v & high32bits) >> 32) | ((v & low32bits) << 32);
 v = ((v & high16bits) >> 16) | ((v & low16bits) << 16);
 v = ((v & high8bits) >> 8) | ((v & low8bits) << 8);
 v = ((v & high4bits) >> 4) | ((v & low4bits) << 4);
 v = ((v & high2bits) >> 2) | ((v & low2bits) << 2);
 return v;
 }	//	static long long revComp(long long val)
 
 static void copyToArray(struct crisprList *list)
 /* copy the crispr list data into arrays */
 {
 long startTime = clock1000();
-struct crisprList *cl = NULL;
+struct crisprList *cl;
 long long itemsCopied = 0;
 
 for (cl = list; cl; cl = cl->next)
     {
     size_t memSize = cl->crisprCount * sizeof(long long);
     cl->sequence = (long long *)needLargeMem(memSize);
     cl->start = (long long *)needLargeMem(memSize);
     memSize = 5 * sizeof(int *);
     cl->offBy = (int **)needLargeMem(memSize);
     memSize = cl->crisprCount * sizeof(int);
-    int r = 0;
+    int r;
     for (r = 0; r < 5; ++r)
         cl->offBy[r] = (int *)needLargeZeroedMem(memSize);
     memSize = cl->crisprCount * sizeof(float);
     cl->mitSum = (float *)needLargeMem(memSize);
 
     long long i = 0;
-    struct crispr *c = NULL;
+    struct crispr *c;
     for (c = cl->chromCrisprs; c; c = c->next)
         {
 	++itemsCopied;
         cl->sequence[i] = c->sequence;
         cl->start[i] = c->start;
         cl->mitSum[i] = 0.0;
 	++i;
         }
     }
-long elapsedMs = clock1000() - startTime;
-timingMessage("copyToArray", itemsCopied, "items copied", elapsedMs, "items/sec", "seconds/item");
+
+timingMessage("copyToArray", itemsCopied, "items copied", startTime,
+    "items/sec", "seconds/item");
+
 }	//	static void copyToArray(struct crisprList *list)	*/
 
 static struct crisprList *generateKmers(struct dnaSeq *seq)
 {
 struct crispr *crisprSet = NULL;
 struct crisprList *returnList = NULL;
 AllocVar(returnList);
 returnList->chrom = cloneString(seq->name);
 returnList->size = seq->size;
 int i;
 DNA *dna;
 long long chromPosition = 0;
 long long startGap = 0;
 long long gapCount = 0;
 int kmerLength = 0;
 long long kmerVal = 0;
 long long reverseMask = (long long)0xf << 42;
-verbose(4, "#  46 bits: %032llx\n", (long long) fortySixBits);
 
 dna=seq->dna;
 for (i = 0; i < seq->size; ++i)
     {
     int val = orderedNtVal[(int)dna[i]];
     if (val >= 0)
 	{
         kmerVal = fortySixBits & ((kmerVal << 2) | val);
         ++kmerLength;
 	if (kmerLength > 22)
 	    {
 	    if ((endsAG == (kmerVal & 0xf)) || (endsGG == (kmerVal & 0xf)))
                 {	/* have match for positive strand */
                 struct crispr *oneCrispr = NULL;
                 AllocVar(oneCrispr);
@@ -443,177 +454,173 @@
             if (startGap != chromPosition)
                 verbose(4, "#GAP %s\t%lld\t%lld\t%lld\t%lld\t%s\n", seq->name, startGap, chromPosition, gapCount, chromPosition-startGap, "+");
             else
                 verbose(4, "#GAP %s\t%lld\t%lld\t%lld\t%lld\t%s\n", seq->name, startGap, chromPosition+1, gapCount, 1+chromPosition-startGap, "+");
             kmerLength = 0;  /* reset, start over */
             kmerVal = 0;
             }	// else if (val >= 0)
     ++chromPosition;
     }	// for (i = 0; i < seq->size; ++i)
 // slReverse(&crisprSet);	// save time, order not important at this time
 returnList->chromCrisprs = crisprSet;
 returnList->crisprCount = slCount(crisprSet);
 return returnList;
 }	// static struct crisprList *generateKmers(struct dnaSeq *seq)
 
-static char *itemColor(int **offBy, long long index)
-/* temporary itemColor, post processing will do this correctly */
-{
-static char *notUnique = "150,150,150";
-// static char *notUniqueAlt = "150,150,150";
-static char *twoMany = "120,120,120";
-// static char *twoManyAlt = "120,120,120";
-static char *highScore = "0,255,0";	// > 70
-static char *mediumScore = "128,128,0";	// > 50
-static char *lowScore = "255,0,0";	// >= 50
-// static char *lowestScore = "80,80,80";	// < 50
-// static char *lowestScoreAlt = "80,80,80";	// < 50
-if (offBy[0][index])
-    return notUnique;
-else
-    {
-    int offSum = offBy[1][index] + offBy[2][index] + offBy[3][index] +
-		offBy[4][index];
-    if (offSum < 100)
-       return highScore;
-    else if (offSum < 150)
-       return mediumScore;
-    else if (offSum < 250)
-       return lowScore;
+// for future reference, note the Cpf1 system:
+//	https://benchling.com/pub/cpf1
+
+static char *scoreToColor(long mitScore)
+/* following scheme from Max's python scripts */
+{
+static char *green = "50,205,50";	/* #32cd32 */
+static char *yellow = "255,255,0";	/* #ffff00 */
+static char *black = "0,0,0";
+static char *red = "170,1,20";		/* #aa0114 */
+if (mitScore > 50)
+    return green;
+else if (mitScore > 20)
+    return yellow;
+else if (mitScore == -1)
+    return black;
 else
-       return twoMany;
-    }
+    return red;
 }
 
 static void countsOutput(struct crisprList *all, FILE *bedFH)
 /* everything has been scanned and counted, print out all the data from arrays*/
 {
 long startTime = clock1000();
 long long itemsOutput = 0;
 
 struct crisprList *list;
 long long totalOut = 0;
 for (list = all; list; list = list->next)
     {
-    long long c = 0;
+    long long c;
     for (c = 0; c < list->crisprCount; ++c)
         {
 	++itemsOutput;
 	int negativeOffset = 0;
         char strand = '+';
         if (negativeStrand & list->sequence[c])
 	    {
             strand = '-';
 	    negativeOffset = pamSize;
 	    }
 	long long txStart = list->start[c] + negativeOffset;
 	long long txEnd = txStart + guideSize;
 
         int mitScoreCount = + list->offBy[1][c] +
 	    list->offBy[2][c] + list->offBy[3][c] + list->offBy[4][c];
 
-        char *color = itemColor(list->offBy, c);
         char kmerString[33];
         kmerValToString(kmerString, list->sequence[c], pamSize);
         char pamString[33];
         kmerPAMString(pamString, list->sequence[c]);
 
 	if (bedFH)
 	    {
-	    long mitScore = 0;
+	    long mitScore = 1.0;
             if (mitScoreCount > 0)  /* note: interger <- float conversion  */
+	    {
 		mitScore = roundf((list->mitSum[c] / (float) mitScoreCount));
-	    fprintf(bedFH, "%s\t%lld\t%lld\t%d,%d,%d,%d,%d\t%d\t%c\t%lld\t%lld\t%s\t%s\t%s\t%ld\t%d\t%d\t%d\t%d\n", list->chrom, list->start[c], list->start[c]+pamSize+guideSize, list->offBy[0][c], list->offBy[1][c], list->offBy[2][c], list->offBy[3][c], list->offBy[4][c], list->offBy[0][c], strand, txStart, txEnd, color, kmerString, pamString, mitScore, list->offBy[1][c], list->offBy[2][c], list->offBy[3][c], list->offBy[4][c]);
+	    }
+	    else if (list->offBy[0][c] > 0)
+	    {
+		mitScore = 0.0;
+	    }
+            char *color = scoreToColor(mitScore);
+	    fprintf(bedFH, "%s\t%lld\t%lld\t\t%ld\t%c\t%lld\t%lld\t%s\t%s\t%s\t%ld\t%d,%d,%d,%d,%d\n", list->chrom, list->start[c], list->start[c]+pamSize+guideSize, mitScore, strand, txStart, txEnd, color, kmerString, pamString, mitScore, list->offBy[0][c], list->offBy[1][c], list->offBy[2][c], list->offBy[3][c], list->offBy[4][c]);
 	    }
 	++totalOut;
 	}
     }
 if (bedFH)
     carefulClose(&bedFH);
 
-long elapsedMs = clock1000() - startTime;
-timingMessage("countsOutput:", itemsOutput, "items output", elapsedMs, "items/sec", "seconds/item");
+timingMessage("countsOutput:", itemsOutput, "items output", startTime,
+    "items/sec", "seconds/item");
+
 }	//	static void countsOutput(struct crisprList *all, FILE *bedFH)
 
 static struct crisprList *scanSequence(char *inFile)
 /* scan the given file, return list of crisprs */
 {
 verbose(1, "#\tscanning sequence file: %s\n", inFile);
 dnaUtilOpen();
 struct dnaLoad *dl = dnaLoadOpen(inFile);
 struct dnaSeq *seq;
 struct crisprList *listReturn = NULL;
 
-long elapsedMs = 0;
-long scanStart = clock1000();
+long startTime = clock1000();
 long long totalCrisprs = 0;
 
 /* scanning all sequences, setting up crisprs on the listReturn */
 while ((seq = dnaLoadNext(dl)) != NULL)
     {
     if (startsWithNoCase("chrUn", seq->name) ||
          rStringIn("hap", seq->name) || rStringIn("_alt", seq->name) )
 	{
 	verbose(4, "# skip chrom: %s\n", seq->name);
 	continue;
 	}
 
     long startTime = clock1000();
     struct crisprList *oneList = generateKmers(seq);
     slAddHead(&listReturn, oneList);
     totalCrisprs += oneList->crisprCount;
-    elapsedMs = clock1000() - startTime;
     if (verboseLevel() > 3)
-	timingMessage(seq->name, oneList->crisprCount, "crisprs", elapsedMs, "crisprs/sec", "seconds/crispr");
+	timingMessage(seq->name, oneList->crisprCount, "crisprs", startTime,
+	    "crisprs/sec", "seconds/crispr");
     }
 
-elapsedMs = clock1000() - scanStart;
-timingMessage("scanSequence", totalCrisprs, "total crisprs", elapsedMs, "crisprs/sec", "seconds/crispr");
+timingMessage("scanSequence", totalCrisprs, "total crisprs", startTime,
+    "crisprs/sec", "seconds/crispr");
 
 return listReturn;
 }	/*	static crisprList *scanSequence(char *inFile)	*/
 
 static struct crisprList *rangeExtraction(struct crisprList **allReference)
 /* given ranges in global rangesHash, construct new list of crisprs that
  *     have any type of overlap with the ranges, also extract those items from
  *         the all list.  Returns new list.
  */
 {
 struct crisprList *all = *allReference;
 struct crisprList *listReturn = NULL;
-struct crisprList *list = NULL;
+struct crisprList *list;
 int inputChromCount = slCount(all);
 long long returnListCrisprCount = 0;
 long long examinedCrisprCount = 0;
 struct crisprList *prevChromList = NULL;
 
-long elapsedMs = 0;
-long scanStart = clock1000();
+long startTime = clock1000();
 
 struct crisprList *nextList = NULL;
 for (list = all; list; list = nextList)
     {
     nextList = list->next;	// remember before perhaps lost
     long long beforeCrisprCount = list->crisprCount;
     examinedCrisprCount += list->crisprCount;
-    struct crispr *c = NULL;
     struct binKeeper *bk = hashFindVal(rangesHash, list->chrom);
     struct crispr *newCrispr = NULL;
     if (bk != NULL)
         {
 	struct crispr *prevCrispr = NULL;
 	struct crispr *next = NULL;
+	struct crispr *c;
         for (c = list->chromCrisprs; c; c = next)
             {
             struct binElement *hitList = NULL;
 	    next = c->next;	// remember before perhaps lost
 	    // select any guide that is at least half contained in any range
 	    int midPoint = c->start + ((pamSize + guideSize) >> 1);
 //            if (negativeStrand & c->sequence)
 //                start += 2;
 //            int end = midPoint + 1;
             hitList = binKeeperFind(bk, midPoint, midPoint + 1);
             if (hitList)
 		{
 		if (prevCrispr)	// remove this one from the 'all' list
 		    prevCrispr->next = next;
                 else
@@ -642,37 +649,41 @@
 		prevChromList->next = nextList;
 	    else
                 all = nextList;	// removing the first one
 	    }
 	else
 	    {
 	    list->crisprCount = slCount(list->chromCrisprs);
             long long removedCrisprs = beforeCrisprCount - list->crisprCount;
 	    verbose(1, "# range scan chrom %s had %lld crisprs, removed %lld leaving %lld target\n",
 		list->chrom, beforeCrisprCount, removedCrisprs, list->crisprCount);
 	    }
 	}
     prevChromList = list;
     }	//	for (list = all; list; list = list->next)
 
-elapsedMs = clock1000() - scanStart;
 verbose(1, "# range scanning %d chroms, return %d selected chroms, leaving %d chroms\n",
     inputChromCount, slCount(listReturn), slCount(all));
+
 long long targetCrisprCount = examinedCrisprCount - returnListCrisprCount;
-timingMessage("range scan", examinedCrisprCount, "examined crisprs", elapsedMs, "crisprs/sec", "seconds/crispr");
-timingMessage("range scan", targetCrisprCount, "remaining target crisprs", elapsedMs, "crisprs/sec", "seconds/crispr");
-timingMessage("range scan", returnListCrisprCount, "returned query crisprs", elapsedMs, "crisprs/sec", "seconds/crispr");
+timingMessage("range scan", examinedCrisprCount, "examined crisprs",
+    startTime, "crisprs/sec", "seconds/crispr");
+timingMessage("range scan", targetCrisprCount, "remaining target crisprs",
+    startTime, "crisprs/sec", "seconds/crispr");
+timingMessage("range scan", returnListCrisprCount, "returned query crisprs",
+    startTime, "crisprs/sec", "seconds/crispr");
+
 if (NULL == all)
     {
     allReference = NULL;	// they have all been removed
     verbose(1, "# range scan, every single chrom has been removed\n");
     }
 else if (*allReference != all)
     {
     verbose(1, "# range scan, first chrom list has been moved from %p to %p\n", (void *)*allReference, (void *)all);
     *allReference = all;
     }
 return listReturn;
 }	//	static crisprList *rangeExtraction(crisprList *all)
 
 static float hitScoreM[20] =
 {
@@ -684,31 +695,31 @@
 
 static float calcHitScore(long long sequence1, long long sequence2)
 /* calcHitScore - from Max's crispor.py script and paper:
  *    https://www.ncbi.nlm.nih.gov/pubmed/27380939 */
 {
 float score1 = 1.0;
 int mmCount = 0;
 int lastMmPos = -1;
 /* the XOR determines differences in two sequences, the shift
  * right 6 removes the PAM sequence and the 'fortyBits &' eliminates
  * the negativeStrand bit
  */
 long long misMatch = fortyBits & ((sequence1 ^ sequence2) >> 6);
 int distCount = 0;
 int distSum = 0;
-int pos = 0;
+int pos;
 for (pos = 0; pos < guideSize; ++pos)
     {
     int diff = misMatch & 0x3;
     if (diff)
 	{
 	++mmCount;
         if (lastMmPos != -1)
 	    {
 	    distSum += pos-lastMmPos;
 	    ++distCount;
 	    }
 	score1 *= 1 - hitScoreM[pos];
 	lastMmPos = pos;
 	}
     misMatch >>= 2;
@@ -911,188 +922,205 @@
     ++i;
     twoBitMask >>= 2;
     }
 stringReturn[i] = 0;
 }	// static void misMatchString(char *returnString, long long misMatch)
 
 static void recordOffTargets(struct crisprList *query,
     struct crisprList *target, int bitsOn, long long qIndex,
 	long long tIndex, long long twoBitMisMatch)
 /* bitsOn is from 1 to 4, record this match when less than 1000 total */
 {
 float mitScore =
     calcHitScore(query->sequence[qIndex], target->sequence[tIndex]);
 float cfdScore =
     calcCfdScore(query->sequence[qIndex], target->sequence[tIndex]);
+/* note: interger cfdInt <- cfdScore float conversion  */
+int cfdInt = round(cfdScore * 1000);
 
 /* Note from Max's script:
  *	this is a heuristic based on the guideSeq data where alternative
  *	PAMs represent only ~10% of all cleaveage events.
  *	We divide the MIT score by 5 to make sure that these off-targets
  *	are not ranked among the top but still appear in the list somewhat
  */
 if ( (endsGG == (query->sequence[qIndex] & 0xf)) &&
 	(endsGG != (target->sequence[tIndex] & 0xf) ) )
     mitScore *= 0.2;
 
 query->mitSum[qIndex] += mitScore;
 
 if (query->offBy[0][qIndex] ) // no need to accumulate if 0 mismatch > 0
     return;
 
 if (offFile)
     {
     char queryString[33];	/* local storage for string */
     char targetString[33];	/* local storage for string */
     char queryPAM[33];	/* local storage for string */
     char targetPAM[33];	/* local storage for string */
     char misMatch[33];	/* ...*.....*.*... ascii string represent misMatch */
 
-    int i = 0;
-    int bitsOnSum = 0;
+    int i;
+    int offTargetCount = 0;
     for (i = 1; i < 5; ++i)
-        bitsOnSum += query->offBy[i][qIndex];
+        offTargetCount += query->offBy[i][qIndex];
 
-    if (bitsOnSum < 1000)	// could be command line option limit
+    if (offTargetCount < 1000)	// could be command line option limit
         {
         misMatchString(misMatch, twoBitMisMatch);
         kmerValToString(queryString, query->sequence[qIndex], pamSize);
         kmerValToString(targetString, target->sequence[tIndex], pamSize);
 	kmerPAMString(queryPAM, query->sequence[qIndex]);
 	kmerPAMString(targetPAM, target->sequence[tIndex]);
-        fprintf(offFile, "%s:%lld %c %s%s %s%s %s %d %.8f %.8f\t%s:%lld %c\n",
+        fprintf(offFile, "%s:%lld %c %s %s %s %s %s;%lld%c;%d %s %d %.8f %.8f %s:%lld %c\n",
 	    query->chrom, query->start[qIndex],
 		negativeStrand & query->sequence[qIndex] ? '-' : '+',
                 queryString, queryPAM, targetString, targetPAM,
+                target->chrom, target->start[tIndex],
+		negativeStrand & target->sequence[tIndex] ? '-' : '+', cfdInt,
 		misMatch, bitsOn, mitScore, cfdScore, target->chrom,
 		target->start[tIndex],
 		negativeStrand & target->sequence[tIndex] ? '-' : '+');
         }
     }
 }	//	static void recordOffTargets(struct crisprList *query,
 	//	    struct crisprList *target, int bitsOn, long long qIndex,
 	//		long long tIndex)
 
 
 /* this queryVsTarget can be used by threads, it should be safe,
  * remains to be proven.
  */
 static void queryVsTarget(struct crisprList *query, struct crisprList *target,
     int threadCount, int threadId)
 /* run the query crisprs list against the target list in the array structures */
 {
-struct crisprList *qList = NULL;
+struct crisprList *qList;
 long long totalCrisprsQuery = 0;
 long long totalCrisprsTarget = 0;
 long long totalCompares = 0;
 struct loopControl *control = NULL;
 AllocVar(control);
 
-long processStart = clock1000();
-long elapsedMs = 0;
+long startTime = clock1000();
 
 for (qList = query; qList; qList = qList->next)
     {
-    long long qCount = 0;
     totalCrisprsQuery += qList->crisprCount;
     if (threadCount > 1)
 	{
 	setLoopEnds(control, qList->crisprCount, threadCount, threadId);
 	if (control->listStart >= qList->crisprCount)
 	    continue;	/* next chrom, no part to be done for this thread */
 	}
     else
 	{
 	control->listStart = 0;
 	control->listEnd = qList->crisprCount;
 	}
     verbose(1, "# thread %d of %d running %s %lld items [ %lld : %lld )\n",
 	1 + threadId, threadCount, qList->chrom,
 	    control->listEnd - control->listStart, control->listStart,
 		control->listEnd);
     totalCrisprsTarget += control->listEnd - control->listStart;
-    for (qCount = control->listStart; qCount < control->listEnd; ++qCount)
-	{
-        struct crisprList *tList = NULL;
+    long long qStart = control->listStart;
+    long long qEnd = control->listEnd;
+    long long qCount;
+    for (qCount = control->listStart; qCount < control->listEnd; qCount = qEnd)
+	{
+	qStart = qCount;
+	qEnd = qStart + cacheCount;
+	if (cacheCount < 1)
+		qEnd = qStart + 1;
+	if (qEnd > control->listEnd)
+		qEnd = control->listEnd;
+        struct crisprList *tList;
         for (tList = target; tList; tList = tList->next)
             {
-            long long tCount = 0;
+            long long tCount;
             totalCompares += tList->crisprCount;
             for (tCount = 0; tCount < tList->crisprCount; ++tCount)
                 {
+		long long q;
+                for (q = qStart; q < qEnd; ++q)
+                    {
                     /* the XOR determine differences in two sequences, the
                      * shift right 6 removes the PAM sequence and
                      * the 'fortyBits &' eliminates the negativeStrand bit
                      */
                     long long misMatch = fortyBits &
-                    ((qList->sequence[qCount] ^ tList->sequence[tCount]) >> 6);
+                        ((qList->sequence[q] ^ tList->sequence[tCount]) >> 6);
                     if (misMatch)
                         {
                         /* possible misMatch bit values: 01 10 11
                          *  turn those three values into just: 01
                          */
                         misMatch = (misMatch | (misMatch >> 1)) & 0x5555555555;
                         int bitsOn = _mm_popcnt_u64(misMatch);
                         if (bitsOn < 5)
                             {
-			recordOffTargets(qList, tList, bitsOn, qCount, tCount, misMatch);
-			qList->offBy[bitsOn][qCount] += 1;
+                            recordOffTargets(qList, tList, bitsOn, q, tCount, misMatch);
+                            qList->offBy[bitsOn][q] += 1;
     //			tList->offBy[bitsOn][tCount] += 1; not needed
                             }
                         }
                     else
                         { 	/* no misMatch, identical crisprs */
-                    qList->offBy[0][qCount] += 1;
+                        qList->offBy[0][q] += 1;
     //                  tList->offBy[0][tCount] += 1;	not needed
                         }
+                    } // for (q = qStart; q < qEnd; ++q)
                 } // for (tCount = 0; tCount < tList->crisprCount; ++tCount)
             }	//	for (tList = target; tList; tList = tList->next)
 	}	//	for (qCount = 0; qCount < qList->crisprCount; ++qCount)
     }	//	for (qList = query; qList; qList = qList->next)
-verbose(1, "# done with scanning, check timing\n");
-elapsedMs = clock1000() - processStart;
-timingMessage("queryVsTarget", totalCrisprsQuery, "query crisprs processed", elapsedMs, "crisprs/sec", "seconds/crispr");
-timingMessage("queryVsTarget", totalCrisprsTarget, "vs target crisprs", elapsedMs, "crisprs/sec", "seconds/crispr");
-timingMessage("queryVsTarget", totalCompares, "total comparisons", elapsedMs, "compares/sec", "seconds/compare");
+
+timingMessage("queryVsTarget", totalCrisprsQuery, "query crisprs processed",
+    startTime, "crisprs/sec", "seconds/crispr");
+timingMessage("queryVsTarget", totalCrisprsTarget, "vs target crisprs",
+    startTime, "crisprs/sec", "seconds/crispr");
+timingMessage("queryVsTarget", totalCompares, "total comparisons",
+    startTime, "compares/sec", "seconds/compare");
+
 }	/* static struct crisprList *queryVsTarget(struct crisprList *query,
 	    struct crisprList *target) */
 
 static void queryVsSelf(struct crisprList *all)
 /* run this 'all' list vs. itself avoiding self to self comparisons */
 {
-struct crisprList *qList = NULL;
+struct crisprList *qList;
 long long totalCrisprsQuery = 0;
 long long totalCrisprsCompare = 0;
 
-long processStart = clock1000();
-long elapsedMs = 0;
+long startTime = clock1000();
 
 /* query runs through all chroms */
 for (qList = all; qList; qList = qList->next)
     {
-    long long qCount = 0;
+    long long qCount;
     totalCrisprsQuery += qList->crisprCount;
     verbose(1, "# queryVsSelf %lld query crisprs on chrom %s\n", qList->crisprCount, qList->chrom);
     for (qCount = 0; qCount < qList->crisprCount; ++qCount)
 	{
 	/* target starts on same chrom as query, and
 	   at next kmer after query for this first chrom */
         long long tStart = qCount+1;
-        struct crisprList *tList = NULL;
+        struct crisprList *tList;
         for (tList = qList; tList; tList = tList->next)
             {
-            long long tCount = tStart;
+            long long tCount;
 	    totalCrisprsCompare += tList->crisprCount - tStart;
             for (tCount = tStart; tCount < tList->crisprCount; ++tCount)
                 {
 		/* the XOR determine differences in two sequences, the
 		 * shift right 6 removes the PAM sequence and
 		 * the 'fortyBits &' eliminates the negativeStrand bit
 		 */
                 long long misMatch = fortyBits &
                     ((qList->sequence[qCount] ^ tList->sequence[tCount]) >> 6);
                 if (misMatch)
                     {
                     /* possible misMatch bit values: 01 10 11
                      *  turn those three values into just: 01
                      */
                     misMatch = (misMatch | (misMatch >> 1)) & 0x5555555555;
@@ -1102,47 +1130,50 @@
 			recordOffTargets(qList, tList, bitsOn, qCount, tCount, misMatch);
 			qList->offBy[bitsOn][qCount] += 1;
 			tList->offBy[bitsOn][tCount] += 1;
 			}
                     }
                 else
                     { 	/* no misMatch, identical crisprs */
                     qList->offBy[0][qCount] += 1;
                     tList->offBy[0][tCount] += 1;
                     }
                 } // for (tCount = 0; tCount < tList->crisprCount; ++tCount)
                 tStart = 0;	/* following chroms run through all */
             }	//	for (tList = target; tList; tList = tList->next)
 	}	//	for (qCount = 0; qCount < qList->crisprCount; ++qCount)
     }	//	for (qList = query; qList; qList = qList->next)
-elapsedMs = clock1000() - processStart;
-timingMessage("queryVsSelf", totalCrisprsQuery, "crisprs processed", elapsedMs, "crisprs/sec", "seconds/crispr");
-timingMessage("queryVsSelf", totalCrisprsCompare, "total comparisons", elapsedMs, "compares/sec", "seconds/compare");
+
+timingMessage("queryVsSelf", totalCrisprsQuery, "crisprs processed",
+    startTime, "crisprs/sec", "seconds/crispr");
+timingMessage("queryVsSelf", totalCrisprsCompare, "total comparisons",
+    startTime, "compares/sec", "seconds/compare");
+
 }	/* static void queryVsSelf(struct crisprList *all) */
 
 static struct crisprList *readKmers(char *fileIn)
 /* read in kmer list from 'fileIn', return list structure */
 {
 errAbort("# XXX readKmers function not implemented\n");
 verbose(1, "# reading crisprs from: %s\n", fileIn);
 struct crisprList *listReturn = NULL;
 struct lineFile *lf = lineFileOpen(fileIn, TRUE);
 char *row[10];
 int wordCount = 0;
 long long crisprsInput = 0;
 
-long startMs = clock1000();
+long startTime = clock1000();
 
 while (0 < (wordCount = lineFileChopNextTab(lf, row, ArraySize(row))) )
     {
     if (3 != wordCount)
 	errAbort("expecing three words at line %d in file %s, found: %d",
 	    lf->lineIx, fileIn, wordCount);
     struct crisprList *newItem = NULL;
     AllocVar(newItem);
     newItem->chrom = cloneString(row[0]);
     newItem->crisprCount = sqlLongLong(row[1]);
     newItem->size = sqlLongLong(row[2]);
     newItem->chromCrisprs = NULL;
     slAddHead(&listReturn, newItem);
     long long verifyCount = 0;
     while ( (verifyCount < newItem->crisprCount) &&  (0 < (wordCount = lineFileChopNextTab(lf, row, ArraySize(row)))) )
@@ -1153,89 +1184,90 @@
         ++verifyCount;
         struct crispr *oneCrispr = NULL;
         AllocVar(oneCrispr);
         oneCrispr->sequence = sqlLongLong(row[0]);
         oneCrispr->start = sqlLongLong(row[1]);
         slAddHead(&newItem->chromCrisprs, oneCrispr);
 	}
     if (verifyCount != newItem->crisprCount)
 	errAbort("expecting %lld kmer items at line %d in file %s, found: %lld",
 	    newItem->crisprCount, lf->lineIx, fileIn, verifyCount);
     crisprsInput += verifyCount;
     }
 
 lineFileClose(&lf);
 
-long elapsedMs = clock1000() - startMs;
-timingMessage("readKmers", crisprsInput, "crisprs read in", elapsedMs, "crisprs/sec", "seconds/crispr");
+timingMessage("readKmers", crisprsInput, "crisprs read in", startTime,
+    "crisprs/sec", "seconds/crispr");
 
 return listReturn;
 }	//	static struct crisprList *readKmers(char *fileIn)
 
 static void writeKmers(struct crisprList *all, char *fileOut)
 /* write kmer list 'all' to 'fileOut' */
 {
 errAbort("# XXX writeKmers function not implemented\n");
 FILE *fh = mustOpen(fileOut, "w");
-struct crisprList *list = NULL;
+struct crisprList *list;
 long long crisprsWritten = 0;
 char kmerString[33];
 char pamString[33];
 
-long startMs = clock1000();
+long startTime = clock1000();
 
 slReverse(&all);
 for (list = all; list; list = list->next)
     {
     fprintf(fh, "%s\t%lld\t%d\n", list->chrom, list->crisprCount, list->size);
-    struct crispr *c = NULL;
+    struct crispr *c;
     slReverse(&list->chromCrisprs);
     for (c = list->chromCrisprs; c; c = c->next)
 	{
         kmerValToString(kmerString, c->sequence, pamSize);
         kmerPAMString(pamString, c->sequence);
 	fprintf(fh, "%s\t%s\t%lld\t%c\n", kmerString, pamString,
 	    c->start, negativeStrand & c->sequence ? '-' : '+');
 	++crisprsWritten;
 	}
     }
 carefulClose(&fh);
 
-long elapsedMs = clock1000() - startMs;
-timingMessage("writeKmers", crisprsWritten, "crisprs written", elapsedMs, "crisprs/sec", "seconds/crispr");
+timingMessage("writeKmers", crisprsWritten, "crisprs written", startTime,
+    "crisprs/sec", "seconds/crispr");
+
 }	//	static void writeKmers(struct crisprList *all, char *fileOut)
 
 static void *threadFunction(void *id)
 /* thread entry */
 {
 struct threadControl *tId = (struct threadControl *)id;
 
 queryVsTarget(tId->query, tId->target, tId->threadCount, tId->threadId);
 
 return NULL;
 }
 
 static void runThreads(int threadCount, struct crisprList *query,
     struct crisprList *target)
 {
 struct threadControl *threadIds = NULL;
 AllocArray(threadIds, threadCount);
 
 pthread_t *threads = NULL;
 AllocArray(threads, threadCount);
 
-int pt = 0;
+int pt;
 for (pt = 0; pt < threadCount; ++pt)
     {
     struct threadControl *threadData;
     AllocVar(threadData);
     threadData->threadId = pt;
     threadData->threadCount = threadCount;
     threadData->query = query;
     threadData->target = target;
     threadIds[pt] = *threadData;
     int rc = pthread_create(&threads[pt], NULL, threadFunction, &threadIds[pt]);
     if (rc)
         {
         errAbort("Unexpected error %d from pthread_create(): %s",rc,strerror(rc));
         }
     }
@@ -1314,30 +1346,42 @@
 
 int main(int argc, char *argv[])
 /* Process command line, initialize translation arrays and call the process */
 {
 optionInit(&argc, argv, options);
 if (argc != 2)
     usage();
 
 verbose(0, "# running verboseLevel: %d\n", verboseLevel());
 
 bedFileOut = optionVal("bed", bedFileOut);
 dumpKmers = optionVal("dumpKmers", dumpKmers);
 loadKmers = optionVal("loadKmers", loadKmers);
 offTargets = optionVal("offTargets", offTargets);
 memLimit = optionInt("memLimit", memLimit);
+if (memLimit < 0)
+    errAbort("specified memLimi is less than 0: %d\n", memLimit);
+if (memLimit > 0)
+    verbose(1, "# memLimit %d gB\n", memLimit);
+cacheCount = optionInt("cacheCount", cacheCount);
+if (cacheCount < 0)
+    errAbort("specified cacheCount is less than 0: %d\n", cacheCount);
+if (cacheCount > 0)
+    verbose(1, "# cacheCount %d items\n", cacheCount);
 ranges = optionVal("ranges", ranges);
 if (ranges)
     rangesHash = readRanges(ranges);
 
 FILE *bedFH = NULL;
 if (bedFileOut)
+    {
     bedFH = mustOpen(bedFileOut, "w");
+    verbose(1, "# output to bed file: %s\n", bedFileOut);
+    }
 
 initOrderedNtVal();	/* set up orderedNtVal[] */
 crisprKmers(argv[1], bedFH);
 
 if (verboseLevel() > 1)
     printVmPeak();
 return 0;
 }