f8b37f0a0b7ac9bbd5529692fa61b921a6a2b67f
hiram
  Fri Jul 7 23:57:27 2017 -0700
check in before removing all the garbage, appears to be working with arrays much faster refs #18969

diff --git src/utils/crisprKmers/crisprKmers.c src/utils/crisprKmers/crisprKmers.c
index a1e0304..6149968 100644
--- src/utils/crisprKmers/crisprKmers.c
+++ src/utils/crisprKmers/crisprKmers.c
@@ -1,29 +1,30 @@
 /* crisprKmers - find and annotate crispr sequences. */
 #include <popcntintrin.h>
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "dnautil.h"
 #include "dnaseq.h"
 #include "dnaLoad.h"
 #include "portable.h"
 #include "basicBed.h"
 #include "sqlNum.h"
 #include "binRange.h"
 #include "obscure.h"
+#include "memalloc.h"
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "crisprKmers - annotate crispr sequences\n"
   "usage:\n"
   "   crisprKmers <sequence>\n"
   "options:\n"
   "   where <sequence> is a .2bit file or .fa fasta sequence\n"
   "   -verbose=N - control processing steps with level of verbose:\n"
   "                default N=1 - only find crisprs, set up listings\n"
   "                N=2 - empty process crispr list into new list\n"
   "                N=3 - only count identicals during processing\n"
   "                N=4 - attempt measure all off targets and\n"
@@ -64,30 +65,34 @@
     int offBy0;		/* counting number of off targets, 0 mismatch */
     int offBy1;		/* counting number of off targets, 1 mismatch */
     int offBy2;		/* counting number of off targets, 2 mismatch */
     int offBy3;		/* counting number of off targets, 3 mismatch */
     int offBy4;		/* counting number of off targets, 4 mismatch */
     };
 
 struct crisprList
 /* all chromosome sets of crisprs */
     {
     struct crisprList *next;	/* Next in list. */
     char *chrom;		/* chrom name */
     int size;			/* chrom size */
     struct crispr *chromCrisprs;	/* all the crisprs on this chrom */
     long long crisprCount;	/* number of crisprs on this chrom */
+    long long *sequence;	/* array of the sequences */
+    long long *start;		/* array of the starts */
+    char *strand;		/* array of the strand characters */
+    int **offBy;		/* offBy[5][n] */
     };
 
 #define A_BASE	0
 #define C_BASE	1
 #define G_BASE	2
 #define T_BASE	3
 #define U_BASE	3
 static int orderedNtVal[256];	/* values in alpha order: ACGT 00 01 10 11 */
 				/* for easier sorting and complementing */
 static char bases[4];  /* for binary to ascii conversion */
 
 #define fortySixBits	0x3fffffffffff
 #define fortyEixhtBits	0xffffffffffff
 #define high32bits	0xffffffff00000000
 #define low32bits	0x00000000ffffffff
@@ -163,88 +168,169 @@
 /* Compare slPairs on start value */
 {
 const struct crispr *a = *((struct crispr **)va);
 const struct crispr *b = *((struct crispr **)vb);
 long long aVal = a->start;
 long long bVal = b->start;
 if (aVal < bVal)
     return -1;
 else if (aVal > bVal)
     return 1;
 else
     return 0;
 }
 #endif
 
+static char *kmerPAMStringArray(long long val)
+/* return the ASCII string for last three bases in then binary sequence value */
+{
+static char pamString[32];
+long long twoBitMask = 0x30;
+int shiftCount = 4;
+int baseCount = 0;
+
+while (twoBitMask)
+    {
+    int base = (val & twoBitMask) >> shiftCount;
+    pamString[baseCount++] = bases[base];
+    twoBitMask >>= 2;
+    shiftCount -= 2;
+    }
+pamString[baseCount] = 0;
+return pamString;
+}	//	static char *kmerValToStringArray(struct crispr *c, int trim)
+
+#ifdef NOT
 static char *kmerPAMString(struct crispr *c)
 /* return the ASCII string for last three bases in then binary sequence value */
 {
 long long val = c->sequence;
 static char pamString[32];
 long long twoBitMask = 0x30;
 int shiftCount = 4;
 int baseCount = 0;
 
 while (twoBitMask)
     {
     int base = (val & twoBitMask) >> shiftCount;
     pamString[baseCount++] = bases[base];
     twoBitMask >>= 2;
     shiftCount -= 2;
     }
 pamString[baseCount] = 0;
 return pamString;
-}	//	static char *kmerValToString(struct crispr *c, int trim)
+}	//	static char *kmerPAMString(struct crispr *c)
+#endif
 
+static char *kmerValToStringArray(long long val, int trim)
+/* return ASCII string for binary sequence value */
+{
+static char kmerString[32];
+long long twoBitMask = 0x300000000000;
+int shiftCount = 44;
+int baseCount = 0;
+
+while (twoBitMask && (shiftCount >= (2*trim)))
+    {
+    int base = (val & twoBitMask) >> shiftCount;
+    kmerString[baseCount++] = bases[base];
+    twoBitMask >>= 2;
+    shiftCount -= 2;
+    }
+kmerString[baseCount] = 0;
+return kmerString;
+}	//	static char *kmerValToStringArray(long long val, int trim)
+
+#ifdef NOT
 static char *kmerValToString(struct crispr *c, int trim)
 /* print out ASCII string for binary sequence value */
 {
 long long val = c->sequence;
 static char kmerString[32];
 long long twoBitMask = 0x300000000000;
 int shiftCount = 44;
 int baseCount = 0;
 
 while (twoBitMask && (shiftCount >= (2*trim)))
     {
     int base = (val & twoBitMask) >> shiftCount;
     kmerString[baseCount++] = bases[base];
     twoBitMask >>= 2;
     shiftCount -= 2;
     }
 kmerString[baseCount] = 0;
 return kmerString;
 }	//	static char *kmerValToString(struct crispr *c, int trim)
- 
+#endif
 static long long revComp(long long val)
 /* reverse complement the 2-bit numerical value kmer */
 {
 /* complement bases and add 18 0 bits
  *    because this divide and conquer works on 64 bits, not 46,
  *      hence the addition of the 18 zero bits which fall out
  * The 'val ^ fortySixBits' does the complement, the shifting and
  *     masking does the reversing.
  */
 register long long v = (val ^ fortySixBits) << 18;
 v = ((v & high32bits) >> 32) | ((v & low32bits) << 32);
 v = ((v & high16bits) >> 16) | ((v & low16bits) << 16);
 v = ((v & high8bits) >> 8) | ((v & low8bits) << 8);
 v = ((v & high4bits) >> 4) | ((v & low4bits) << 4);
 v = ((v & high2bits) >> 2) | ((v & low2bits) << 2);
 return v;
 }	//	static long long revComp(long long val)
 
+static void copyToArray(struct crisprList *list)
+/* copy the crispr list data into arrays */
+{
+verbose(1, "# copyToArray(%p)\n", (void *)list);
+struct crisprList *cl = NULL;
+for (cl = list; cl; cl = cl->next)
+    {
+    verbose(1, "# copy %p %lld crisprs for chrom %s\n", (void *)cl, cl->crisprCount, cl->chrom);
+    size_t memSize = cl->crisprCount * sizeof(long long);
+    cl->sequence = (long long *)needLargeMem(memSize);
+    cl->start = (long long *)needLargeMem(memSize);
+    memSize = cl->crisprCount * sizeof(char);
+    cl->strand = (char *)needLargeMem(memSize);
+    memSize = 5 * sizeof(int *);
+    cl->offBy = (int **)needLargeMem(memSize);
+    memSize = cl->crisprCount * sizeof(int);
+    int r = 0;
+    for (r = 0; r < 5; ++r)
+        cl->offBy[r] = (int *)needLargeZeroedMem(memSize);
+
+    long long i = 0;
+    struct crispr *c = NULL;
+    for (c = cl->chromCrisprs; c; c = c->next)
+        {
+    #ifdef NOT
+    if (2 > i) {
+    verbose(1, "# list: %#llx\n", (unsigned long long)cl);
+    verbose(1, "# list->sequence: %#llx\n", (unsigned long long)cl->sequence);
+    verbose(1, "# list->sequence[%lld]: %#llx\n", i, (unsigned long long)&cl->sequence[i]);
+    verbose(1, "# list->strand: %#llx\n", (unsigned long long)cl->strand);
+    verbose(1, "# list->strand[%lld]: %#llx\n", i, (unsigned long long)&cl->strand[i]);
+    }
+    #endif
+        cl->sequence[i] = c->sequence;
+        cl->start[i] = c->start;
+        cl->strand[i++] = c->strand;
+        }
+    }
+}	//	static void copyToArray(struct crisprList *list)	*/
+
 static struct crisprList *generateKmers(struct dnaSeq *seq)
 {
 struct crispr *crisprSet = NULL;
 struct crisprList *returnList = NULL;
 AllocVar(returnList);
 returnList->chrom = cloneString(seq->name);
 returnList->size = seq->size;
 int i;
 DNA *dna;
 long long chromPosition = 0;
 long long startGap = 0;
 long long gapCount = 0;
 int kmerLength = 0;
 long long kmerVal = 0;
 long long endsGG = (G_BASE << 2) | G_BASE;
@@ -297,30 +383,72 @@
             if (startGap != chromPosition)
                 verbose(4, "#GAP %s\t%lld\t%lld\t%lld\t%lld\t%s\n", seq->name, startGap, chromPosition, gapCount, chromPosition-startGap, "+");
             else
                 verbose(4, "#GAP %s\t%lld\t%lld\t%lld\t%lld\t%s\n", seq->name, startGap, chromPosition+1, gapCount, 1+chromPosition-startGap, "+");
             kmerLength = 0;  /* reset, start over */
             kmerVal = 0;
             }	// else if (val >= 0)
     ++chromPosition;
     }	// for (i=0; i < seq->size; ++i)
 // slReverse(&crisprSet);	// save time, order not important at this time
 returnList->chromCrisprs = crisprSet;
 returnList->crisprCount = slCount(crisprSet);
 return returnList;
 }	// static struct crisprList *generateKmers(struct dnaSeq *seq)
 
+static void showCountsArray(struct crisprList *all)
+/* everything has been scanned and counted, print out all the data from arrays*/
+{
+FILE *bedFH = NULL;
+if (bedFileOut)
+    bedFH = mustOpen(bedFileOut, "w");
+
+verbose(1, "#\tprint out all data\n");
+struct crisprList *list;
+long long totalOut = 0;
+verbose(1, "# %d number of chromosomes to display\n", slCount(all));
+for (list = all; list; list = list->next)
+    {
+    verbose(1, "# crisprs count %lld on chrom %s\n", list->crisprCount, list->chrom);
+    long long c = 0;
+    for (c = 0; c < list->crisprCount; ++c)
+        {
+	int negativeOffset = 0;
+        if (list->strand[c] == '-')
+	    negativeOffset = 3;
+	long long txStart = list->start[c] + negativeOffset;
+	long long txEnd = txStart + 20 + negativeOffset;
+        int totalOffs = list->offBy[0][c] + list->offBy[1][c] +
+	    list->offBy[2][c] + list->offBy[3][c] + list->offBy[4][c];
+
+        if (0 == totalOffs)
+verbose(1, "# PERFECT score %s:%lld %c\t%s\n", list->chrom, list->start[c], list->strand[c], kmerValToStringArray(list->sequence[c], 3));
+
+	if (bedFH)
+	    fprintf(bedFH, "%s\t%lld\t%lld\t%s\t%d\t%c\t%lld\t%lld\t%s\t%d\t%d\t%d\t%d\n", list->chrom, list->start[c], list->start[c]+23, kmerValToStringArray(list->sequence[c], 3), list->offBy[0][c], list->strand[c], txStart, txEnd, kmerPAMStringArray(list->sequence[c]), list->offBy[1][c], list->offBy[2][c], list->offBy[3][c], list->offBy[4][c]);
+
+        if (list->offBy[0][c])
+           verbose(3, "# array identical: %d %s:%lld %c\t%s\n", list->offBy[0][c], list->chrom, list->start[c], list->strand[c], kmerValToStringArray(list->sequence[c], 3));
+	++totalOut;
+	}
+    }
+verbose(1, "# total Array crisprs displayed: %lld\n", totalOut);
+if (bedFH)
+    carefulClose(&bedFH);
+}	// static void showCountsArray(struct crisprList *all)
+
+#ifdef NOT
 static void showCounts(struct crisprList *all)
 /* everything has been scanned and counted, print out all the data */
 {
 FILE *bedFH = NULL;
 if (bedFileOut)
     bedFH = mustOpen(bedFileOut, "w");
 
 verbose(1, "#\tprint out all data\n");
 struct crisprList *list;
 long long totalChecked = 0;
 verbose(1, "# %d number of chromosomes to display\n", slCount(all));
 for (list = all; list; list = list->next)
     {
     struct crispr *c = NULL;
     struct crispr *next = NULL;
@@ -333,31 +461,33 @@
 	long long txStart = c->start + negativeOffset;
 	long long txEnd = c->start+20 + negativeOffset;
 	if (bedFH)
 	    fprintf(bedFH, "%s\t%lld\t%lld\t%s\t%d\t%c\t%lld\t%lld\t%s\t%d\t%d\t%d\t%d\n", list->chrom, c->start, c->start+23, kmerValToString(c, 3), c->offBy0, c->strand, txStart, txEnd, kmerPAMString(c), c->offBy1, c->offBy2, c->offBy3, c->offBy4);
 
         if (c->offBy0)
            verbose(3, "# identical: %d %s:%lld %c\t%s\n", c->offBy0, list->chrom, c->start, c->strand, kmerValToString(c, 3));
 	next = c->next;
 	++totalChecked;
 	}
     }
 verbose(1, "# total crisprs displayed: %lld\n", totalChecked);
 if (bedFH)
     carefulClose(&bedFH);
 }	// static void showCounts(struct crisprList *all)
+#endif
 
+#ifdef NOT
 static long long countOffTargets(char *chrom, struct crispr *c, struct crisprList *all)
 /* given one crispr c, scan against all others to find off targets */
 {
 struct crisprList *listPtr;
 long long totalCompares = 0;
 for (listPtr = all; listPtr; listPtr = listPtr->next)
     {
     struct crispr *crisprPtr = NULL;
     struct crispr *next = NULL;
     for (crisprPtr = listPtr->chromCrisprs; crisprPtr; crisprPtr = next)
 	{
 	/* the XOR will determine differences in two sequences
 	 *  the shift right 6 removes the PAM sequence
 	 */
         long long misMatch = (c->sequence ^ crisprPtr->sequence) >> 6;
@@ -391,30 +521,31 @@
                         break;
                     case 4:
                         c->offBy4 += 1;
                         crisprPtr->offBy4 += 1;
                         break;
                     default: break;
                     }
                 }
 	    }
         ++totalCompares;
 	next = crisprPtr->next;
 	}
     }
 return totalCompares;
 }	// static void countOffTargets( . . . )
+#endif
 
 static struct crisprList *scanSequence(char *inFile)
 /* scan the given file, return list of crisprs */
 {
 verbose(1, "#\tscanning sequence file: %s\n", inFile);
 dnaUtilOpen();
 struct dnaLoad *dl = dnaLoadOpen(inFile);
 struct dnaSeq *seq;
 struct crisprList *listReturn = NULL;
 
 long elapsedMs = 0;
 long scanStart = clock1000();
 long long totalCrisprs = 0;
 
 /* scanning all sequences, setting up crisprs on the listReturn */
@@ -428,98 +559,257 @@
 	}
 
     long startTime = clock1000();
     struct crisprList *oneList = generateKmers(seq);
     slAddHead(&listReturn, oneList);
     totalCrisprs += oneList->crisprCount;
     elapsedMs = clock1000() - startTime;
     timingMessage(seq->name, oneList->crisprCount, "crisprs", elapsedMs, "crisprs/sec");
     }
 
 elapsedMs = clock1000() - scanStart;
 timingMessage("scanSequence", totalCrisprs, "total crisprs", elapsedMs, "crisprs/sec");
 return listReturn;
 }	/*	static crisprList *scanSequence(char *inFile)	*/
 
-static struct crisprList *rangeExtraction(struct crisprList *all)
+static struct crisprList *rangeExtraction(struct crisprList **allReference)
 /* given ranges in global rangesHash, construct new list of crisprs that
  *     have any type of overlap with the ranges, also extract those items from
  *         the all list.  Returns new list.
  */
 {
+struct crisprList *all = *allReference;
 struct crisprList *listReturn = NULL;
-struct crisprList *listPtr = NULL;
+struct crisprList *list = NULL;
 int inputChromCount = slCount(all);
 long long returnListCrisprCount = 0;
 long long examinedCrisprCount = 0;
+struct crisprList *prevChromList = NULL;
 
 long elapsedMs = 0;
 long scanStart = clock1000();
 
-for (listPtr = all; listPtr; listPtr = listPtr->next)
+struct crisprList *nextList = NULL;
+for (list = all; list; list = nextList)
     {
+    nextList = list->next;	// remember before perhaps lost
+verbose(1, "# range scan all %p, list %p, nextList %p\n",
+  (void *)all, (void *)list, (void *)nextList);
     struct crispr *c = NULL;
-    struct binKeeper *bk = hashFindVal(rangesHash, listPtr->chrom);
+    struct binKeeper *bk = hashFindVal(rangesHash, list->chrom);
     struct crispr *newCrispr = NULL;
     if (bk != NULL)
         {
 	struct crispr *prevCrispr = NULL;
 	struct crispr *next = NULL;
-        for (c = listPtr->chromCrisprs; c; c = next)
+        for (c = list->chromCrisprs; c; c = next)
             {
 	    ++examinedCrisprCount;
             struct binElement *hitList = NULL;
 	    next = c->next;	// remember before perhaps lost
             int start = c->start;
             if (c->strand == '-')
                 start += 2;
             int end = start + 20;
             hitList = binKeeperFind(bk, start, end);
             if (hitList)
 		{
 		++returnListCrisprCount;
 		if (prevCrispr)	// remove this one from the all list
 		    prevCrispr->next = next;
+                else
+                    list->chromCrisprs = next;	// removing the first one
 		c->next = NULL;
 		slAddHead(&newCrispr, c);	// constructing new list
-		prevCrispr = prevCrispr;
 		}
 	    else
 		prevCrispr = c;	// remains on all list
 	    slFreeList(&hitList);
             }
 	}
     if (newCrispr)
 	{
 	struct crisprList *newItem = NULL;
 	AllocVar(newItem);
-	newItem->chrom = cloneString(listPtr->chrom);
-	newItem->size = listPtr->size;
+	newItem->chrom = cloneString(list->chrom);
+	newItem->size = list->size;
 	newItem->chromCrisprs = newCrispr;
 	newItem->crisprCount = slCount(newCrispr);
 	slAddHead(&listReturn, newItem);
+verbose(1, "# range scan newCrispr: %lld crisprs on chrom %s\n", 
+    newItem->crisprCount, newItem->chrom);
+        if (NULL == list->chromCrisprs)	// all have been removed for this chrom
+	    {
+verbose(1, "# all crisprs on chrom %s have been removed, prevChromList: %p nextList %p\n", list->chrom, (void *)prevChromList, (void *)nextList);
+	    if (prevChromList)	// remove it from the chrom list
+		prevChromList->next = nextList;
+	    else
+{
+verbose(1, "# removing the first chromList, all %p becomes nextList %p\n", (void *)all, (void *)nextList);
+                all = nextList;	// removing the first one
+}
+	    }
+	else
+	    verbose(1, "# range scan same chrom list still has %lld crisprs on chrom %s\n",
+    (long long)slCount(list->chromCrisprs), list->chrom);
 	}
-    }	//	for (listPtr = all; listPtr; listPtr = listPtr->next)
+    prevChromList = list;
+    }	//	for (list = all; list; list = list->next)
 
 elapsedMs = clock1000() - scanStart;
 verbose(1, "# range scan, input %d chromosomes, return %d chromosomes\n",
     inputChromCount, slCount(listReturn));
 timingMessage("range scan", examinedCrisprCount, "examined crisprs", elapsedMs, "crisprs/sec");
 timingMessage("range scan", returnListCrisprCount, "returned crisprs", elapsedMs, "crisprs/sec");
+if (NULL == all)
+    {
+    allReference = NULL;	// they have all been removed
+verbose(1, "# range scan, every single chrom has been removed\n");
+    }
+else if (*allReference != all)
+    {
+verbose(1, "# range scan, first chrom list has been moved from %p to %p\n", (void *)*allReference, (void *)all);
+    *allReference = all;
+    }
 return listReturn;
 }	//	static crisprList *rangeExtraction(crisprList *all)
 
+static void queryVsAllArray(struct crisprList *query,
+    struct crisprList *target)
+/* run the query crisprs list against the target list in the array structures */
+{
+struct crisprList *qList = NULL;
+long long totalCrisprsQuery = 0;
+long long totalCompares = 0;
+
+long processStart = clock1000();
+long elapsedMs = 0;
+
+for (qList = query; qList; qList = qList->next)
+    {
+    long long qCount = 0;
+    totalCrisprsQuery += qList->crisprCount;
+verbose(1, "# queryVsAllArray %lld query crisprs on chrom %s\n", qList->crisprCount, qList->chrom);
+    for (qCount = 0; qCount < qList->crisprCount; ++qCount)
+	{
+        struct crisprList *tList = NULL;
+        for (tList = target; tList; tList = tList->next)
+            {
+            long long tCount = 0;
+	    totalCompares += tList->crisprCount;
+if (0 == qCount)
+    verbose(1, "# queryVsAllArray %lld target crisprs on chrom %s\n", tList->crisprCount, tList->chrom);
+            for (tCount = 0; tCount < tList->crisprCount; ++tCount)
+                {
+                /* the XOR will determine differences in two sequences
+                 *  the shift right 6 removes the PAM sequence
+                 */
+                long long misMatch =
+                    (qList->sequence[qCount] ^ tList->sequence[tCount]) >> 6;
+                if (misMatch)
+                    {
+                    /* possible misMatch bit values: 01 10 11
+                     *  turn those three values into just: 01
+                     */
+                    misMatch = (misMatch | (misMatch > 1)) & 0x5555555555;
+                    int bitsOn = _mm_popcnt_u64(misMatch);
+		    if (bitsOn < 5)
+			{
+			qList->offBy[bitsOn][qCount] += 1;
+			tList->offBy[bitsOn][tCount] += 1;
+			}
+                    }
+                else
+                    { 	/* no misMatch, identical crisprs */
+                    qList->offBy[0][qCount] += 1;
+                    tList->offBy[0][tCount] += 1;
+                    }
+                } // for (tCount = 0; tCount < tList->crisprCount; ++tCount)
+            }	//	for (tList = target; tList; tList = tList->next)
+	}	//	for (qCount = 0; qCount < qList->crisprCount; ++qCount)
+    }	//	for (qList = query; qList; qList = qList->next)
+verbose(1, "# done with scanning, check timing\n");
+elapsedMs = clock1000() - processStart;
+timingMessage("queryVsAllArray", totalCrisprsQuery, "crisprs processed", elapsedMs, "crisprs/sec");
+timingMessage("queryVsAllArray", totalCompares, "total comparisons", elapsedMs, "compares/sec");
+}	/* static struct crisprList *queryVsAllArray(struct crisprList *query,
+	    struct crisprList *target) */
+
+static void allVsAllArray(struct crisprList *all)
+/* run this 'all' list vs. itself with arrays */
+{
+struct crisprList *qList = NULL;
+long long totalCrisprsQuery = 0;
+long long totalCrisprsCompare = 0;
+
+long processStart = clock1000();
+long elapsedMs = 0;
+
+/* query runs through all chroms */
+for (qList = all; qList; qList = qList->next)
+    {
+    long long qCount = 0;
+    totalCrisprsQuery += qList->crisprCount;
+verbose(1, "# allVsAllArray %lld query crisprs on chrom %s\n", qList->crisprCount, qList->chrom);
+    /* query runs through all kmers on this chrom */
+    for (qCount = 0; qCount < qList->crisprCount; ++qCount)
+	{
+	/* target starts on same chrom as query, and
+	   at next kmer after query for this first chrom */
+        long long tStart = qCount+1;
+        struct crisprList *tList = NULL;
+        for (tList = qList; tList; tList = tList->next)
+            {
+            long long tCount = tStart;
+	    totalCrisprsCompare += tList->crisprCount - tStart;
+            for (tCount = tStart; tCount < tList->crisprCount; ++tCount)
+                {
+                /* the XOR will determine differences in two sequences
+                 *  the shift right 6 removes the PAM sequence
+                 */
+                long long misMatch =
+                    (qList->sequence[qCount] ^ tList->sequence[tCount]) >> 6;
+                if (misMatch)
+                    {
+                    /* possible misMatch bit values: 01 10 11
+                     *  turn those three values into just: 01
+                     */
+                    misMatch = (misMatch | (misMatch > 1)) & 0x5555555555;
+                    int bitsOn = _mm_popcnt_u64(misMatch);
+		    if (bitsOn < 5)
+			{
+			qList->offBy[bitsOn][qCount] += 1;
+			tList->offBy[bitsOn][tCount] += 1;
+			}
+                    }
+                else
+                    { 	/* no misMatch, identical crisprs */
+                    qList->offBy[0][qCount] += 1;
+                    tList->offBy[0][tCount] += 1;
+                    }
+                } // for (tCount = 0; tCount < tList->crisprCount; ++tCount)
+                tStart = 0;	/* following chroms run through all */
+            }	//	for (tList = target; tList; tList = tList->next)
+	}	//	for (qCount = 0; qCount < qList->crisprCount; ++qCount)
+    }	//	for (qList = query; qList; qList = qList->next)
+elapsedMs = clock1000() - processStart;
+timingMessage("allVsAllArray", totalCrisprsQuery, "crisprs processed", elapsedMs, "crisprs/sec");
+timingMessage("allVsAllArray", totalCrisprsCompare, "total comparisons", elapsedMs, "compares/sec");
+}	/* static struct crisprList *allVsAllArray(struct crisprList *query,
+	    struct crisprList *target) */
+
+#ifdef NOT
 static struct crisprList *queryVsAll(struct crisprList *query,
     struct crisprList *target)
 /* run the query crisprs list against the target list */
 {
 struct crisprList *listReturn = NULL;
 
 long long totalCrisprsQuery = 0;
 long long totalCompares = 0;
 int targetChrCount = slCount(target);
 int queryChrCount = slCount(query);
 verbose(1, "# queryVsAll: target %d chromosomes vs. query %d chromosomes\n",
     targetChrCount, queryChrCount);
 
 struct crisprList *listPtr = NULL;
 long processStart = clock1000();
@@ -537,31 +827,33 @@
         totalCompares += countOffTargets(listPtr->chrom, c, target);
         ++crisprsDone;
 	if (crisprsDone < 9)
 	    {
 	    elapsedMs = clock1000() - processStart;
 	    timingMessage("queryVsAll", totalCompares, "total comparisons", elapsedMs, "compares/sec");
 	    }
         }
     }	// for (listPtr = target; listPtr; listPtr = nextChr)
 
 elapsedMs = clock1000() - processStart;
 timingMessage("queryVsAll", totalCrisprsQuery, "crisprs processed", elapsedMs, "crisprs/sec");
 timingMessage("queryVsAll", totalCompares, "total comparisons", elapsedMs, "compares/sec");
 
 return listReturn;
-}
+}	/* static struct crisprList *queryVsAll(struct crisprList *query,
+	    struct crisprList *target) */
+
 
 static struct crisprList *allVsAll(struct crisprList *all,
     struct crisprList *addTo)
 /* run the all list against itself, add to 'addTo' if given */
 {
 struct crisprList *listReturn = NULL;
 if (addTo)
     listReturn = addTo;
 
 long long totalCrisprsIn = 0;
 long long totalCrisprsOut = 0;
 long long totalCompares = 0;
 int chrCount = slCount(all);
 verbose(1, "# allVsAll: crispr list, scanning %d chromosomes, now processing . . .\n", chrCount);
 struct crisprList *listPtr = NULL;
@@ -613,30 +905,31 @@
 if (totalCrisprsIn != totalCrisprsOut)
     verbose(1, "#\tERROR: initial crispr list count %lld != output count %lld\n", totalCrisprsIn, totalCrisprsOut);
 elapsedMs = clock1000() - processStart;
 perSecond = 0.0;
 if (elapsedMs > 0)
     perSecond = 1000.0 * totalCrisprsIn / elapsedMs;
 verbose(1, "# %lld total crisprs processed @ %ld ms -> %.2f crisprs/sec\n", totalCrisprsIn, elapsedMs, perSecond);
 perSecond = 0.0;
 if (elapsedMs > 0)
     perSecond = 1000.0 * totalCompares / elapsedMs;
 verbose(1, "# %lld total comparisons @ %ld ms -> %.2f crisprs/sec\n", totalCompares, elapsedMs, perSecond);
 
 return listReturn;
 }	//	static struct crisprList *allVsAll(struct crisprList *all,
 			// struct crisprList *addTo)
+#endif
 
 static struct crisprList *readKmers(char *fileIn)
 /* read in kmer list from 'fileIn', return list structure */
 {
 verbose(1, "# reading crisprs from: %s\n", fileIn);
 struct crisprList *listReturn = NULL;
 struct lineFile *lf = lineFileOpen(fileIn, TRUE);
 char *row[10];
 int wordCount = 0;
 long long crisprsInput = 0;
 
 long startMs = clock1000();
 
 while (0 < (wordCount = lineFileChopNextTab(lf, row, ArraySize(row))) )
     {
@@ -703,34 +996,33 @@
 	fprintf(fh, "%lld\t%lld\t%c\n", c->sequence,
 	    c->start, c->strand);
 	++crisprsWritten;
 	}
     }
 carefulClose(&fh);
 
 long elapsedMs = clock1000() - startMs;
 timingMessage("writeKmers", crisprsWritten, "crisprs written", elapsedMs, "crisprs/sec");
 }	//	static void writeKmers(struct crisprList *all, char *fileOut)
 
 static void crisprKmers(char *sequence)
 /* crisprKmers - find and annotate crispr sequences. */
 {
 struct crisprList *queryCrisprs = NULL;
-struct crisprList *countedCrisprs = NULL;
+// struct crisprList *countedCrisprs = NULL;
 struct crisprList *allCrisprs = NULL;
 
-
 if (loadKmers)
     allCrisprs = readKmers(loadKmers);
 else
     allCrisprs = scanSequence(sequence);
 
 #ifdef NOT
 // timing walking through the linked lists
 long startMs = clock1000();
 long long kmerCount = 0;
 struct crisprList *list = NULL;
 for (list = allCrisprs; list; list = list->next)
     kmerCount += slCount(list->chromCrisprs);
 long elapsedMs = clock1000() - startMs;
 timingMessage("slCount", kmerCount, "kmers counted", elapsedMs, "kmers/sec");
 startMs = clock1000();
@@ -743,48 +1035,82 @@
 return;
 #endif
 
 if (dumpKmers)
     {
     writeKmers(allCrisprs, dumpKmers);
     return;
     }
 
 /* processing those crisprs */
 if (verboseLevel() > 1)
     {
     /* if ranges have been specified, construct queryList of kmers to measure */
     if (rangesHash)
         {
-        queryCrisprs = rangeExtraction(allCrisprs);
-        /* first run up query vs. all, no list mucking about */
+	/* result here is two exclusive sets: query, and allCrisprs
+	 *    the query crisprs have been extracted from the allCrisprs */
+verbose(1, "# before rangeExtraction, allCrisprs: %p\n", (void *)allCrisprs);
+        queryCrisprs = rangeExtraction(& allCrisprs);
+verbose(1, "# after rangeExtraction, allCrisprs: %p\n", (void *)allCrisprs);
+verbose(1, "# copyToArray(queryCrisprs)\n");
+        copyToArray(queryCrisprs);
+	if (allCrisprs)	// if there are any left on the all list
+	    {
+verbose(1, "# copyToArray(allCrisprs)\n");
+	    copyToArray(allCrisprs);
+            /* first run up query vs. all */
+            queryVsAllArray(queryCrisprs, allCrisprs);
+	    }
+        /* then run up the query vs. itself avoiding self vs. self */
+        allVsAllArray(queryCrisprs);
+        showCountsArray(queryCrisprs);
+verbose(1, "# back from showCountsArray\n");
+#ifdef NOT
         countedCrisprs = queryVsAll(queryCrisprs, allCrisprs);
         /* then run up query vs. itself, add to countedCrisprs */
         countedCrisprs = allVsAll(queryCrisprs, countedCrisprs);
+#endif
+        }
+    else
+        {
+	copyToArray(allCrisprs);
+        /* run up all vs. all avoiding self vs. self */
+        allVsAllArray(allCrisprs);
+        showCountsArray(allCrisprs);
         }
+#ifdef NOT
     else
         countedCrisprs = allVsAll(allCrisprs, NULL);
+#endif
 
+#ifdef NOT
     showCounts(countedCrisprs);
+#endif
     }
+verbose(1, "# returning from crisprKmers\n");
 }	// static void crisprKmers(char *sequence)
 
 int main(int argc, char *argv[])
 /* Process command line, initialize translation arrays and call the process */
 {
 optionInit(&argc, argv, options);
 if (argc != 2)
     usage();
 
+verbose(0, "# running verboseLevel: %d\n", verboseLevel());
+
 bedFileOut = optionVal("bed", bedFileOut);
 dumpKmers = optionVal("dumpKmers", dumpKmers);
 loadKmers = optionVal("loadKmers", loadKmers);
 ranges = optionVal("ranges", ranges);
 if (ranges)
     rangesHash = readRanges(ranges);
 
 initOrderedNtVal();	/* set up orderedNtVal[] */
 crisprKmers(argv[1]);
+verbose(1, "# returned from crisprKmers(%s}\n", argv[1]);
+verbose(1, "# checking heap after crisprKmers\n");
 if (verboseLevel() > 1)
     printVmPeak();
 return 0;
 }