src/shortReads/splat/splat.c 1.32

1.32 2010/04/01 17:31:11 markd
fixed GCC portability issues
Index: src/shortReads/splat/splat.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/shortReads/splat/splat.c,v
retrieving revision 1.31
retrieving revision 1.32
diff -b -B -U 1000000 -r1.31 -r1.32
--- src/shortReads/splat/splat.c	7 Nov 2008 07:06:40 -0000	1.31
+++ src/shortReads/splat/splat.c	1 Apr 2010 17:31:11 -0000	1.32
@@ -1,1150 +1,1150 @@
 /* splat - Speedy Local Alignment Tool.  For short reads - 25 bases and up.  */
 /* Copyright 2008 Jim Kent all rights reserved. */
 
 /* The algorithm is designed to map 25-mers accommodating up to two base substitutions,
  * or one substitution and a single base insertion or deletion. The idea of the algorithm
  * is based on the idea that there will be a very similar 12-mer, and a very similar
  * 6-mer in the query sequence.  First a "splix" indexed which is combined 12-mer and 6-mers
  * is constructed, and then the index is searched allowing a single base mismatch and a
  * single base shift between the 12-mers and 6-mers. 
  *
  * This all probably sounds a little vague.  To make it more concrete let's introduce some
  * notation.  Let the letters A-Y represent locations 1-25 of the 25-mer.  The 25-mer as a
  * whole is represented as ABCDEFGHIJKLMNOPQRSTUVWXY.  In our search we'll consider two
  * major cases - those where there is a near-perfect match on the left side (ABCDEFGHIJKL)
  * and where there is a near perfect match on the right side (NOPQRSTUVWXYZ).  
  *
  * Since we are only allowing only one indel, it must be either on the right or the left 
  * side. Thus the other side must be free from indels.  Since we allow only a single mismatch
  * if there is an indel, this indel-free side must differ by at most one base between the 
  * query and the genome. Thus if we index all 12-mers in the genome, and look up the side and
  * all possible all possible single base differences from that side in the index, we will be
  * guaranteed to be able to find the true mapping location (if any) in the index.  Since there
  * are 2 alternative bases at each of 12 positions in the side, plus the side itself, there are
  * 37 lookups we need to do for each side,  and a total of 74 for both sides of the query.
  *
  * Unfortunately in a 4 gigabase genome, approximately the size of the human genome, 
  * one would expect each 12-mer to occur 256 times. Since we're searching 74 times, this
  * means we expect 74 * 256 = 18944 spurious matches. We'd like to be a little more specific
  * than this.  This is where the 6-mers come in.  For now let's just consider the case
  * where the left side is the indel free near-perfect match.  In the splix index
  * there is a list of all six-mers that come after the 12-mer, and also a list of
  * all six-mers that come immemediately after that.  Thus in the case where there
  * is a perfect match to the query in the genome, there will be an index entry
  * corresponding to 
  *        ABCDEFGHIJKL MNOPQR STUVWX
  * where we call MNOPQR the 'after1' and STUVWX the 'after2.'
  *
  * In the no-indel case, we have up to two mismatches. If one of them is in the 12-mer,
  * then either after1 or after2 will be a perfect match.  If the 12-mer is perfect,
  * then we have two mismatches to distribute between after1 and after2.  If both mismatches
  * are in one of the afters, then the other will be perfect,  otherwise both afters will have
  * exactly one mismatch.  So, in the worst case, so long as we look for near-perfect matches
  * to both after1 and after2, we will be guaranteed to find the true mapping. 
  *
  * This will end up narrowing down our search a lot.  Each 6-mer lookup happens by chance just
  * 1 in 4096 times.  We will need to do 2 * (3 * 6 + 1) = 38 searches though to handle the
  * off-by-one.  This gets us down to approximately 176 spurious matches for every true match.
  *
  * The segmented nature of the index, and the fact that our query has 25 rather than 24
  * bases together make it possible to accommodate single base insertions in this scheme
  * relatively inexpensively. Considering just to consider the case where the indel is on
  * the right side,  it can either occur in the MNOPQR section of the STUVWX section.  If
  * it occurs in the latter section, there is no problem, because MNOPQR will match.  We only
  * need to consider then the case where the indel is in the 'third quadrant' that is in
  * MNOPQR.
  *
  * Let's represent the matching part of the genome with the same letters.  An insertion in 
  * the genome in this quadrant would thus yeild one of the following things in the splix index:
  *      ABCDEFGHIJKL +MNOPQ RSTUVW
  *      ABCDEFGHIJKL M+NOPQ RSTUVW
  *      ABCDEFGHIJKL MN+OPQ RSTUVW
  *      ABCDEFGHIJKL MNO+PQ RSTUVW
  *      ABCDEFGHIJKL MNOP+Q RSTUVW
  *      ABCDEFGHIJKL MNOPQ+ RSTUVW
  * Note in all cases the after2 is RSTUVW.  Fortunately this is a sequence that is found in
  * the query, just using bases 17-23  instead of the usual 18-24 for the after2.
  *
  * A deletion in the genome would be one of the following cases:
  *      ABCDEFGHIJKL NOPQRS TUVWXY
  *      ABCDEFGHIJKL MOPQRS TUVWXY
  *      ABCDEFGHIJKL MNPQRS TUVWXY
  *      ABCDEFGHIJKL MNOQRS TUVWXY
  *      ABCDEFGHIJKL MNOPRS TUVWXY
  *      ABCDEFGHIJKL MNOPQS TUVWXY
  * Note in all cases the after2 is TUVWXY.  Fortunately this is also a sequence that is found
  * in the query, just usig bases 19-25 instead of the usual 18-24 for the after2.
  *
  * Thus by doing a search on the after2 shifted one base left and right we pick up all single
  * base deletions. This involves 38 searches, and has the effect of bringing us up to an
  * expectation of 352 spurious matches.
  *
  * Doing 352 extensions per read is not that much, but still we want to do 100's of millions
  * of reads in a timely manner, so the strategy here is to do as little work per extension
  * as possible.  From the index search we already know the mismatches
  * in 18 of the query bases, know if there is an indel, and if there is an indel have
  * localized it to within about 6 bases.  So we only need to do the extension on the parts
  * that haven't been searched in the index.  The code to do this is a little long and
  * contains quite a few cases, but it contains no nested loops, and the single loops just run
  * about 6 times so it is quite fast.
  */
 
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "localmem.h"
 #include "dnaseq.h"
 #include "dnaLoad.h"
 #include "splix.h"
 #include "intValTree.h"
 #include "fa.h"
 #include "fuzzyFind.h"
 #include "psl.h"
 #include "axt.h"
 #include "maf.h"
 #include "splat.h"
 
 static char const rcsid[] = "$Id$";
 
 char *version = "31";	/* Program version number. */
 
 /* Command line driven variables. */
 static char *over = NULL;
 static char *out = "splat";
 static int maxDivergence = 5;
 static boolean worseToo = FALSE;
 static int maxRepeat = 10;
 static char *repeatOutput = NULL;
 static int maxGap = 1;
 static int maxMismatch = 2;
 static boolean memoryMap = FALSE;
 static int tagSize;
 
 static boolean exactOnly;	/* Set to true if maxGap and maxMismatch are both 0. */
 
 static void usage()
 /* Explain usage and exit. */
 {
 errAbort(
 "splat - SPeedy Local Alignment Tool. v%s. Designed to align small reads to large\n"
 "DNA databases such as genomes quickly.  Reads need to be at least 25 bases.\n"
 "usage:\n"
 "   splat target query output\n"
 "where:\n"
 "   target is a large indexed dna database in splix format.  Use splixMake to create this\n"
 "          file from fasta, 2bit or nib format\n"
 "   query is a fasta or fastq file full of sequences of at least 25 bases each\n"
 "   output is the output alignment file, by default in .splat format\n"
 "note: can use 'stdin' or 'stdout' as a file name for better piping\n"
 "overall options:\n"
 "   -out=format - Output format.  Options include splat (default), psl, maf, [soap, eland soon....]\n"
 "   -worseToo - if set return alignments other than the best alignments\n"
 "   -maxRepeat=N  - maximum number of alignments to output on one query sequence. Default %d\n"
 "   -over=fileName - Name of file with list of 25-mers that map many times in genome.\n"
 "       Helps speed in some cases.  Typically use file human10.over for human genome.\n"
 "   -repeatOutput=fileName.fa - Put reads that map more than maxRepeat times in here\n"
 "options effecting just first 25 bases\n"
 "   -maxGap=N. Maximum gap size. Default is %d. Set to 0 for no gaps\n"
 "   -maxMismatch=N. Maximum number of mismatches. Default %d. (A gap counts as a mismatch.)\n"
 "   -maxDivergence=N Maximum divergence level between read and genome to map.  Default %d\n"
 "                    Divergence combines gaps and mismatches.  Mismatch=2, gap=3\n"
 "   -mmap - Use memory mapping. Faster just a few reads, but much slower for many reads\n"
 , version, maxRepeat, maxGap, maxMismatch, maxDivergence
 );
 }
 
 static struct optionSpec options[] = {
    {"over", OPTION_STRING},
    {"out", OPTION_STRING},
    {"maxDivergence", OPTION_FLOAT},
    {"worseToo", OPTION_BOOLEAN},
    {"maxRepeat", OPTION_INT},
    {"repeatOutput", OPTION_STRING},
    {"maxGap", OPTION_INT},
    {"maxMismatch", OPTION_INT},
    {"mmap", OPTION_BOOLEAN},
    {NULL, 0},
 };
 
 static FILE *repeatOutputFile = NULL;
 int overArraySize;
 bits64 *overArray;
 
 long exactIndexQueries, hits12, hits18, hits25, hitsFull;
 
 static int splatTagCmp(const void *va, const void *vb)
 /* Sort tags based on position fields, then divergence. */
 {
 const struct splatTag *a = *((struct splatTag **)va);
 const struct splatTag *b = *((struct splatTag **)vb);
 int diff;
 
 if ((diff = a->t1 - b->t1) != 0) return diff;
 int aEnd = a->t2 + a->size2;
 int bEnd = b->t2 + b->size2;
 if ((diff = aEnd - bEnd) != 0) return diff;
 if ((diff = a->strand - b->strand) != 0) return diff;
 if ((diff = a->divergence - b->divergence) != 0) return diff;
 if ((diff = a->q1 - b->q1) != 0) return diff;
 aEnd = a->q2 + a->size2;
 bEnd = b->q2 + b->size2;
 return aEnd - bEnd;
 }
 
 void splatAlignFree(struct splatAlign **pAli)
 /* Free up a splatAlign. */
 {
 struct splatAlign *ali = *pAli;
 if (ali != NULL)
     {
     chainFree(&ali->chain);
     freez(pAli);
     }
 }
 
 void splatAlignFreeList(struct splatAlign **pList)
 /* Free a list of dynamically allocated splatAlign's */
 {
 struct splatAlign *el, *next;
 
 for (el = *pList; el != NULL; el = next)
     {
     next = el->next;
     splatAlignFree(&el);
     }
 *pList = NULL;
 }
 
 
 void splatAlignFreeList(struct splatAlign **pList);
 /* Free up a list of splatAligns. */
 
 
 static int splatAlignCmpScore(const void *va, const void *vb)
 /* Sort alignments based on score. */
 {
 const struct splatAlign *a = *((struct splatAlign **)va);
 const struct splatAlign *b = *((struct splatAlign **)vb);
 double diff = a->chain->score - b->chain->score;
 if (diff < 0)
     return -1;
 else if (diff > 0)
     return 1;
 else
     return 0;
 }
 
 
 #ifdef DEBUG
 static void dumpSixmer(int hexamer)
 /* Write out hexamer */
 {
 fputc(valToNt[(hexamer>>10)&3], stdout);
 fputc(valToNt[(hexamer>>8)&3], stdout);
 fputc(valToNt[(hexamer>>6)&3], stdout);
 fputc(valToNt[(hexamer>>4)&3], stdout);
 fputc(valToNt[(hexamer>>2)&3], stdout);
 fputc(valToNt[hexamer&3], stdout);
 }
 
 static void dumpTwelvemer(int twelvemer)
 {
 fputc(valToNt[(twelvemer>>22)&3], stdout);
 fputc(valToNt[(twelvemer>>20)&3], stdout);
 fputc(valToNt[(twelvemer>>18)&3], stdout);
 fputc(valToNt[(twelvemer>>16)&3], stdout);
 fputc(valToNt[(twelvemer>>14)&3], stdout);
 fputc(valToNt[(twelvemer>>12)&3], stdout);
 dumpSixmer(twelvemer);
 }
 #endif /* DEBUG */
 
 
 static void dnaToBinary(DNA *dna, int maxGap, int *pFirstHalf, int *pSecondHalf,
 	bits16 *pAfter1, bits16 *pAfter2, bits16 *pBefore1, bits16 *pBefore2,
 	bits16 *pFirstHex, bits16 *pLastHex,
 	bits16 *pTwoAfterFirstHex, bits16 *pTwoBeforeLastHex)
 /* Convert from DNA string to binary representation chopped up various ways. */
 {
 bits32 firstHalf
     = (ntValNoN[(int)dna[0]] << 22) + (ntValNoN[(int)dna[1]] << 20)
     + (ntValNoN[(int)dna[2]] << 18) + (ntValNoN[(int)dna[3]] << 16)
     + (ntValNoN[(int)dna[4]] << 14) + (ntValNoN[(int)dna[5]] << 12)
     + (ntValNoN[(int)dna[6]] << 10) + (ntValNoN[(int)dna[7]] << 8)
     + (ntValNoN[(int)dna[8]] << 6) + (ntValNoN[(int)dna[9]] << 4)
     + (ntValNoN[(int)dna[10]] << 2) + (ntValNoN[(int)dna[11]]);
 bits16 after1 
     = (ntValNoN[(int)dna[12]] << 10) + (ntValNoN[(int)dna[13]] << 8)
     + (ntValNoN[(int)dna[14]] << 6) + (ntValNoN[(int)dna[15]] << 4)
     + (ntValNoN[(int)dna[16]] << 2) + (ntValNoN[(int)dna[17]]);
 bits16 after2 
     = (ntValNoN[(int)dna[18]] << 10) + (ntValNoN[(int)dna[19]] << 8)
     + (ntValNoN[(int)dna[20]] << 6) + (ntValNoN[(int)dna[21]] << 4)
     + (ntValNoN[(int)dna[22]] << 2) + (ntValNoN[(int)dna[23]]);
 bits32 secondHalf;
 bits16 before1, before2;
 bits16 firstHex = firstHalf >> 12;
 bits16 lastHex;
 if (maxGap)
     {
     int base24 = ntValNoN[(int)dna[24]];
     lastHex = ((after2 << 2) + base24) & 0xFFF;
     secondHalf = (((((bits32)after1)<<14) + (after2<<2) + base24) & 0xFFFFFF);
     before1 = ((firstHalf>>10)&0xFFF);
     before2 = (((firstHalf<<2) + (ntValNoN[(int)dna[12]])) & 0xFFF);
     }
 else
     {
     lastHex = after2;
     secondHalf = (((bits32)after1)<<12) + after2;
     before1 = firstHex;
     before2 = firstHalf & 0xFFF;
     }
 
 *pFirstHalf = firstHalf;
 *pSecondHalf = secondHalf;
 *pAfter1 = after1;
 *pAfter2 = after2;
 *pBefore1 = before1;
 *pBefore2 = before2;
 *pFirstHex = firstHex;
 *pLastHex = lastHex;
 *pTwoAfterFirstHex = (firstHalf >> 8) & 0xFFF;
 *pTwoBeforeLastHex = (secondHalf >> 4) & 0xFFF;
 }
 
 #ifdef SOON
 static boolean chopOutPieces(bits64 bases25, int maxGap, int *pFirstHalf, int *pSecondHalf,
 	bits16 *pAfter1, bits16 *pAfter2, bits16 *pBefore1, bits16 *pBefore2,
 	bits16 *pFirstHex, bits16 *pLastHex,
 	bits16 *pTwoAfterFirstHex, bits16 *pTwoBeforeLastHex)
 /* Convert from DNA string to binary representation chopped up various ways. */
 {
 }
 #endif /* SOON */
 
 int hexCmp(const void *va, const void *vb)
 /* Compare two bit16. */
 {
 bits16 a = *((bits16*)va), b = *((bits16*)vb);
 return a-b;
 }
 
 int binaryFindHex(bits16 val, bits16 *pos, 
 	int posCount)
 /* Find index of hex that matches val, or -1 if none such. */
 {
 int startIx=0, endIx=posCount-1, midIx;
 bits16 posVal;
 
 for (;;)
     {
     if (startIx == endIx)
         {
 	posVal = pos[startIx];
 	if (posVal == val)
 	    return startIx;
 	else
 	    return -1;
 	}
     midIx = ((startIx + endIx)>>1);
     posVal = pos[midIx];
     if (posVal < val)
         startIx = midIx+1;
     else
         endIx = midIx;
     }
 }
 
 
 int linearFindHex(bits16 query, bits16 *hexes, int hexCount)
 /* See if query sequence is in hexes, which is sorted. */
 {
 int i;
 for (i=0; i<hexCount; ++i)
     if (hexes[i] == query)
 	return i;
 return -1;
 }
 
 static void exactIndexHits(bits16 hex, bits16 *sortedHexes, int slotSize, 
 	bits32 *offsets, int startOffset, int subCount, int gapSize, 
 	int missingQuad, struct lm *lm, struct splatHit **pHitList)
 /* Look for hits that involve no index mismatches.  Put resultint offsets (plus startOffset)
  * into hitTree. */
 {
 hits12 += slotSize;
 int ix = binaryFindHex(hex, sortedHexes, slotSize);
 if (ix >= 0)
     {
     for (;;)
 	{
 	int dnaOffset = offsets[ix] + startOffset;
 	struct splatHit *hit;
 	lmAllocVar(lm, hit);
 	hit->tOffset = dnaOffset;
 	hit->gapSize = gapSize;
 	hit->missingQuad = missingQuad;
 	hit->subCount = subCount;
 	slAddHead(pHitList, hit);
 	ix += 1;
 	++hits18;
 	if (sortedHexes[ix] != hex)
 	    break;
 	}
     }
 }
 
 static void searchExact(struct splix *splix, int twelvemer, bits16 sixmer, int whichSixmer,
 	int dnaStartOffset, int subCount, int gapSize, int missingQuad,
 	struct lm *lm, struct splatHit **pHitList)
 /* If there's an exact match put DNA offset of match into hitTree. */
 {
 exactIndexQueries += 1;
 int slotSize = splix->slotSizes[twelvemer];
 if (slotSize != 0)
     {
     /* Compute actual positions of the subindexes for the 1/4 reads in the slot. */
     char *slot = splix->slots[twelvemer];
     bits16 *hexesSixmer = (bits16*)(slot + whichSixmer*sizeof(bits16)*slotSize);
     bits32 *offsetsSixmer = (bits32*)((8+sizeof(bits32)*whichSixmer)*slotSize + slot);
     exactIndexHits(sixmer, hexesSixmer, slotSize, offsetsSixmer, dnaStartOffset,
     	subCount, gapSize, missingQuad, lm, pHitList);
     }
 }
 
 static void searchExact12Vary6(struct splix *splix, int twelvemer, bits16 sixmer, int whichSixmer,
 	int dnaStartOffset, int gapSize, int missingQuad, struct lm *lm,
 	struct splatHit **pHitList)
 /* Search for exact matches to twelvemer and off-by-ones to sixmer. */
 {
 bits16 oneOff = sixmer;
 /* The toggles here are xor'd to quickly cycle us through all 4 binary values for a base. */
 bits16 toggle1 = 1, toggle2 = 2;
 int baseIx;
 for (baseIx = 0; baseIx<6; ++baseIx)
     {
     oneOff ^= toggle1;
     searchExact(splix, twelvemer, oneOff, whichSixmer, dnaStartOffset,
     	1, gapSize, missingQuad, lm, pHitList);
     oneOff ^= toggle2;
     searchExact(splix, twelvemer, oneOff, whichSixmer, dnaStartOffset, 
     	1, gapSize, missingQuad, lm, pHitList);
     oneOff ^= toggle1;
     searchExact(splix, twelvemer, oneOff, whichSixmer, dnaStartOffset, 
     	1, gapSize, missingQuad, lm, pHitList);
     oneOff ^= toggle2;	/* Restore base to unmutated form. */
     toggle1 <<= 2;	/* Move on to next base. */
     toggle2 <<= 2;	/* Move on to next base. */
     }
 }
 
 static void searchVary12Exact6(struct splix *splix, int twelvemer, bits16 sixmer, int whichSixmer,
 	int dnaStartOffset, int gapSize, int missingQuad, 
 	struct lm *lm, struct splatHit **pHitList)
 /* Search for exact matches to twelvemer and off-by-ones to sixmer. */
 {
 bits32 oneOff = twelvemer;
 /* The toggles here are xor'd to quickly cycle us through all 4 binary values for a base. */
 bits32 toggle1 = 1, toggle2 = 2;
 int baseIx;
 for (baseIx = 0; baseIx<12; ++baseIx)
     {
     oneOff ^= toggle1;
     searchExact(splix, oneOff, sixmer, whichSixmer, dnaStartOffset, 
     	1, gapSize, missingQuad, lm, pHitList);
     oneOff ^= toggle2;
     searchExact(splix, oneOff, sixmer, whichSixmer, dnaStartOffset, 
     	1, gapSize, missingQuad, lm, pHitList);
     oneOff ^= toggle1;
     searchExact(splix, oneOff, sixmer, whichSixmer, dnaStartOffset, 
     	1, gapSize, missingQuad, lm, pHitList);
     oneOff ^= toggle2;	/* Restore base to unmutated form. */
     toggle1 <<= 2;	/* Move on to next base. */
     toggle2 <<= 2;	/* Move on to next base. */
     }
 }
 
 struct alignContext
 /* Context for an alignment. Necessary data for rbTree-traversing call-back function. 
  * Actually we no longer need this, but it does bundle together parameters shared
  * by a number of routines together. */
     {
     struct splix *splix;	/* Index. */
     struct splatTag **pTagList;	/* Pointer to tag list that gets filled in. */
     struct dnaSeq *qSeq;	/* DNA sequence. */
     char strand;		/* query strand. */
     int tagPosition;		/* Position of tag. */
     struct lm *lm;		/* Local memory pool where tags are allocated. */
     };
 
 int countDnaDiffs(DNA *a, DNA *b, int size)
 /* Count number of differing bases. */
 {
 int i;
 int count = 0;
 for (i=0; i<size; ++i)
    if (a[i] != b[i])
        ++count;
 return count;
 }
 
 static void addMatch(int diffCount, struct alignContext *c, 
 	int t1, int q1, int size1,
 	int t2, int q2, int size2)
 /* Output a match, which may be in two blocks. */
 {
 q1 += c->tagPosition;
 q2 += c->tagPosition;
 /* Normalize q2,t2 to make 'gap' zero size in single block case */
 if (size2 <= 0)
     {
     size2 = 0;
     q2 = q1 + size1;
     t2 = t1 + size1;
     }
 
 /* Calculate divergence - 2*bases_different + 2*(gap_count) + base_in_gaps. */
 int qGap = q1 + size1 - q2;
 int tGap = t1 + size1 - t2;
 int gap = max(qGap, tGap);
 int divergence = 2*diffCount + gap;
 if (gap)
     divergence += 2;
 
 
 /* Alloc and fill in tag. */
 struct splatTag *tag;
 lmAllocVar(c->lm, tag);
 tag->divergence  = divergence;
 tag->q1 = q1;
 tag->t1 = t1;
 tag->size1 = size1;
 tag->q2 = q2;
 tag->t2 = t2;
 tag->size2 = size2;
 tag->strand = c->strand;
 
 /* Add to list. */
 slAddHead(c->pTagList, tag);
 }
 
 static void slideInsert1(DNA *q, DNA *t, int gapSize, int solidSize, int mysterySize,
 	int *retBestGapPos, int *retLeastDiffs)
 /* Slide insert though all possible positions and return best position and number
  * of differences there. */
 {
 int gapPos = -1;
 int diffs = countDnaDiffs(q, t, solidSize);
 int bestGapPos = gapPos;
 int leastDiffs = diffs;
 for (gapPos = 0; gapPos < mysterySize; ++gapPos)
     {
     DNA qq = q[gapPos];
     DNA tt = t[gapPos];
     if (qq != tt)
        diffs -= 1;
     if (q[gapPos-gapSize] != tt)
        diffs += 1;
     if (diffs < leastDiffs)
 	{
 	leastDiffs = diffs;
 	bestGapPos = gapPos;
 	}
     }
 *retLeastDiffs = leastDiffs;
 *retBestGapPos = bestGapPos;
 }
 
 static void slideInsert2(DNA *q, DNA *t, int gapSize, int solidSize, int mysterySize,
 	int *retBestGapPos, int *retLeastDiffs)
 /* Slide insert though all possible positions and return best position and number
  * of differences there. Does this a _slightly_ different way than slideInsert1. 
  * Actually different in at least three ways, and to parameterize all three would
  * give it a lot of parameters.... */
 {
 int diffs = countDnaDiffs(q+gapSize, t, solidSize);
 int bestGapPos = -1;
 int leastDiffs = diffs;
 int gapPos;
 for (gapPos = 0; gapPos < mysterySize; ++gapPos)
     {
     DNA qq = q[gapPos];
     DNA tt = t[gapPos];
     if (qq != tt)
        diffs += 1;
     if (q[gapPos+gapSize] != tt)
        diffs -= 1;
     if (diffs < leastDiffs)
 	{
 	leastDiffs = diffs;
 	bestGapPos = gapPos;
 	}
     }
 *retLeastDiffs = leastDiffs;
 *retBestGapPos = bestGapPos;
 }
 
 static void slideDeletion2(DNA *q, DNA *t, int gapSize, int solidSize, int mysterySize,
 	int *retBestGapPos, int *retLeastDiffs)
 /* Slide deletion through all possible positions and return best position and number
  * of differences there. */
 {
 int diffs = countDnaDiffs(q, t+gapSize, solidSize);
 int bestGapPos = -1;
 int leastDiffs = diffs;
 int gapPos;
 for (gapPos = 0; gapPos < mysterySize; ++gapPos)
     {
     DNA qq = q[gapPos];
     DNA tt = t[gapPos];
     if (qq != tt)
        diffs += 1;
     if (qq != t[gapPos+gapSize])
        diffs -= 1;
     if (diffs < leastDiffs)
 	{
 	leastDiffs = diffs;
 	bestGapPos = gapPos;
 	}
     }
 *retLeastDiffs = leastDiffs;
 *retBestGapPos = bestGapPos;
 }
 
 static void extendMaybeOutputNoIndexIndels(struct splatHit *hit, struct alignContext *c)
 /* Handle case where the 12-mer and the 6-mer in the index both hit without any
  * indication of an indel.  In the case where the missing quadrant is the first or
  * last though there could still be an indel in one of these quadrands. */
 {
 int tOffset = hit->tOffset;
 int origDiffCount = hit->subCount;
 int diffCount = origDiffCount;
 int quad = hit->missingQuad;
 struct dnaSeq *qSeq = c->qSeq;
 DNA *qDna = qSeq->dna + c->tagPosition;
 DNA *tDna = c->splix->allDna + tOffset;
 switch (hit->missingQuad)
     {
     /* The ?'s in the comments below indicate areas that have not been explored in the index.
      * The _'s indicate areas that have been explored in the index. 
      * The singleton ? may actually represent 0 up to maxGap bases. */
     case 0: /* Explore ? ?????? ______ ______ ______ */
        diffCount += countDnaDiffs(qDna, tDna, 6+maxGap);
        break;
     case 1: /* Explore ? ______ ?????? ______ ______ */
        diffCount += countDnaDiffs(qDna, tDna, maxGap);
        diffCount += countDnaDiffs(qDna + 6 + maxGap, tDna + 6 + maxGap, 6);
        break;
     case 2: /* Explore ______ ______ ?????? ______ ? */
        diffCount += countDnaDiffs(qDna+12, tDna+12, 6);
        diffCount += countDnaDiffs(qDna+24, tDna+24, maxGap);
        break;
     case 3: /* Explore ______ ______ ______ ?????? ? */
        diffCount += countDnaDiffs(qDna+18, tDna+18, 6+maxGap);
        break;
     }
 if (qSeq->size > 24 && maxGap == 0)	/* Make 25th base significant in all cases. */
     {
     if (c->strand == '-')
 	{
 	if (qDna[-1] != tDna[-1])
 	   ++diffCount;
 	}
     else
 	{
 	if (qDna[24] != tDna[24])
 	   ++diffCount;
 	}
     }
 if (diffCount <= maxMismatch)
     addMatch(diffCount, c, tOffset, 0, tagSize, 0, 0, 0);
 else if (maxGap > 0 && origDiffCount < maxMismatch) 
     {
     /* Need to explore possibilities of indels if we are on the end quadrants */
     int gapSize = 1; /* May need more work here for indels bigger than a single base. */
     int bestGapPosInsert, leastDiffsInsert;
     int bestGapPosDelete, leastDiffsDelete;
     if (quad == 0)
         {
 	/* Figure out insertion best score and position. */
 	slideInsert1(qDna+gapSize, tDna+gapSize, gapSize, 6, 7, 
 		&bestGapPosInsert, &leastDiffsInsert);
 
 	/* Figure out deletion best score and position. */
 	slideDeletion2(qDna, tDna-gapSize, gapSize, 7, 8, &bestGapPosDelete, &leastDiffsDelete);
 
 	/* Output better scoring of insert or delete - delete on tie since it has one
 	 * more matching base. */
 	if (leastDiffsInsert < leastDiffsDelete)
 	    {
 	    if (leastDiffsInsert < maxMismatch)
 		{
 		int b1Size = bestGapPosInsert + 1;
 		addMatch(leastDiffsInsert+origDiffCount, c, tOffset+gapSize, 0, b1Size, 
 		    tOffset+gapSize+b1Size, b1Size+gapSize, tagSize - b1Size - gapSize);
 		}
 	    }
 	else
 	    {
 	    if (leastDiffsDelete < maxMismatch)
 		{
 		int b1Size = bestGapPosDelete + 1;
 		addMatch(leastDiffsDelete+origDiffCount, c, tOffset-gapSize, 0, b1Size, 
 		    tOffset+b1Size, b1Size, tagSize - b1Size);
 		}
 	    }
 	}
     else if (quad == 3)
         {
 	/* Figure out insertion best score and position. */
 	int startOffset = 18;
 	slideInsert2(qDna+startOffset, tDna+startOffset, gapSize, 
 		6, 7, &bestGapPosInsert, &leastDiffsInsert);
 
 
 	/* Figure out deletion best score and position. */
 	slideDeletion2(qDna+startOffset, tDna+startOffset, gapSize, 
 		7, 8, &bestGapPosDelete, &leastDiffsDelete);
 
 	/* Output better scoring of insert or delete - delete on tie since it has one
 	 * more matching base. */
 	if (leastDiffsInsert < leastDiffsDelete)
 	    {
 	    if (leastDiffsInsert < maxMismatch)
 		{
 		int b1Size = bestGapPosInsert + 1 + startOffset;
 		addMatch(leastDiffsInsert+origDiffCount, c, tOffset, 0, b1Size, 
 		    tOffset+b1Size, b1Size+gapSize, tagSize - b1Size - gapSize);
 		}
 	    }
 	else
 	    {
 	    if (leastDiffsDelete < maxMismatch)
 		{
 		int b1Size = bestGapPosDelete + 1 + startOffset;
 		addMatch(leastDiffsDelete+origDiffCount, c, tOffset, 0, b1Size, 
 		    tOffset+b1Size+gapSize, b1Size, tagSize - b1Size);
 		}
 	    }
 	}
     }
 }
 
 static void extendMaybeOutputInsert(struct splatHit *hit, struct alignContext *c)
 /* Handle case where there is a base inserted in the query relative to the target. */
 {
 int tOffset = hit->tOffset;
 int mysteryOffset = 0;    /* Offset of unexplored region in query. */
 int mysterySize = 6;	/* May need to adjust when have >1 size gaps, not sure. */
 int gapSize = hit->gapSize;
 switch (hit->missingQuad)
     {
     case 1: /* Explore ______ ?????? ? ______ ______ */
         mysteryOffset = 6;
 	break;
     case 2: /* Explore ______ ______ ? ?????? ------ */
 	mysteryOffset = 12;
 	break;
     default:
         internalErr();
         break;
     }
 int bestGapPos, leastDiffs;
 slideInsert2(c->qSeq->dna + c->tagPosition + mysteryOffset, 
 	     c->splix->allDna + tOffset + mysteryOffset, 
 	     gapSize, mysterySize, mysterySize, 
 	     &bestGapPos, &leastDiffs);
 leastDiffs += hit->subCount + 1;
 if (leastDiffs <= maxMismatch)
     {
     int b1Size = bestGapPos + mysteryOffset + 1;
     addMatch(leastDiffs, c, tOffset, 0, b1Size, 
     	tOffset+b1Size, b1Size+gapSize, tagSize - b1Size - gapSize);
     }
 }
 
 
 static void extendMaybeOutputDeletion(struct splatHit *hit, struct alignContext *c)
 /* Handle case where there is a base deleted in the query relative to the target. */
 {
 int tOffset = hit->tOffset;
 int gapSize = -hit->gapSize;
 int bigMysteryOffset = 0, qSmallMysteryOffset = 0, tSmallMysteryOffset = 0;
 int bigMysterySize = 6, smallMysterySize = gapSize;
 int diffCount = hit->subCount;
 
 switch (hit->missingQuad)
     {
     case 1: 
     	/* Explore Q  ?? ______ ????? ______ ______ */
 	/* Explore T  ?? ______ ?????? ______ ______ */
 	bigMysteryOffset = 8;
 	break;
     case 2: 
     	/* Explore Q ______ ______ ????? ------ ?? */
 	/* Explore T ______ ______ ?????? ------ ?? */
 	bigMysteryOffset = 12;
 	qSmallMysteryOffset = 23;
 	tSmallMysteryOffset = 24;
 	break;
     default:
         internalErr();
         break;
     }
 DNA *q = c->qSeq->dna + c->tagPosition;
 DNA *t = c->splix->allDna + tOffset;
 diffCount += countDnaDiffs(q + qSmallMysteryOffset, t + tSmallMysteryOffset, smallMysterySize);
 if (diffCount < maxMismatch)
     {
     int bestGapPos, leastDiffs;
     slideDeletion2(q + bigMysteryOffset, t+bigMysteryOffset, 
 		 gapSize, bigMysterySize-gapSize, bigMysterySize, 
 		 &bestGapPos, &leastDiffs);
     leastDiffs += diffCount + 1;
 
     if (leastDiffs <= maxMismatch)
 	{
 	int b1Size = bestGapPos + bigMysteryOffset + 1;
 	addMatch(leastDiffs, c, tOffset, 0, b1Size, 
 	    tOffset+b1Size+gapSize, b1Size, tagSize - b1Size);
 	}
     }
 }
 
 static void extendMaybeOutput(struct splatHit *hit, struct alignContext *c)
 /* Callback function for tree traverser that extends alignments and if good, outputs them. */
 {
 verbose(3, "   tOffset=%d gapSize=%d subCount=%d missingQuad=%d\n", 
 	hit->tOffset, hit->gapSize, hit->subCount, hit->missingQuad);
 if (hit->gapSize == 0)
     {
     extendMaybeOutputNoIndexIndels(hit, c);
     }
 else if (hit->gapSize > 0)
     {
     extendMaybeOutputInsert(hit, c);
     }
 else
     {
     extendMaybeOutputDeletion(hit, c);
     }
 }
 
 void splatHitsOneStrand(struct dnaSeq *qSeq, bits64 bases25, 
 	char strand, int tagPosition, struct splix *splix, int maxGap,
 	struct lm *lm, struct splatHit **pHitList)
 /* Look through index for hits to query tag on one strand. 
  * Input:
  *   qSeq - entire query sequence, reverse complimented if on - strand
  *   strand - either + or -
  *   tagPosition - start position within query to pass against index.
  *                 splixMinQuerySize + maxGap long.
  *   splix - the index to search
  *   maxGap - the maximum insertion or deletion size
  * A list of splat hits allocated on lm. */
 {
 /* Convert DNA to binary format */
 int firstHalf, secondHalf;
 bits16 after1, after2, before1, before2;
 bits16 firstHex, lastHex, twoAfterFirstHex, twoBeforeLastHex;
 
 dnaToBinary(qSeq->dna + tagPosition, maxGap,
 	&firstHalf, &secondHalf, &after1, &after2, &before1, &before2,
 	&firstHex, &lastHex, &twoAfterFirstHex, &twoBeforeLastHex);
 
 int secondHalfPos = -12 - maxGap;
 searchExact(splix, firstHalf, after1, 2, 0, 0, 0, 3, lm, pHitList);
 if (!exactOnly)
     {
     searchExact(splix, firstHalf, after2, 3, 0, 0, 0, 2, lm, pHitList);
     searchExact(splix, secondHalf, before1, 0, secondHalfPos, 0, 0, 1, lm, pHitList);
     searchExact(splix, secondHalf, before2, 1, secondHalfPos, 0, 0, 0, lm, pHitList);
     }
 if (maxMismatch > 1)
     {
     searchVary12Exact6(splix, firstHalf, after1, 2, 0, 0, 3, lm, pHitList);
     searchVary12Exact6(splix, firstHalf, after2, 3, 0, 0, 2, lm, pHitList);
     searchVary12Exact6(splix, secondHalf, before1, 0, secondHalfPos, 0, 1, lm, pHitList);
     searchVary12Exact6(splix, secondHalf, before2, 1, secondHalfPos, 0, 0, lm, pHitList);
     searchExact12Vary6(splix, firstHalf, after1, 2, 0, 0, 3, lm, pHitList);
     searchExact12Vary6(splix, firstHalf, after2, 3, 0, 0, 2, lm, pHitList);
     searchExact12Vary6(splix, secondHalf, before1, 0, secondHalfPos, 0, 1, lm, pHitList);
     searchExact12Vary6(splix, secondHalf, before2, 1, secondHalfPos, 0, 0, lm, pHitList);
     }
 
 /* Look at single base indels with few mismatches */
 if (maxGap > 0)
     {
     searchExact(splix, secondHalf, twoAfterFirstHex, 0, secondHalfPos-maxGap, 0, -1, 1, lm, pHitList);
     searchExact(splix, secondHalf, firstHex, 0, secondHalfPos+maxGap, 0, 1, 1, lm, pHitList);
     searchExact(splix, firstHalf, twoBeforeLastHex, 3, 0, 0, -1, 2, lm, pHitList);
     searchExact(splix, firstHalf, lastHex, 3, 0, 0, 1, 2, lm, pHitList);
     if (maxMismatch > 1)
 	{
 	searchExact12Vary6(splix, secondHalf, twoAfterFirstHex, 0, secondHalfPos-maxGap, -1, 1, lm, pHitList);
 	searchExact12Vary6(splix, secondHalf, firstHex, 0, secondHalfPos+maxGap, 1, 1, lm, pHitList);
 	searchExact12Vary6(splix, firstHalf, twoBeforeLastHex, 3, 0, -1, 2, lm, pHitList);
 	searchExact12Vary6(splix, firstHalf, lastHex, 3, 0, 1, 2, lm, pHitList);
 	searchVary12Exact6(splix, secondHalf, twoAfterFirstHex, 0, secondHalfPos-maxGap, -1, 1, lm, pHitList);
 	searchVary12Exact6(splix, secondHalf, firstHex, 0, secondHalfPos+maxGap, 1, 1, lm, pHitList);
 	searchVary12Exact6(splix, firstHalf, twoBeforeLastHex, 3, 0, -1, 2, lm, pHitList);
 	searchVary12Exact6(splix, firstHalf, lastHex, 3, 0, 1, 2, lm, pHitList);
 	}
     }
 }
 
 void extendHitsToTags(struct splatHit *hitList, struct dnaSeq *qSeq, char strand, 
 	int tagPosition, struct splix *splix, struct lm *lm, struct splatTag **pTagList)
 /* Examine each of the hits in the hitTree, and add ones that are high scoring enough
  * after extension to the tagList. If bestOnly is set it may free existing tags on list if
  * it finds a higher scoring tag. */
 {
 struct splatHit *hit;
 struct alignContext ac;
 ZeroVar(&ac);
 ac.splix = splix;
 ac.pTagList = pTagList;
 ac.qSeq = qSeq;
 ac.strand = strand;
 ac.tagPosition = tagPosition;
 ac.lm = lm;
 for (hit = hitList; hit != NULL; hit = hit->next)
     extendMaybeOutput(hit, &ac);
 }
 
 static struct splatTag *removeDupeTags(struct splatTag *tagList)
 /* Return list with duplicate tags removed.  As a side effect sort list. */
 {
 struct splatTag *tag, *next, *newList = NULL;
 
 slSort(&tagList, splatTagCmp);
 for (tag = tagList; tag != NULL; tag = next)
     {
     next = tag->next;
     if (next == NULL || splatTagCmp(&tag, &next) != 0)
 	slAddHead(&newList, tag);
     }
 slReverse(&newList);
 return newList;
 }
 
 static int findLeastDivergence(struct splatTag *tagList)
 {
 struct splatTag *tag;
 int leastDivergence = tagList->divergence;
 for (tag = tagList->next; tag != NULL; tag = tag->next)
     {
     if (tag->divergence < leastDivergence)
 	leastDivergence = tag->divergence;
     }
 return leastDivergence;
 }
 
 static struct splatTag *splatTagFilterOnDivergence(struct splatTag *tagList, int maxDivergence)
 /* Remove (and free) tags with more than maxDivergence from list. */
 {
 struct splatTag *tag, *next, *newList = NULL;
 for (tag = tagList; tag != NULL; tag = next)
     {
     next = tag->next;
     if (tag->divergence <= maxDivergence)
         slAddHead(&newList, tag);
     }
 slReverse(&newList);
 return newList;
 }
 
 static void splatOneStrand(struct dnaSeq *qSeq, bits64 bases25, char strand,
 	int tagPosition, struct splix *splix, int maxGap, 
 	struct lm *lm, struct splatTag **pTagList)
 /* Align one query strand against index, filter, and write out results */
 {
 struct splatHit *hitList = NULL;
 splatHitsOneStrand(qSeq, bases25, strand, tagPosition, splix, maxGap, lm, &hitList);
 struct splatTag *tagList = NULL;
 int hitCount = slCount(hitList);
 verbose(2, " %d hits on %c strand\n", hitCount, strand);
 if (hitCount > 0)
     {
     extendHitsToTags(hitList, qSeq, strand, tagPosition, splix, lm, &tagList);
 
     /* Since this is faster than removing dupes, and will often trim list a lot, do
      * it now, even though we have to do it again when we have data on both strands. */
     if (tagList != NULL && !worseToo)
 	tagList = splatTagFilterOnDivergence(tagList, findLeastDivergence(tagList) );
 
     /* Some duplicate tags may have come through.  This also will filter the tags 
      * by chromosome position. */
     tagList = removeDupeTags(tagList);
     *pTagList = slCat(tagList, *pTagList);
     }
 }
 
 static void splatOne(struct dnaSeq *qSeqF, struct splix *splix, int maxGap, 
 	struct axtScoreScheme *scoreScheme, FILE *f, int *retMapCount, boolean *retIsRepeat)
 /* Align one query sequence against index, filter, and write out results.
  * Returns the number of mappings. */
 {
 /* Local memory pool for hits and tags for this sequence. */
 struct lm *lm = lmInit(1024*4);
 
 /* Make reverse complement of query too. */
 struct dnaSeq *qSeqR = cloneDnaSeq(qSeqF);
 reverseComplement(qSeqR->dna, qSeqR->size);
 
 struct splatTag *tagList = NULL;
 int size = qSeqF->size;
 int desiredSize = tagSize;
 int outputCount = 0;
 boolean isRepeat = FALSE;
 if (size < desiredSize)
     {
     warn("%s is %d bases, minimum splat query size %d, skipping", 
     	qSeqF->name, tagSize, qSeqF->size);
     return;
     }
 bits64 bases25f = basesToBits64(qSeqF->dna, 25);
 boolean isRepeatingOver = overCheck(bases25f, overArraySize, overArray);
 if (!isRepeatingOver)
     {
     splatOneStrand(qSeqF, bases25f, '+', 0, splix, maxGap, lm, &tagList);
     int tagPosition = qSeqR->size - desiredSize;
     bits64 bases25r = basesToBits64(qSeqR->dna + tagPosition, 25);
     isRepeatingOver = overCheck(bases25r, overArraySize, overArray);
     if (!isRepeatingOver)
 	splatOneStrand(qSeqR, bases25r, '-', tagPosition, splix, maxGap, lm, &tagList);
     }
 verbose(2, " %d tags\n", slCount(tagList));
 if (isRepeatingOver)
     {
     if (repeatOutputFile != NULL)
 	faWriteNext(repeatOutputFile, qSeqF->name, qSeqF->dna, qSeqF->size);
     isRepeat = TRUE;
     }
 else if (tagList != NULL)
     {
     /* Find least divergence. */
 
     /* Unless doing worseToo remove tags that are not best scoring */
     if (!worseToo)
 	tagList = splatTagFilterOnDivergence(tagList, findLeastDivergence(tagList) );
     
 
     /* Count up mappings, and output either to repeat file or to mapping file. */
     outputCount = slCount(tagList);
     hits25 += outputCount;
     isRepeat = (outputCount > maxRepeat);
     if (isRepeat)
         {
 	if (repeatOutputFile != NULL)
 	    faWriteNext(repeatOutputFile, qSeqF->name, qSeqF->dna, qSeqF->size);
 	outputCount = 0;
 	}
     else
         {
 	struct splatAlign *aliList = splatExtendTags(tagList, qSeqF, qSeqR, splix, scoreScheme);
 	hitsFull += slCount(aliList);
 	slSort(&aliList, splatAlignCmpScore);
 	splatOutList(aliList, out, qSeqF, qSeqR, splix, f);
 	splatAlignFreeList(&aliList);
 	}
 #ifdef SOON
 #endif /* SOON */
     }
 dnaSeqFree(&qSeqR);
 lmCleanup(&lm);
 *retMapCount = outputCount;
 *retIsRepeat = isRepeat;
 }
 
 
 void splat(char *target, char *query, char *output)
 /* splat - Speedy Local Alignment Tool. */
 {
 struct axtScoreScheme *scoreScheme = axtScoreSchemeSimpleDna(2, 2, 2, 2);
 struct dnaLoad *qLoad = dnaLoadOpen(query);
 if (over != NULL)
     {
     overRead(over, maxRepeat+1, &overArraySize, &overArray);
     }
 struct splix *splix = splixRead(target, memoryMap);
 verboseTime(1, "Loaded %s", target);
 FILE *f = mustOpen(output, "w");
 if (repeatOutput != NULL)
     repeatOutputFile = mustOpen(repeatOutput, "w");
 splatOutHeader(target, query, out, f);
 struct dnaSeq *qSeq;
 int uniqCount = 0, totalMap = 0, totalRepeat = 0, totalReads = 0;
 while ((qSeq = dnaLoadNext(qLoad)) != NULL)
     {
     verbose(2, "Processing %s\n", qSeq->name);
     toUpperN(qSeq->dna, qSeq->size);
     int mapCount = 0;
     boolean isRepeat = FALSE;
     splatOne(qSeq, splix, maxGap, scoreScheme, f, &mapCount, &isRepeat);
     verbose(2, " %d mappings\n", mapCount);
     if (mapCount > 0)
         {
 	++totalMap;
 	if (mapCount == 1)
 	    ++uniqCount;
 	}
     if (isRepeat)
         ++totalRepeat;
     dnaSeqFree(&qSeq);
     ++totalReads;
     }
 verboseTime(1, "Alignment");
 
 /* Report statistics (to stderr) */
 verbose(1, "Overall results for mapping %d reads in %s to %s\n", 
 	totalReads, query, target);
 int missCount = totalReads - totalMap - totalRepeat;
 verbose(1, "%d (%5.2f%%) did not map\n", missCount, 100.0 * missCount/totalReads);
 verbose(1, "%d (%5.2f%%) mapped more than %d places\n", 
 	totalRepeat, 100.0 * totalRepeat / totalReads, maxRepeat);
 if (maxRepeat >= 2)
     {
     int multiCount = totalMap - uniqCount;
     verbose(1,  "%d (%5.2f%%) mapped between 2 and %d places\n",
 	multiCount, 100.0 * multiCount / totalReads, maxRepeat);
     }
 verbose(1, "%d (%5.2f%%) mapped uniquely\n",
     uniqCount, 100.0 * uniqCount / totalReads);
 
 verbose(1, "%ld index queries, %ld hits12, %ld hits18, %ld hits25, %ld hitsFull\n", 
 	exactIndexQueries, hits12, hits18, hits25, hitsFull);
 
 /* Clean up. */
 splixFree(&splix);
 dnaLoadClose(&qLoad);
 carefulClose(&f);
 carefulClose(&repeatOutputFile);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
-verboseTime(1, NULL);
+verboseTimeInit();
 optionInit(&argc, argv, options);
 if (argc != 4)
     usage();
 over = optionVal("over", over);
 out = optionVal("out", out);
 maxDivergence = optionInt("maxDivergence", maxDivergence);
 worseToo = optionExists("worseToo");
 maxRepeat = optionInt("maxRepeat", maxRepeat);
 repeatOutput = optionVal("repeatOutput", NULL);
 maxGap = optionInt("maxGap", maxGap);
 maxMismatch = optionInt("maxMismatch", maxMismatch);
 exactOnly = (maxGap == 0 && maxMismatch == 0);
 tagSize = maxGap + splixMinQuerySize;
 memoryMap = optionExists("mmap");
 if (maxGap > 1)
     errAbort("Sorry, for now maxGap must be just 0 or 1");
 dnaUtilOpen();
 splat(argv[1], argv[2], argv[3]);
 return 0;
 }