c3c65fde6dd5aa6f20860c7113eb9ee22cf35b96
markd
  Wed Jan 15 08:37:28 2020 -0800
Initial pass at 64bit blat index

diff --git src/inc/genoFind.h src/inc/genoFind.h
index f74c209..7b8a7f8 100644
--- src/inc/genoFind.h
+++ src/inc/genoFind.h
@@ -29,106 +29,110 @@
 #include "bits.h"
 #endif
 
 #ifndef AXT_H
 #include "axt.h"
 #endif
 
 enum gfConstants {
     gfMinMatch = 2,
     gfMaxGap = 2,
     gfTileSize = 11,
     gfMaxTileUse = 1024,
     gfPepMaxTileUse = 30000,
 };
 
+typedef bits64 gfOffset;  /* Offset into, or size of, genome sequences (e.g. gfSeqSource start/end, genoFind totalSeqSize) */
+
 struct gfSeqSource
 /* Where a block of sequence comes from. */
     {
     struct gfSeqSource *next;
     char *fileName;	/* Name of file. */
     bioSeq *seq;	/* Sequences.  Usually either this or fileName is NULL. */
-    bits32 start,end;	/* Position within merged sequence. */
+    gfOffset start,end;	/* Position within merged sequence. */
     Bits *maskedBits;	/* If non-null contains repeat-masking info. */
     };
 
 struct gfHit
 /* A genoFind hit. */
    {
    struct gfHit *next;
-   bits32 qStart;		/* Where it hits in query. */
-   bits32 tStart;		/* Where it hits in target. */
-   bits32 diagonal;		/* tStart + qSize - qStart. */
+   gfOffset qStart;		/* Where it hits in query. */
+   gfOffset tStart;		/* Where it hits in target. */
+   gfOffset diagonal;		/* tStart + qSize - qStart. */
    };
 
 /* gfHits are free'd with simple freeMem or slFreeList. */
 
 struct gfClump
 /* A clump of hits. */
 /* Note: for clumps from regular (blat) queries, tStart and tEnd include 
  * target->start, but for clumps from gfPcrClumps(), tStart and tEnd have 
  * already had target->start subtracted.  So tStart and tEnd in PCR clumps 
  * are relative to that target sequence (not the collection of all target 
  * sequences). */
     {
     struct gfClump *next;	/* Next clump. */
-    bits32 qStart, qEnd;	/* Position in query. */
+    gfOffset qStart, qEnd;	/* Position in query. */
     struct gfSeqSource *target;	/* Target source sequence. */
-    bits32 tStart, tEnd;	/* Position in target. */
+    gfOffset tStart, tEnd;	/* Position in target. */
     int hitCount;		/* Number of hits. */
     struct gfHit *hitList;	/* List of hits. Not allocated here. */
     int queryCoverage;		/* Number of bases covered in query (thx AG!) */
     };
 
 void gfClumpFree(struct gfClump **pClump);
 /* Free a single clump. */
 
 void gfClumpFreeList(struct gfClump **pList);
 /* Free a list of dynamically allocated gfClump's */
 
+struct endList
+/* One entry of a per-N-mer list in a segmented index: packed tile tail (16 bits) plus genome offset (48 bits). */
+{
+    unsigned tileTail: 16; /* Packed last few letters of the tile; unsigned so a value with bit 15 set is not read back negative */
+    gfOffset offset: 48;   /* Offset in genome */
+};
+
+
+
 struct genoFind
 /* An index of all K-mers in the genome. */
     {
     int maxPat;                          /* Max # of times pattern can occur
                                           * before it is ignored. */
     int minMatch;                        /* Minimum number of tile hits needed
                                           * to trigger a clump hit. */
     int maxGap;                          /* Max gap between tiles in a clump. */
     int tileSize;			 /* Size of each N-mer. */
     int stepSize;			 /* Spacing between N-mers. */
     int tileSpaceSize;                   /* Number of N-mer values. */
     int tileMask;			 /* 1-s for each N-mer. */
     int sourceCount;			 /* Count of source files. */
     struct gfSeqSource *sources;         /* List of sequence sources. */
     bool isPep;			 	 /* Is a peptide. */
     bool allowOneMismatch;		 /* Allow a single mismatch? */
     bool noSimpRepMask;			  /* Dis-Allow simple repeat masking. */
     int segSize;			 /* Index is segmented if non-zero. */
-    bits32 totalSeqSize;		 /* Total size of all sequences. */
-    bits32 *listSizes;                   /* Size of list for each N-mer */
+    gfOffset totalSeqSize;		 /* Total size of all sequences. */
+    gfOffset *listSizes;                   /* Size of list for each N-mer */
     void *allocated;                     /* Storage space for all lists. */
-    bits32 **lists;                      /* A list for each N-mer. Used if
+    gfOffset **lists;                      /* A list for each N-mer. Used if
                                           * isSegmented is false. */
-    bits16 **endLists;                   /* A more complex list for each N-mer.
-                                          * Used if isSegmented is true.
-					  * Values come in groups of threes.
-					  * The first is the packed last few
-					  * letters of the tile.  The next two
-					  * are the offset in the genome.  This
-					  * would be a struct but that would take
-					  * 8 bytes instead of 6, or nearly an
-					  * extra gigabyte of RAM. */
+    struct endList **endLists;           /* A list of struct endList entries for each N-mer.
+                                          * Used if the index is segmented (segSize is non-zero). */
     };
 
 void genoFindFree(struct genoFind **pGenoFind);
 /* Free up a genoFind index. */
 
 struct gfSeqSource *gfFindNamedSource(struct genoFind *gf, char *name);
 /* Find target of given name.  Return NULL if none. */
 
 /* ---  Stuff for saving results ---- */
 
 
 struct gfOutput
 /* A polymorphic object to help us write many file types. */
     {
     struct gfOutput *next;