0ede364a2dcb452681933d5a3579b4f05d90a245
markd
  Sun Jul 5 15:16:27 2020 -0700
fixed bug where totalSeqSize was not stored in index, now hgBlat works

diff --git src/jkOwnLib/genoFind.c src/jkOwnLib/genoFind.c
index f475290..a2e40d1 100644
--- src/jkOwnLib/genoFind.c
+++ src/jkOwnLib/genoFind.c
@@ -44,31 +44,31 @@
 signal(SIGPIPE, gfPipeHandler);
 }
 
 int gfReadMulti(int sd, void *vBuf, size_t size)
 /* Read in until all is read or there is an error. */
 {
 char *buf = vBuf;
 size_t totalRead = 0;
 int oneRead;
 
 while (totalRead < size)
     {
     oneRead = read(sd, buf + totalRead, size - totalRead);
     if (oneRead < 0)
         {
-	perror("Couldn't finish large read");
+	errAbort("Couldn't finish large read");
 	return oneRead;
 	}
     else if (oneRead == 0)
     /* Avoid an infinite loop when the client closed the socket. */
         break;
     totalRead += oneRead;
     }
 return totalRead;
 }
 
 
 void genoFindFree(struct genoFind **pGenoFind)
 /* Free up a genoFind index. */
 {
 struct genoFind *gf = *pGenoFind;
@@ -105,73 +105,80 @@
 struct genoFindFileHdr
 /* header for genoFind section in file */
 {
     int maxPat;
     int minMatch;
     int maxGap;
     int tileSize;
     int stepSize;
     int tileSpaceSize;
     int tileMask;
     int sourceCount;
     bool isPep;
     bool allowOneMismatch;
     bool noSimpRepMask;
     int segSize;
+    int totalSeqSize;
 
     off_t sourcesOff;     // offset of sequences sources
     off_t listSizesOff;   // offset of listSizes
     off_t listsOff;       // offset of lists or endLists
     off_t endListsOff;
+
+    // Reserved area. This is really padding, as all structures are accessed via
+    // offsets, so fields can be added without decreasing padding as long as the
+    // reserved is not consumed.
     bits64 reserved[32];  // vesion 1.0: 32
 };
 
 static void genoFindInitHdr(struct genoFind *gf,
                             struct genoFindFileHdr *hdr)
 /* fill in the file header struct from in-memory struct */
 {
 zeroBytes(hdr, sizeof(struct genoFindFileHdr));
 hdr->maxPat = gf->maxPat;
 hdr->minMatch = gf->minMatch;
 hdr->maxGap = gf->maxGap;
 hdr->tileSize = gf->tileSize;
 hdr->stepSize = gf->stepSize;
 hdr->tileSpaceSize = gf->tileSpaceSize;
 hdr->tileMask = gf->tileMask;
 hdr->sourceCount = gf->sourceCount;
 hdr->isPep = gf->isPep;
 hdr->allowOneMismatch = gf->allowOneMismatch;
 hdr->noSimpRepMask = gf->noSimpRepMask;
 hdr->segSize = gf->segSize;
+hdr->totalSeqSize = gf->totalSeqSize;
 }                        
     
 static void genoFindReadHdr(struct genoFindFileHdr *hdr,
                             struct genoFind *gf)
 /* fill in the in-memory struct from file header struct */
 {
 gf->maxPat = hdr->maxPat;
 gf->minMatch = hdr->minMatch;
 gf->maxGap = hdr->maxGap;
 gf->tileSize = hdr->tileSize;
 gf->stepSize = hdr->stepSize;
 gf->tileSpaceSize = hdr->tileSpaceSize;
 gf->tileMask = hdr->tileMask;
 gf->sourceCount = hdr->sourceCount;
 gf->isPep = hdr->isPep;
 gf->allowOneMismatch = hdr->allowOneMismatch;
 gf->noSimpRepMask = hdr->noSimpRepMask;
 gf->segSize = hdr->segSize;
+gf->totalSeqSize = hdr->totalSeqSize;
 }                        
     
 static void genoFindWriteSource(struct gfSeqSource *ss, FILE *f)
 /* write a gfSeqSource object */
 {
 if ((ss->seq != NULL) || (ss->maskedBits != NULL))
     errAbort("can't write index contained sequences, must used external sequence file");
 
 char *fileName = ss->fileName;
 if (fileName != NULL)
     {
     // don't include directories
     char *s = strrchr(fileName, '/');
     if (s != NULL)
         fileName = s + 1;
@@ -359,30 +366,34 @@
                                              maxGap, tileSize, repMatch, oocFile, allowOneMismatch,
                                              stepSize, noSimpRepMask);
 return gfIdx;
 }
 
 struct genoFindIndexFileHdr
 /* header for genoFind section in file */
 {
     char magic[32];
     char version[32];
     bits32 indexAddressSize;  // 32 or 64 bit, compile time.
     boolean isTrans;
     // offsets to data, only one is filed in based on being translated or note
     off_t untransOff;
     off_t transOff[2][3];
+
+    // Reserved area. This is really padding, as all structures are accessed via
+    // offsets, so fields can be added without decreasing padding as long as the
+    // reserved is not consumed.
     bits64 reserved[32];  // vesion 1.0: 32
 };
 
 static void genoFindIndexInitHeader(struct genoFindIndex *gfIdx,
                                     struct genoFindIndexFileHdr* hdr)
 /* fill in the file header struct from in-memory struct */
 {
 zeroBytes(hdr, sizeof(struct genoFindIndexFileHdr));
 safecpy(hdr->magic, sizeof(hdr->magic), indexFileMagic);
 safecpy(hdr->version, sizeof(hdr->version), indexFileVerison);
 hdr->indexAddressSize = 32;
 hdr->isTrans = gfIdx->isTrans;
 }
 
 static void genoFindIndexReadHeader(void* memMapped, struct genoFindIndexFileHdr* hdr,
@@ -1756,30 +1767,31 @@
 int totalHits = 0, usedHits = 0, clumpCount = 0;
 int tileSize = gf->tileSize;
 int bucketShift = 16;		/* 64k buckets. */
 bits32 bucketSize = (1<<bucketShift);
 int bucketCount = (gf->totalSeqSize >> bucketShift) + 1;
 int nearEnough = (gf->isPep ? gfNearEnough/3 : gfNearEnough);
 bits32 boundary = bucketSize - nearEnough;
 int i;
 struct gfHit **buckets = NULL, **pb;
 
 /* Sort hit list into buckets. */
 AllocArray(buckets, bucketCount);
 for (hit = hitList; hit != NULL; hit = nextHit)
     {
     nextHit = hit->next;
+    assert((hit->tStart >> bucketShift) < bucketCount);
     pb = buckets + (hit->tStart >> bucketShift);
     slAddHead(pb, hit);
     }
 
 /* Sort each bucket on diagonal and clump. */
 for (i=0; i<bucketCount; ++i)
     {
     int clumpSize;
     bits32 maxT;
     struct gfHit *clumpHits;
     pb = buckets + i;
     gfHitSortDiagonal(pb);
     for (hit = *pb; hit != NULL; )
          {
 	 /* Each time through this loop will get info on a clump.  Will only