src/lib/bigBed.c cfa3310ec7d0c3ef45a3f647c7e7164453d9d4e0

cfa3310ec7d0c3ef45a3f647c7e7164453d9d4e0
kent
  Tue Mar 5 01:01:42 2013 -0800
A little more progress on the multiple-extra-index change. This is the first time the reader and the writer have worked together, even in a small way.
diff --git src/lib/bigBed.c src/lib/bigBed.c
index a663195..1108797 100644
--- src/lib/bigBed.c
+++ src/lib/bigBed.c
@@ -190,44 +190,44 @@
 	summarySize, summary);
 }
 
 boolean bigBedSummaryArray(struct bbiFile *bbi, char *chrom, bits32 start, bits32 end,
 	enum bbiSummaryType summaryType, int summarySize, double *summaryValues)
 /* Fill in summaryValues with  data from indicated chromosome range in bigBed file.
  * Be sure to initialize summaryValues to a default value, which will not be touched
  * for regions without data in file.  (Generally you want the default value to either
  * be 0.0 or nan("") depending on the application.)  Returns FALSE if no data
  * at that position. */
 {
 return bbiSummaryArray(bbi, chrom, start, end, bigBedCoverageIntervals,
 	summaryType, summarySize, summaryValues);
 }
 
+#ifdef OLD
 void bigBedAttachNameIndex(struct bbiFile *bbi)
 /* Attach name index part of bbiFile to bbi */
 {
-#ifdef OLD
 if (bbi->nameBpt == NULL)
     {
     if (bbi->nameIndexOffset == 0)
 	errAbort("%s has no name index", bbi->fileName);
     udcSeek(bbi->udc, bbi->nameIndexOffset);
     bbi->nameBpt = bptFileAttach(bbi->fileName, bbi->udc);
     }
-#endif /* OLD */
 uglyAbort("bigBedAttachNameIndex() - no can do");
 }
+#endif /* OLD */
 
 struct offsetSize 
 /* Simple file offset and file size. */
     {
     bits64 offset; 
     bits64 size;
     };
 
 static int cmpOffsetSizeRef(const void *va, const void *vb)
 /* Compare to sort slRef pointing to offsetSize.  Sort is kind of hokey,
  * but guarantees all items that are the same will be next to each other
  * at least, which is all we care about. */
 {
 const struct slRef *a = *((struct slRef **)va);
 const struct slRef *b = *((struct slRef **)vb);
@@ -259,52 +259,53 @@
 	    fos->size = byteSwap64(lastOffsetSize.size);
 	    }
 	else
 	    {
 	    fos->offset = lastOffsetSize.offset;
 	    fos->size = lastOffsetSize.size;
 	    }
 	slAddHead(&fosList, fos);
 	}
     }
 slReverse(&fosList);
 return fosList;
 }
 
 
-static struct fileOffsetSize *bigBedChunksMatchingName(struct bbiFile *bbi, char *name)
+static struct fileOffsetSize *bigBedChunksMatchingName(struct bbiFile *bbi, 
+    struct bptFile *index, char *name)
 /* Get list of file chunks that match name.  Can slFreeList this when done. */
 {
-struct slRef *blockList = bptFileFindMultiple(bbi->nameBpt, 
+struct slRef *blockList = bptFileFindMultiple(index, 
 	name, strlen(name), sizeof(struct offsetSize));
 struct fileOffsetSize *fosList = fosFromRedundantBlockList(&blockList, bbi->isSwapped);
 slRefFreeListAndVals(&blockList);
 return fosList;
 }
 
 static struct fileOffsetSize *bigBedChunksMatchingNames(struct bbiFile *bbi, 
-	char **names, int nameCount)
+	struct bptFile *index, char **names, int nameCount)
 /* Get list of file chunks that match any of the names.  Can slFreeList this when done. */
 {
 /* Go through all names and make a blockList that includes all blocks with any hit to any name.  
  * Many of these blocks will occur multiple times. */
 struct slRef *blockList = NULL;
 int nameIx;
 for (nameIx = 0; nameIx < nameCount; ++nameIx)
     {
     char *name = names[nameIx];
-    struct slRef *oneList = bptFileFindMultiple(bbi->nameBpt, 
+    struct slRef *oneList = bptFileFindMultiple(index, 
 	    name, strlen(name), sizeof(struct offsetSize));
     blockList = slCat(oneList, blockList);
     }
 
 /* Create nonredundant list of blocks. */
 struct fileOffsetSize *fosList = fosFromRedundantBlockList(&blockList, bbi->isSwapped);
 
 /* Clean up and resturn result. */
 slRefFreeListAndVals(&blockList);
 return fosList;
 }
 
 typedef boolean (*BbFirstWordMatch)(char *line, void *target);
 /* A function that returns TRUE if first word in tab-separated line matches target. */
 
@@ -392,48 +393,49 @@
 	    interval->rest = cloneString(dy->string);
 	    interval->chromId = chromIx;
 	    slAddHead(&intervalList, interval);
 	    }
 	}
 
     /* Clean up temporary buffers. */
     dyStringFree(&dy);
     freez(&uncompressedData);
     freez(&rawData);
     }
 slReverse(&intervalList);
 return intervalList;
 }
 
-struct bigBedInterval *bigBedNameQuery(struct bbiFile *bbi, char *name, struct lm *lm)
+
+
+struct bigBedInterval *bigBedNameQuery(struct bbiFile *bbi, struct bptFile *index,
+    char *name, struct lm *lm)
 /* Return list of intervals matching file. These intervals will be allocated out of lm. */
 {
-bigBedAttachNameIndex(bbi);
-struct fileOffsetSize *fosList = bigBedChunksMatchingName(bbi, name);
+struct fileOffsetSize *fosList = bigBedChunksMatchingName(bbi, index, name);
 struct bigBedInterval *intervalList = bigBedIntervalsMatchingName(bbi, fosList, 
     bbFirstWordMatchesName, name, lm);
 slFreeList(&fosList);
 return intervalList;
 }
 
-struct bigBedInterval *bigBedMultiNameQuery(struct bbiFile *bbi, char **names, 
-    int nameCount, struct lm *lm)
+struct bigBedInterval *bigBedMultiNameQuery(struct bbiFile *bbi, struct bptFile *index,
+    char **names, int nameCount, struct lm *lm)
 /* Fetch all records matching any of the names. Return list is allocated out of lm. */
 {
 /* Set up name index and get list of chunks that match any of our names. */
-bigBedAttachNameIndex(bbi);
-struct fileOffsetSize *fosList = bigBedChunksMatchingNames(bbi, names, nameCount);
+struct fileOffsetSize *fosList = bigBedChunksMatchingNames(bbi, index, names, nameCount);
 
 /* Create hash of all names. */
 struct hash *hash = newHash(0);
 int nameIx;
 for (nameIx=0; nameIx < nameCount; ++nameIx)
     hashAdd(hash, names[nameIx], NULL);
 
 
 /* Get intervals where name matches hash target. */
 struct bigBedInterval *intervalList = bigBedIntervalsMatchingName(bbi, fosList, 
     bbFirstWordIsInHash, hash, lm);
 
 /* Clean up and return results. */
 slFreeList(&fosList);
 hashFree(&hash);
@@ -473,44 +475,50 @@
 }
 
 char *bigBedAutoSqlText(struct bbiFile *bbi)
 /* Get autoSql text if any associated with file.  Do a freeMem of this when done. */
 {
 if (bbi->asOffset == 0)
     return NULL;
 struct udcFile *f = bbi->udc;
 udcSeek(f, bbi->asOffset);
 return udcReadStringAndZero(f);
 }
 
 struct asObject *bigBedAs(struct bbiFile *bbi)
 /* Get autoSql object definition if any associated with file. */
 {
+struct asObject *as = bbi->cachedAs;
+if (as != NULL)
+    return as;
 if (bbi->asOffset == 0)
     return NULL;
 char *asText = bigBedAutoSqlText(bbi);
-struct asObject *as = asParseText(asText);
+bbi->cachedAs = as = asParseText(asText);
 freeMem(asText);
 return as;
 }
 
 struct asObject *bigBedAsOrDefault(struct bbiFile *bbi)
 // Get asObject associated with bigBed - if none exists in file make it up from field counts.
 {
-struct asObject *as = bigBedAs(bbi);
+struct asObject *as = bbi->cachedAs;
+if (as != NULL)
+    return as;
+as = bigBedAs(bbi);
 if (as == NULL)
-    as = asParseText(bedAsDef(bbi->definedFieldCount, bbi->fieldCount));
+    bbi->cachedAs = as = asParseText(bedAsDef(bbi->definedFieldCount, bbi->fieldCount));
 return as;
 }
 
 struct asObject *bigBedFileAsObjOrDefault(char *fileName)
 // Get asObject associated with bigBed file, or the default.
 {
 struct bbiFile *bbi = bigBedFileOpen(fileName);
 if (bbi)
     {
     struct asObject *as = bigBedAsOrDefault(bbi);
     bbiFileClose(&bbi);
     return as;
     }
 return NULL;
 }
@@ -531,59 +539,105 @@
 /* See if we have any extra indexes, and if so seek to there. */
 bits64 offset = bbi->extraIndexListOffset;
 if (offset == 0)
    return NULL;
 udcSeek(udc, offset);
 
 /* Construct list of field that are being indexed.  List is list of 
  * field numbers within asObj. */
 int i;
 struct slInt *intList = NULL, *intEl;
 for (i=0; i<bbi->extraIndexCount; ++i)
     {
     bits16 type,fieldCount;
     type = udcReadBits16(udc, isSwapped);
     fieldCount = udcReadBits16(udc, isSwapped);
-    udcSeekCur(udc, 12);    // skip over reserved bits
+    udcSeekCur(udc, sizeof(bits64));  // skip over fileOffset
+    udcSeekCur(udc, 4);    // skip over reserved bits
     if (fieldCount == 1)
         {
 	bits16 fieldId = udcReadBits16(udc, isSwapped);
 	udcSeekCur(udc, 2);    // skip over reserved bits
 	intEl = slIntNew(fieldId);
 	slAddHead(&intList, intEl);
 	}
     else
         {
 	warn("Not yet understanding indexes on multiple fields at once.");
 	internalErr();
 	}
     }
 
 /* Now have to make an asObject to find out name that corresponds to this field. */
 struct asObject *as = bigBedAsOrDefault(bbi);
 
 /* Make list of field names out of list of field numbers */
 struct slName *nameList = NULL;
 for (intEl = intList; intEl != NULL; intEl = intEl->next)
     {
     struct asColumn *col = slElementFromIx(as->columnList, intEl->val);
     if (col == NULL)
 	{
         warn("Inconsistent bigBed file %s", bbi->fileName);
 	internalErr();
 	}
     slNameAddHead(&nameList, col->name);
     }
 
-asObjectFree(&as);
 return nameList;
 }
 
 struct bptFile *bigBedOpenExtraIndex(struct bbiFile *bbi, char *fieldName)
 /* Return index associated with fieldName.  Aborts if no such index. */
 {
-uglyAbort("Coming soon.");
+struct udcFile *udc = bbi->udc;
+boolean isSwapped = bbi->isSwapped;
+struct asObject *as = bigBedAsOrDefault(bbi);
+struct asColumn *col = asColumnFind(as, fieldName);
+if (col == NULL)
+    errAbort("No field %s in %s", fieldName, bbi->fileName);
+int colIx = slIxFromElement(as->columnList, col);
+
+/* See if we have any extra indexes, and if so seek to there. */
+bits64 offset = bbi->extraIndexListOffset;
+if (offset == 0)
+   errAbort("%s has no indexes", bbi->fileName);
+udcSeek(udc, offset);
+
+/* Go through each extra index and see if it's a match */
+int i;
+for (i=0; i<bbi->extraIndexCount; ++i)
+    {
+    bits16 type = udcReadBits16(udc, isSwapped);
+    bits16 fieldCount = udcReadBits16(udc, isSwapped);
+    bits64 fileOffset = udcReadBits64(udc, isSwapped);
+    udcSeekCur(udc, 4);    // skip over reserved bits
+
+    if (type != 0)
+        {
+	warn("Don't understand type %d", type);
+	internalErr();
+	}
+    if (fieldCount == 1)
+        {
+	bits16 fieldId = udcReadBits16(udc, isSwapped);
+	udcSeekCur(udc, 2);    // skip over reserved bits
+	if (fieldId == colIx)
+	    {
+	    udcSeek(udc, fileOffset);
+	    struct bptFile *bpt = bptFileAttach(bbi->fileName, udc);
+	    return bpt;
+	    }
+	}
+    else
+        {
+	warn("Not yet understanding indexes on multiple fields at once.");
+	internalErr();
+	}
+    }
+
+errAbort("%s is not indexed in %s", fieldName, bbi->fileName);
 return NULL;
 }