src/utils/bigBedNamedItems/bigBedNamedItems.c d10159e362021b3fbbcf57d04526c756004fdb73

d10159e362021b3fbbcf57d04526c756004fdb73
kent
  Tue Feb 26 22:53:33 2013 -0800
Making it so that multiple items can have the same name, and all such items are found.
diff --git src/utils/bigBedNamedItems/bigBedNamedItems.c src/utils/bigBedNamedItems/bigBedNamedItems.c
index 6829294..a16e7a7 100644
--- src/utils/bigBedNamedItems/bigBedNamedItems.c
+++ src/utils/bigBedNamedItems/bigBedNamedItems.c
@@ -27,63 +27,108 @@
    {NULL, 0},
 };
 
 void bigBedAttachNameIndex(struct bbiFile *bbi)
 /* Attach name index part of bbiFile to bbi */
 {
 if (bbi->nameBpt == NULL)
     {
     if (bbi->nameIndexOffset == 0)
 	errAbort("%s has no name index", bbi->fileName);
     udcSeek(bbi->udc, bbi->nameIndexOffset);
     bbi->nameBpt = bptFileAttach(bbi->fileName, bbi->udc);
     }
 }
 
-boolean bigBedNameQuery(struct bbiFile *bbi, char *name, FILE *f)
-/* Write item matching name to file.  Return TRUE if anything written.  */
+struct offsetSize 
+/* Simple file offset and file size. */
+    {
+    bits64 offset; 
+    bits64 size;
+    };
+
+int cmpOffsetSizeRef(const void *va, const void *vb)
+/* Compare to sort slRef pointing to offsetSize.  Sort is kind of hokey,
+ * but guarantees all items that are the same will be next to each other
+ * at least, which is all we care about. */
+{
+const struct slRef *a = *((struct slRef **)va);
+const struct slRef *b = *((struct slRef **)vb);
+return memcmp(a->val, b->val, sizeof(struct offsetSize));
+}
+
+struct fileOffsetSize *bigBedChunksMatchingName(struct bbiFile *bbi, char *name)
+/* Get list of file chunks that match name.  Can slFreeList this when done. */
 {
 bigBedAttachNameIndex(bbi);
-boolean isSwapped = bbi->isSwapped;
-struct offsetSize {bits64 offset; bits64 size;} block;
-boolean didWrite = FALSE;
-if (bptFileFind(bbi->nameBpt, name, strlen(name), &block, sizeof(block)))
+struct slRef *blockList = bptFileFindMultiple(bbi->nameBpt, name, strlen(name), sizeof(struct offsetSize));
+slSort(&blockList, cmpOffsetSizeRef);
+
+struct fileOffsetSize *fosList = NULL, *fos;
+struct offsetSize lastOffsetSize = {0,0};
+struct slRef *blockRef;
+for (blockRef = blockList; blockRef != NULL; blockRef = blockRef->next)
+    {
+    if (memcmp(&lastOffsetSize, blockRef->val, sizeof(lastOffsetSize)) != 0)
     {
+	memcpy(&lastOffsetSize, blockRef->val, sizeof(lastOffsetSize));
+	AllocVar(fos);
     if (bbi->isSwapped)
 	{
-	block.offset = byteSwap64(block.offset);
-	block.size = byteSwap64(block.size);
+	    fos->offset = byteSwap64(lastOffsetSize.offset);
+	    fos->size = byteSwap64(lastOffsetSize.size);
+	    }
+	else
+	    {
+	    fos->offset = lastOffsetSize.offset;
+	    fos->size = lastOffsetSize.size;
+	    }
+	slAddHead(&fosList, fos);
+	}
+    }
+slRefFreeListAndVals(&blockList);
+slReverse(&fosList);
+return fosList;
 	}
 
+boolean bigBedNameQuery(struct bbiFile *bbi, char *name, FILE *f)
+/* Write item matching name to file.  Return TRUE if anything written.  */
+{
+bigBedAttachNameIndex(bbi);
+boolean isSwapped = bbi->isSwapped;
+struct fileOffsetSize *fos, *fosList = bigBedChunksMatchingName(bbi, name);
+boolean didWrite = FALSE;
+for (fos = fosList; fos != NULL; fos = fos->next)
+    {
     /* Read in raw data */
-    udcSeek(bbi->udc, block.offset);
-    char *rawData = needLargeMem(block.size);
-    udcRead(bbi->udc, rawData, block.size);
+    udcSeek(bbi->udc, fos->offset);
+    char *rawData = needLargeMem(fos->size);
+    udcRead(bbi->udc, rawData, fos->size);
 
     /* Optionally uncompress data, and set data pointer to uncompressed version. */
     char *uncompressedData = NULL;
     char *data = NULL;
     int dataSize = 0;
     if (bbi->uncompressBufSize > 0)
 	{
 	data = uncompressedData = needLargeMem(bbi->uncompressBufSize);
-	dataSize = zUncompress(rawData, block.size, uncompressedData, bbi->uncompressBufSize);
+	dataSize = zUncompress(rawData, fos->size, uncompressedData, bbi->uncompressBufSize);
 	}
     else
 	{
         data = rawData;
-	dataSize = block.size;
+	dataSize = fos->size;
 	}
 
     /* Set up for "memRead" routines to more or less treat memory block like file */
     char *blockPt = data, *blockEnd = data + dataSize;
     struct dyString *dy = dyStringNew(32); // Keep bits outside of chrom/start/end here
 
 
     /* Read next record into local variables. */
     while (blockPt < blockEnd)
 	{
 	bits32 chromIx = memReadBits32(&blockPt, isSwapped);
 	bits32 s = memReadBits32(&blockPt, isSwapped);
 	bits32 e = memReadBits32(&blockPt, isSwapped);
 	int c;
 	dyStringClear(dy);
@@ -95,30 +140,31 @@
 	    }
 	if (startsWithWordByDelimiter(name, '\t', dy->string))
 	    {
 	    char chromName[bbi->chromBpt->keySize+1];
 	    bptStringKeyAtPos(bbi->chromBpt, chromIx, chromName, sizeof(chromName));
 	    fprintf(f, "%s\t%u\t%u\t%s\n", chromName, s, e, dy->string);
 	    didWrite = TRUE;
 	    }
 	}
 
     /* Clean up temporary buffers. */
     dyStringFree(&dy);
     freez(&uncompressedData);
     freez(&rawData);
     }
+slFreeList(&fosList);
 return didWrite;
 }
 
 void bigBedNamedItems(char *bigBedFile, char *name, char *outFile)
 /* bigBedNamedItems - Extract item(s) of given name(s) from bigBed. */
 {
 struct bbiFile *bbi = bigBedFileOpen(bigBedFile);
 FILE *f = mustOpen(outFile, "w");
 bigBedNameQuery(bbi, name, f);
 carefulClose(&f);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {