ba300e9f4268b4e271699dab04c02a345918b31e
kent
  Wed Feb 27 09:57:52 2013 -0800
Refactoring in preparation for moving much of this to library.
diff --git src/utils/bigBedNamedItems/bigBedNamedItems.c src/utils/bigBedNamedItems/bigBedNamedItems.c
index a16e7a7..09c8c70 100644
--- src/utils/bigBedNamedItems/bigBedNamedItems.c
+++ src/utils/bigBedNamedItems/bigBedNamedItems.c
@@ -1,176 +1,198 @@
 /* bigBedNamedItems - Extract item(s) of given name(s) from bigBed. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "localmem.h"
 #include "udc.h"
 #include "bPlusTree.h"
 #include "bigBed.h"
 #include "obscure.h"
 #include "zlibFace.h"
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "bigBedNamedItems - Extract item of given name from bigBed\n"
   "usage:\n"
   "   bigBedNamedItems file.bb name output.bed\n"
   "options:\n"
   "   -xxx=XXX\n"
   );
 }
 
 /* Command line validation table. */
 static struct optionSpec options[] = {
    {NULL, 0},
 };
 
 void bigBedAttachNameIndex(struct bbiFile *bbi)
 /* Attach name index part of bbiFile to bbi */
 {
 if (bbi->nameBpt == NULL)
     {
     if (bbi->nameIndexOffset == 0)
 	errAbort("%s has no name index", bbi->fileName);
     udcSeek(bbi->udc, bbi->nameIndexOffset);
     bbi->nameBpt = bptFileAttach(bbi->fileName, bbi->udc);
     }
 }
 
 struct offsetSize 
 /* Simple file offset and file size. */
     {
     bits64 offset; 
     bits64 size;
     };
 
 int cmpOffsetSizeRef(const void *va, const void *vb)
 /* Compare to sort slRef pointing to offsetSize.  Sort is kind of hokey,
  * but guarantees all items that are the same will be next to each other
  * at least, which is all we care about. */
 {
 const struct slRef *a = *((struct slRef **)va);
 const struct slRef *b = *((struct slRef **)vb);
 return memcmp(a->val, b->val, sizeof(struct offsetSize));
 }
 
 struct fileOffsetSize *bigBedChunksMatchingName(struct bbiFile *bbi, char *name)
 /* Get list of file chunks that match name.  Can slFreeList this when done. */
 {
 bigBedAttachNameIndex(bbi);
 struct slRef *blockList = bptFileFindMultiple(bbi->nameBpt, name, strlen(name), sizeof(struct offsetSize));
 slSort(&blockList, cmpOffsetSizeRef);
 
 struct fileOffsetSize *fosList = NULL, *fos;
 struct offsetSize lastOffsetSize = {0,0};
 struct slRef *blockRef;
 for (blockRef = blockList; blockRef != NULL; blockRef = blockRef->next)
     {
     if (memcmp(&lastOffsetSize, blockRef->val, sizeof(lastOffsetSize)) != 0)
         {
 	memcpy(&lastOffsetSize, blockRef->val, sizeof(lastOffsetSize));
 	AllocVar(fos);
 	if (bbi->isSwapped)
 	    {
 	    fos->offset = byteSwap64(lastOffsetSize.offset);
 	    fos->size = byteSwap64(lastOffsetSize.size);
 	    }
 	else
 	    {
 	    fos->offset = lastOffsetSize.offset;
 	    fos->size = lastOffsetSize.size;
 	    }
 	slAddHead(&fosList, fos);
 	}
     }
 slRefFreeListAndVals(&blockList);
 slReverse(&fosList);
 return fosList;
 }
 
-boolean bigBedNameQuery(struct bbiFile *bbi, char *name, FILE *f)
-/* Write item matching name to file.  Return TRUE if anything written.  */
+struct bigBedInterval *bigBedNameQuery(struct bbiFile *bbi, char *name, struct lm *lm)
+/* Return list of intervals matching name. Intervals and their rest strings are allocated out of lm. */
 {
 bigBedAttachNameIndex(bbi);
 boolean isSwapped = bbi->isSwapped;
 struct fileOffsetSize *fos, *fosList = bigBedChunksMatchingName(bbi, name);
-boolean didWrite = FALSE;
+struct bigBedInterval *interval, *intervalList = NULL;
 for (fos = fosList; fos != NULL; fos = fos->next)
     {
     /* Read in raw data */
     udcSeek(bbi->udc, fos->offset);
     char *rawData = needLargeMem(fos->size);
     udcRead(bbi->udc, rawData, fos->size);
 
     /* Optionally uncompress data, and set data pointer to uncompressed version. */
     char *uncompressedData = NULL;
     char *data = NULL;
     int dataSize = 0;
     if (bbi->uncompressBufSize > 0)
 	{
 	data = uncompressedData = needLargeMem(bbi->uncompressBufSize);
 	dataSize = zUncompress(rawData, fos->size, uncompressedData, bbi->uncompressBufSize);
 	}
     else
 	{
         data = rawData;
 	dataSize = fos->size;
 	}
 
     /* Set up for "memRead" routines to more or less treat memory block like file */
     char *blockPt = data, *blockEnd = data + dataSize;
     struct dyString *dy = dyStringNew(32); // Keep bits outside of chrom/start/end here
 
 
     /* Read next record into local variables. */
     while (blockPt < blockEnd)
 	{
 	bits32 chromIx = memReadBits32(&blockPt, isSwapped);
 	bits32 s = memReadBits32(&blockPt, isSwapped);
 	bits32 e = memReadBits32(&blockPt, isSwapped);
 	int c;
 	dyStringClear(dy);
 	while ((c = *blockPt++) >= 0)
 	    {
 	    if (c == 0)
 		break;
 	    dyStringAppendC(dy, c);
 	    }
 	if (startsWithWordByDelimiter(name, '\t', dy->string))
 	    {
-	    char chromName[bbi->chromBpt->keySize+1];
-	    bptStringKeyAtPos(bbi->chromBpt, chromIx, chromName, sizeof(chromName));
-	    fprintf(f, "%s\t%u\t%u\t%s\n", chromName, s, e, dy->string);
-	    didWrite = TRUE;
+	    lmAllocVar(lm, interval);
+	    interval->start = s;
+	    interval->end = e;
+	    interval->rest = lmCloneString(lm, dy->string);
+	    interval->chromId = chromIx;
+	    slAddHead(&intervalList, interval);
 	    }
 	}
 
     /* Clean up temporary buffers. */
     dyStringFree(&dy);
     freez(&uncompressedData);
     freez(&rawData);
     }
 slFreeList(&fosList);
-return didWrite;
+slReverse(&intervalList);
+return intervalList;
+}
+
+void bigBedIntervalListToBedFile(struct bbiFile *bbi, struct bigBedInterval *intervalList, FILE *f)
+/* Write each interval in list to f as a tab-separated bed line; chromosome name is looked up from bbi->chromBpt only when chromId changes between consecutive intervals. */
+{
+char chromName[bbi->chromBpt->keySize+1];
+int chromId = -1;
+struct bigBedInterval *interval;
+for (interval = intervalList; interval != NULL; interval = interval->next)
+    {
+    if (interval->chromId != chromId)
+        {
+	chromId = interval->chromId;
+	bptStringKeyAtPos(bbi->chromBpt, chromId, chromName, sizeof(chromName));
+	}
+    fprintf(f, "%s\t%u\t%u\t%s\n", chromName, interval->start, interval->end, interval->rest);
+    }
 }
 
 void bigBedNamedItems(char *bigBedFile, char *name, char *outFile)
 /* bigBedNamedItems - Extract item(s) of given name(s) from bigBed. */
 {
 struct bbiFile *bbi = bigBedFileOpen(bigBedFile);
 FILE *f = mustOpen(outFile, "w");
-bigBedNameQuery(bbi, name, f);
+struct lm *lm = lmInit(0);
+struct bigBedInterval *intervalList = bigBedNameQuery(bbi, name, lm);
+bigBedIntervalListToBedFile(bbi, intervalList, f);
 carefulClose(&f);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc != 4)
     usage();
 bigBedNamedItems(argv[1], argv[2], argv[3]);
 return 0;
 }