bc587fd886e5c5973d56b138aece47f34857e7eb
markd
Sat Oct 4 21:36:04 2025 -0700
added -bed option to bigWigToWig to selectively output wiggle regions (contributed by Michael Hiller)
diff --git src/lib/bwgQuery.c src/lib/bwgQuery.c
index 81217a73cf8..a1a4fa400de 100644
--- src/lib/bwgQuery.c
+++ src/lib/bwgQuery.c
@@ -140,30 +140,127 @@
if (maxCount != 0 && outCount >= maxCount)
break;
}
start += head.itemStep;
}
break;
}
default:
internalErr();
break;
}
assert( (maxCount != 0 && outCount >= maxCount) || (blockPt == blockEnd));
return outCount;
}
+
+static int bigWigBlockDumpIntersectingRangeWithName(boolean isSwapped, char *blockPt, char *blockEnd,
+ char *chrom, bits32 rangeStart, bits32 rangeEnd, int maxCount, char *name, FILE *out)
+/* Print out info on parts of block that intersect start-end, block starting at current position.
+ Same as bigWigBlockDumpIntersectingRange() but prints the optional name parameter
+*/
+{
+struct bwgSectionHead head;
+bwgSectionHeadFromMem(&blockPt, &head, isSwapped);
+bits16 i;
+float val;
+int outCount = 0;
+
+switch (head.type)
+ {
+ case bwgTypeBedGraph:
+ {
+/* fprintf(out, "#bedGraph section %s:%u-%u\n", chrom, head.start, head.end);
+*/
+ for (i=0; i
0)
+ {
+ if (name != NULL) {
+ fprintf(out, "%s\t%u\t%u\t%s\t%g\n", chrom, start, end, name, val);
+ }else{
+ fprintf(out, "%s\t%u\t%u\t%g\n", chrom, start, end, val);
+ }
+ ++outCount;
+ if (maxCount != 0 && outCount >= maxCount)
+ break;
+ }
+ }
+ break;
+ }
+ case bwgTypeVariableStep:
+ {
+/* fprintf(out, "variableStep chrom=%s span=%u\n", chrom, head.itemSpan);
+*/
+ for (i=0; i 0)
+ {
+ if (name != NULL) {
+ fprintf(out, "%u\t%s\t%g\n", start+1, name, val);
+ }else{
+ fprintf(out, "%u\t%g\n", start+1, val);
+ }
+ ++outCount;
+ if (maxCount != 0 && outCount >= maxCount)
+ break;
+ }
+ }
+ break;
+ }
+ case bwgTypeFixedStep:
+ {
+ boolean gotStart = FALSE;
+ bits32 start = head.start;
+ for (i=0; i 0)
+ {
+ if (!gotStart)
+ {
+/* fprintf(out, "fixedStep chrom=%s start=%u step=%u span=%u\n",
+ chrom, start+1, head.itemStep, head.itemSpan);
+*/
+ gotStart = TRUE;
+ }
+ if (name != NULL) {
+ fprintf(out, "%s\t%g\n", name, val);
+ }else{
+ fprintf(out, "%g\n", val);
+ }
+ ++outCount;
+ if (maxCount != 0 && outCount >= maxCount)
+ break;
+ }
+ start += head.itemStep;
+ }
+ break;
+ }
+ default:
+ internalErr();
+ break;
+ }
+assert( (maxCount != 0 && outCount >= maxCount) || (blockPt == blockEnd));
+return outCount;
+}
+
struct bbiInterval *bigWigIntervalQuery(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end,
struct lm *lm)
/* Get data for interval. Return list allocated out of lm. */
{
if (bwf->typeSig != bigWigSig)
errAbort("Trying to do bigWigIntervalQuery on a non big-wig file.");
bbiAttachUnzoomedCir(bwf);
struct bbiInterval *el, *list = NULL;
struct fileOffsetSize *blockList = bbiOverlappingBlocks(bwf, bwf->unzoomedCir,
chrom, start, end, NULL);
struct fileOffsetSize *block, *beforeGap, *afterGap;
struct udcFile *udc = bwf->udc;
boolean isSwapped = bwf->isSwapped;
float val;
int i;
@@ -349,30 +446,106 @@
break;
}
maxCount -= oneCount;
}
blockBuf += block->size;
}
freeMem(mergedBuf);
}
freeMem(uncompressBuf);
slFreeList(&blockList);
return printCount;
}
+int bigWigIntervalDumpWithName(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end, int maxCount, char *name,
+ FILE *out)
+/* Print out info on bigWig parts that intersect chrom:start-end. Set maxCount to 0 if you
+ * don't care how many are printed. Returns number printed.
+ Same as bigWigIntervalDump but passes the name parameter on to bigWigBlockDumpIntersectingRangeWithName
+ */
+{
+if (bwf->typeSig != bigWigSig)
+ errAbort("Trying to do bigWigIntervalDump on a non big-wig file.");
+bbiAttachUnzoomedCir(bwf);
+struct fileOffsetSize *blockList = bbiOverlappingBlocks(bwf, bwf->unzoomedCir,
+ chrom, start, end, NULL);
+struct fileOffsetSize *block, *beforeGap, *afterGap;
+struct udcFile *udc = bwf->udc;
+int printCount = 0;
+
+/* Set up for uncompression optionally. */
+char *uncompressBuf = NULL;
+if (bwf->uncompressBufSize > 0)
+ uncompressBuf = needLargeMem(bwf->uncompressBufSize);
+
+/* This loop is a little complicated because we merge the read requests for efficiency, but we
+ * have to then go back through the data one unmerged block at a time. */
+for (block = blockList; block != NULL; )
+ {
+ /* Find contigious blocks and read them into mergedBuf. */
+ fileOffsetSizeFindGap(block, &beforeGap, &afterGap);
+ bits64 mergedOffset = block->offset;
+ bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset;
+ udcSeek(udc, mergedOffset);
+ char *mergedBuf = needLargeMem(mergedSize);
+ udcMustRead(udc, mergedBuf, mergedSize);
+ char *blockBuf = mergedBuf;
+
+ /* Loop through individual blocks within merged section. */
+ for (;block != afterGap; block = block->next)
+ {
+ /* Uncompress if necessary. */
+ char *blockPt, *blockEnd;
+ if (uncompressBuf)
+ {
+ blockPt = uncompressBuf;
+ int uncSize = zUncompress(blockBuf, block->size, uncompressBuf, bwf->uncompressBufSize);
+ blockEnd = blockPt + uncSize;
+ }
+ else
+ {
+ blockPt = blockBuf;
+ blockEnd = blockPt + block->size;
+ }
+
+ /* Do the actual dump. */
+ int oneCount = bigWigBlockDumpIntersectingRangeWithName(bwf->isSwapped, blockPt, blockEnd,
+ chrom, start, end, maxCount, name, out);
+
+ /* Keep track of how many dumped, not exceeding maximum. */
+ printCount += oneCount;
+ if (maxCount != 0)
+ {
+ if (oneCount >= maxCount)
+ {
+ block = NULL; // we want to drop out of the outer loop too
+ break;
+ }
+
+ maxCount -= oneCount;
+ }
+ blockBuf += block->size;
+ }
+ freeMem(mergedBuf);
+ }
+freeMem(uncompressBuf);
+
+slFreeList(&blockList);
+return printCount;
+}
boolean bigWigSummaryArray(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end,
enum bbiSummaryType summaryType, int summarySize, double *summaryValues)
/* Fill in summaryValues with data from indicated chromosome range in bigWig file.
* Be sure to initialize summaryValues to a default value, which will not be touched
* for regions without data in file. (Generally you want the default value to either
* be 0.0 or nan("") depending on the application.) Returns FALSE if no data
* at that position. */
{
boolean ret = bbiSummaryArray(bwf, chrom, start, end, bigWigIntervalQuery,
summaryType, summarySize, summaryValues);
return ret;
}
boolean bigWigSummaryArrayExtended(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end,
int summarySize, struct bbiSummaryElement *summary)