15080c151c9929733fa2af71ce5dc98533c4f913 galt Fri Mar 20 18:45:40 2026 -0700 The error messages are more detailed to help users, something MarkD wanted. It aborts by default if chromStart or chromEnd are greater than chromSize, unless the user has specified -skipChromCheck which skips both chrom name and chrom size checks now. Fixes undetected integer overflow issues, warns when they are too large instead of silently failing. Expanded all the coordinates in the utils and the lib so that they use the full range of unsigned integer space when querying and using GB for chromStart and chromEnd coordinates used by BED standard and supported by .2bit chroms. Fixed minor overflow issue with bigBed.c going past the end of the 4GB space. I made changes to bigBedToBed.c and then used CLAUDE prompt to refactor those changes into the 6 other utilities bigMafToMaf, bigChainToChain, bigGenePredToGenePred, bigPslToPsl, bigWigToBedGraph, bigWigToWig. refs #28109 diff --git src/utils/bigWigToWig/bigWigToWig.c src/utils/bigWigToWig/bigWigToWig.c index 45ccdf1fc9c..f18221c7ca0 100644 --- src/utils/bigWigToWig/bigWigToWig.c +++ src/utils/bigWigToWig/bigWigToWig.c @@ -1,159 +1,180 @@ /* bigWigToWig - Convert bigWig to wig. This will keep more of the same structure of the * original wig than bigWigToBedGraph does, but still will break up large stepped sections into * smaller ones. */ /* Copyright (C) 2011 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "udc.h" #include "bigWig.h" #include "obscure.h" #include "basicBed.h" #include "bigBedCmdSupport.h" +#include <limits.h> char *clChrom = NULL; -int clStart = -1; -int clEnd = -1; +long long clStart = 0; +long long clEnd = 0; char *clBed = NULL; /* Bed file that specifies bounds of sequences. 
*/ char *clPos = NULL; /* Positions file that specifies bounds of sequences. */ struct slName *clRange = NULL; struct hash *chromHash = NULL; boolean skipChromCheck = FALSE; void usage() /* Explain usage and exit. */ { errAbort( "bigWigToWig - Convert bigWig to wig. This will keep more of the same structure of the\n" "original wig than bigWigToBedGraph does, but still will break up large stepped sections\n" "into smaller ones.\n" "usage:\n" " bigWigToWig in.bigWig out.wig\n" "options:\n" " -chrom=chr1 - if set restrict output to given chromosome\n" " -start=N - if set, restrict output to only that over start\n" " -end=N - if set, restict output to only that under end\n" - " -range=\"chrom start end\" - if set, restrict output to only that within range from start to end. \n" - " This range start is a half-open 0-based coordinate like used in BED files. \n" - " -range=chrom:start-end - if set, restrict output to only that within range from start to end. \n" - " This range start is a 1-based start position. \n" - " Do not use range with chrom, start, and/or end options. \n" - " -range may be specified multiple times for multiple ranges. \n" + " -range - restrict output to the given genomic range.\n" + " Two formats are accepted:\n" + " -range=\"chrom start end\" - 0-based half-open coordinates (BED format)\n" + " -range=chrom:start-end - 1-based start position (colon/hyphen-separated)\n" + " May be specified multiple times for multiple ranges.\n" + " Do not combine -range with -chrom, -start, or -end.\n" " -bed=input.bed Extract values for all ranges specified by input.bed. 
If bed4, will also print the bed name.\n" " -positions=in.pos - restrict output to all regions in a position file with 1-based start\n" " -udcDir=/dir/to/cache - place to put cache for remote bigBed/bigWigs\n" - " -skipChromCheck - skip checking chrom name.\n" + " -skipChromCheck - skip chrom name validation and coordinate check for chrom size.\n" ); } static struct optionSpec options[] = { {"chrom", OPTION_STRING}, - {"start", OPTION_INT}, - {"end", OPTION_INT}, + {"start", OPTION_LONG_LONG}, + {"end", OPTION_LONG_LONG}, {"udcDir", OPTION_STRING}, {"bed", OPTION_STRING}, {"range", OPTION_STRING|OPTION_MULTI}, {"positions", OPTION_STRING}, {"skipChromCheck", OPTION_BOOLEAN}, {NULL, 0}, }; static void processChromChunk(struct bbiFile *bbi, char *chrom, - int start, int end, char *bedName, FILE *f) + uint start, uint end, char *bedName, FILE *f) /* Output one chunk. Only blocks where start is in the range will be written * to avoid outputting a block multiple tines. */ { -verbose(2, "==> extract %s:%d-%d %s\n", chrom, start, end, bedName); +verbose(2, "==> extract %s:%u-%u %s\n", chrom, start, end, bedName); if (bedName) bigWigIntervalDumpWithName(bbi, chrom, start, end, 0, bedName, f); else bigWigIntervalDump(bbi, chrom, start, end, 0, f); } void bigWigToWig(char *inFile, char *outFile) /* bigWigToWig - Convert bigWig to wig. This will keep more of the same structure of the * original wig than bigWigToBedGraph does, but still will break up large stepped sections into * smaller ones. 
*/ { struct bbiFile *bbi = bigWigFileOpen(inFile); FILE *f = mustOpen(outFile, "w"); struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi); if (!skipChromCheck) chromHash = makeChromHash(chromList); if (clBed != NULL) { genericBigToNonBigFromBed(bbi, chromHash, clBed, f, &processChromChunk); } else if (clPos != NULL) { genericBigToNonBigFromPos(bbi, chromHash, clPos, f, &processChromChunk); } else if (clRange != NULL) { genericBigToNonBigFromRange(bbi, chromHash, f, clRange, &processChromChunk); } else { boolean chromFound = FALSE; for (chrom = chromList; chrom != NULL; chrom = chrom->next) { if (clChrom != NULL && !sameString(clChrom, chrom->name)) continue; chromFound = TRUE; char *chromName = chrom->name; - int start = 0, end = chrom->size; - if (clStart >= 0) + uint start = 0, end = chrom->size; + if (optionExists("start")) + { start = clStart; - if (clEnd >= 0) + if (!skipChromCheck) + { + if (start > chrom->size) + errAbort("invalid start=%u > chromSize=%u", start, chrom->size); + } + } + if (optionExists("end")) { end = clEnd; + if (!skipChromCheck) + { if (end > chrom->size) - end = chrom->size; + errAbort("invalid end=%u > chromSize=%u", end, chrom->size); + } } if (start > end) - errAbort("invalid range, start=%d > end=%d", start, end); + errAbort("invalid range, start=%u > end=%u", start, end); bigWigIntervalDump(bbi, chromName, start, end, 0, f); } if (clChrom && !chromFound && !skipChromCheck) errAbort("specified chrom %s not found in bigWig", clChrom); } bbiChromInfoFreeList(&chromList); carefulClose(&f); bbiFileClose(&bbi); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc != 3) usage(); clChrom = optionVal("chrom", clChrom); -clStart = optionInt("start", clStart); -clEnd = optionInt("end", clEnd); +clStart = optionLongLong("start", clStart); +if (clStart > UINT_MAX) + errAbort("-start option too big. 
Should not exceed %u - the size of unsigned integer.", UINT_MAX); +if (clStart < 0) + errAbort("-start option should not be less than 0."); + +clEnd = optionLongLong("end", clEnd); +if (clEnd > UINT_MAX) + errAbort("-end option too big. Should not exceed %u - the size of an unsigned integer.", UINT_MAX); +if (clEnd < 0) + errAbort("-end option should not be less than 0."); + clRange = optionMultiVal("range", clRange); clBed = optionVal("bed", clBed); clPos = optionVal("positions", clPos); skipChromCheck = optionExists("skipChromCheck"); udcSetDefaultDir(optionVal("udcDir", udcDefaultDir())); -if ((clBed || clPos || clRange) && (clChrom || (clStart >= 0) || (clEnd >= 0))) +if ((clBed || clPos || clRange) && (clChrom || optionExists("start") || optionExists("end"))) errAbort("-bed or -positions or -range can not be used with -chrom -start or -end options"); if ((clBed && clPos) || (clBed && clRange) || (clPos && clRange)) errAbort("-bed, -positions, and -range can not be used together"); bigWigToWig(argv[1], argv[2]); if (verboseLevel() > 1) printVmPeak(); return 0; }