d0054a39067a773d5342af78a80d964835d47a60 markd Thu Oct 16 13:28:21 2025 -0700 add bedToBigBed -fixScores to correct scores that are out-of-range or invalid diff --git src/lib/basicBed.c src/lib/basicBed.c index e56a20e56e4..cefe8e06e3c 100644 --- src/lib/basicBed.c +++ src/lib/basicBed.c @@ -1443,67 +1443,77 @@ static unsigned currentSize = 0; static char *currentMem = NULL; if (size > currentSize) { if (currentMem) freeMem(currentMem); currentSize = size; currentMem = needLargeMem(size); } return currentMem; } -void loadAndValidateBedExt(char *row[], int bedFieldCount, int fieldCount, struct lineFile *lf, struct bed * bed, struct asObject *as, boolean isCt, boolean allow1bpOverlap) +void loadAndValidateBedOpts(char *row[], int bedFieldCount, int fieldCount, struct lineFile *lf, struct bed * bed, struct asObject *as, unsigned opts) /* Convert a row of strings to a bed and validate the contents. Abort with message if invalid data. Optionally validate bedPlus via asObject. * If a customTrack, then some errors are tolerated. Possibly allow exons to overlap by one base. */ { +boolean isCt = (opts & BED_IS_CUSTOM_TRACK) != 0; int count; int *blockSizes = NULL; int *chromStarts; bed->chrom = row[0]; // note this value is not cloned for speed, callers may need to clone it. // This check is usually redundant since the caller should be checking it against actual chromInfo names // however hgLoadBed might not always have that info available. if (strlen(bed->chrom) >= BB_MAX_CHROM_STRING) // must leave room for 0 terminator lineFileAbort(lf, "chrom [%s] is too long (must not exceed %d characters)", bed->chrom, BB_MAX_CHROM_STRING - 1); if (strlen(bed->chrom) < 1) lineFileAbort(lf, "chrom cannot be blank or empty"); lineFileAllInts(lf, row, 1, &bed->chromStart, FALSE, 4, "integer", FALSE); lineFileAllInts(lf, row, 2, &bed->chromEnd, FALSE, 4, "integer", FALSE); if (bed->chromEnd < bed->chromStart) lineFileAbort(lf, "chromStart after chromEnd (%u > %u)", bed->chromStart, bed->chromEnd); if (bedFieldCount > 3) { bed->name = row[3]; if (strlen(bed->name) > 255) lineFileAbort(lf, "name [%s] is too long (must not exceed 255 characters)", bed->name); if (isCt) bed->name = cloneString(bed->name); } if (bedFieldCount > 4) { + if ((opts & BED_FIX_SCORE) && !isAllDigits(row[4])) + bed->score = 0; + else lineFileAllInts(lf, row, 4, &bed->score, TRUE, 4, "integer", FALSE); - if (!isCt && (bed->score < 0 || bed->score > 1000)) + if (bed->score < 0 || bed->score > 1000) + { + if (opts & BED_FIX_SCORE) + bed->score = min(max(bed->score, 0), 1000); + else if (!isCt) lineFileAbort(lf, "score (%d) must be between 0 and 1000", bed->score); + // else allow out of range score for custom tracks + } } if (bedFieldCount > 5) { if (!isCt && strlen(row[5]) > 1) lineFileAbort(lf, "Expecting + or - or . in strand, found [%s]",row[5]); bed->strand[0] = row[5][0]; bed->strand[1] = 0; if (bed->strand[0] != '+' && bed->strand[0] != '-' && bed->strand[0] != '.') lineFileAbort(lf, "Expecting + or - or . in strand, found [%s]",row[5]); } if (bedFieldCount > 6) lineFileAllInts(lf, row, 6, &bed->thickStart, FALSE, 4, "integer", FALSE); else bed->thickStart = bed->chromStart; @@ -1639,31 +1649,31 @@ /* printf("%d:%d %s %s s:%d c:%u cs:%u ce:%u csI:%d bsI:%d ls:%d le:%d<BR>\n", lineIx, i, bed->chrom, bed->name, bed->score, bed->blockCount, bed->chromStart, bed->chromEnd, bed->chromStarts[i], bed->blockSizes[i], lastStart, lastEnd); */ // extra check to give user help for a common problem if (chromStarts[i]+bed->chromStart >= bed->chromEnd) { if (chromStarts[i] >= bed->chromStart) lineFileAbort(lf, "BED chromStarts offsets must be relative to chromStart, " "not absolute. Try subtracting chromStart from each offset " "in chromStarts."); else lineFileAbort(lf, "BED chromStarts[i]+chromStart must be less than chromEnd."); } // chrom blocks must ascend without overlap int fudge = 0; - if (allow1bpOverlap) + if (opts & BED_ALLOW_1BP_OVERLAP) fudge = -1; if (!(chromStarts[i] >= chromStarts[i-1] + blockSizes[i-1] + fudge)) lineFileAbort(lf, "BED blocks must be in ascending order without overlap. Blocks %d and %d overlap.", i-1, i); } // last block-end must match chromEnd i = bed->blockCount-1; if ((bed->chromStart + chromStarts[i] + blockSizes[i]) != bed->chromEnd) lineFileAbort(lf, BAD_BLOCKS); } if (bedFieldCount > 12) // get the microarray/colored-exon fields { if (isCt) @@ -1774,38 +1784,52 @@ , asCol->linkedSizeName, asCol->name); if (!(listSize >= 1)) lineFileAbort(lf, "invalid list size %d for list %s must be 1 or greater, empty lists are not allowed", listSize, asCol->name); if (!(listSize == count)) lineFileAbort(lf, "expecting %d elements in %s list, found %d", listSize, asCol->name, count); } } } asCol = asCol->next; } hashFree(&linkHash); } } +void loadAndValidateBedExt(char *row[], int bedFieldCount, int fieldCount, struct lineFile *lf, struct bed * bed, struct asObject *as, boolean isCt, boolean allow1bpOverlap) +/* Convert a row of strings to a bed and validate the contents. Abort with message if invalid data. Optionally validate bedPlus via asObject. + * If a customTrack, then some errors are tolerated. Possibly allow exons to overlap by one base. */ +{ +unsigned opts = 0; +if (isCt) + opts |= BED_IS_CUSTOM_TRACK; +if (allow1bpOverlap) + opts |= BED_ALLOW_1BP_OVERLAP; +loadAndValidateBedOpts(row, bedFieldCount, fieldCount, lf, bed, as, opts); +} + + void loadAndValidateBed(char *row[], int bedFieldCount, int fieldCount, struct lineFile *lf, struct bed * bed, struct asObject *as, boolean isCt) /* Convert a row of strings to a bed and validate the contents. Abort with message if invalid data. Optionally validate bedPlus via asObject. * If a customTrack, then some errors are tolerated. */ { loadAndValidateBedExt(row, bedFieldCount, fieldCount, lf, bed, as, isCt, FALSE); } + struct bed3 *bed3LoadAll(char *fileName) /* Load three columns from file as bed3. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[3]; struct bed3 *list = NULL, *el; while (lineFileRow(lf, row)) { AllocVar(el); el->chrom = cloneString(row[0]); el->chromStart = sqlUnsigned(row[1]); el->chromEnd = sqlUnsigned(row[2]); slAddHead(&list, el); } lineFileClose(&lf);