433ed3d13a7fd8e09901031c223c54d5abc77178 braney Fri Apr 24 13:11:54 2020 -0700 add a flag to bedToBigBed to allow exons to have 1bp overlapping diff --git src/lib/basicBed.c src/lib/basicBed.c index 305779f..1fd9843 100644 --- src/lib/basicBed.c +++ src/lib/basicBed.c @@ -1392,33 +1392,33 @@ boolean result = FALSE; struct asObject *asStandard = NULL; if (numColumnsToCheck > 15) errAbort("There are only 15 standard BED columns defined and you have asked for %d.", numColumnsToCheck); if (numColumnsToCheck < 3) errAbort("All BED files must have at least 3 columns. (Is it possible that you provided a chrom.sizes file instead of a BED file?)"); char *asStandardText = bedAsDef(15,15); asStandard = asParseText(asStandardText); result = asCompareObjs("Yours", asYours, "BED Standard", asStandard, numColumnsToCheck, NULL, abortOnDifference); freeMem(asStandardText); asObjectFreeList(&asStandard); return result; } -void loadAndValidateBed(char *row[], int bedFieldCount, int fieldCount, struct lineFile *lf, struct bed * bed, struct asObject *as, boolean isCt) +void loadAndValidateBedExt(char *row[], int bedFieldCount, int fieldCount, struct lineFile *lf, struct bed * bed, struct asObject *as, boolean isCt, boolean allow1bpOverlap) /* Convert a row of strings to a bed and validate the contents. Abort with message if invalid data. Optionally validate bedPlus via asObject. - * If a customTrack, then some errors are tolerated. */ + * If a customTrack, then some errors are tolerated. Possibly allow exons to overlap by one base. */ { int count; int *blockSizes = NULL; int *chromStarts; bed->chrom = row[0]; // note this value is not cloned for speed, callers may need to clone it. // This check is usually redundant since the caller should be checking it against actual chromInfo names // however hgLoadBed might not always have that info available. if (strlen(bed->chrom) >= BB_MAX_CHROM_STRING) // must leave room for 0 terminator lineFileAbort(lf, "chrom [%s] is too long (must not exceed %d characters)", bed->chrom, BB_MAX_CHROM_STRING - 1); if (strlen(bed->chrom) < 1) lineFileAbort(lf, "chrom cannot be blank or empty"); lineFileAllInts(lf, row, 1, &bed->chromStart, FALSE, 4, "integer", FALSE); @@ -1563,31 +1563,34 @@ /* printf("%d:%d %s %s s:%d c:%u cs:%u ce:%u csI:%d bsI:%d ls:%d le:%d<BR>\n", lineIx, i, bed->chrom, bed->name, bed->score, bed->blockCount, bed->chromStart, bed->chromEnd, bed->chromStarts[i], bed->blockSizes[i], lastStart, lastEnd); */ // extra check to give user help for a common problem if (chromStarts[i]+bed->chromStart >= bed->chromEnd) { if (chromStarts[i] >= bed->chromStart) lineFileAbort(lf, "BED chromStarts offsets must be relative to chromStart, " "not absolute. Try subtracting chromStart from each offset " "in chromStarts."); else lineFileAbort(lf, "BED chromStarts[i]+chromStart must be less than chromEnd."); } // chrom blocks must ascend without overlap - if (!(chromStarts[i] >= chromStarts[i-1] + blockSizes[i-1])) + int fudge = 0; + if (allow1bpOverlap) + fudge = -1; + if (!(chromStarts[i] >= chromStarts[i-1] + blockSizes[i-1] + fudge)) lineFileAbort(lf, "BED blocks must be in ascending order without overlap. Blocks %d and %d overlap.", i-1, i); } // last block-end must match chromEnd i = bed->blockCount-1; if ((bed->chromStart + chromStarts[i] + blockSizes[i]) != bed->chromEnd) { lineFileAbort(lf, "BED blocks must span chromStart to chromEnd. (chromStart + " "chromStarts[last] + blockSizes[last]) must equal chromEnd."); } } if (bedFieldCount > 12) // get the microarray/colored-exon fields { @@ -1702,30 +1705,38 @@ , asCol->linkedSizeName, asCol->name); if (!(listSize >= 1)) lineFileAbort(lf, "invalid list size %d for list %s must be 1 or greater, empty lists are not allowed", listSize, asCol->name); if (!(listSize == count)) lineFileAbort(lf, "expecting %d elements in %s list, found %d", listSize, asCol->name, count); } } } asCol = asCol->next; } hashFree(&linkHash); } } +void loadAndValidateBed(char *row[], int bedFieldCount, int fieldCount, struct lineFile *lf, struct bed * bed, struct asObject *as, boolean isCt) +/* Convert a row of strings to a bed and validate the contents. Abort with message if invalid data. Optionally validate bedPlus via asObject. + * If a customTrack, then some errors are tolerated. */ +{ +loadAndValidateBedExt(row, bedFieldCount, fieldCount, lf, bed, as, isCt, FALSE); +} + + struct bed3 *bed3LoadAll(char *fileName) /* Load three columns from file as bed3. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[3]; struct bed3 *list = NULL, *el; while (lineFileRow(lf, row)) { AllocVar(el); el->chrom = cloneString(row[0]); el->chromStart = sqlUnsigned(row[1]); el->chromEnd = sqlUnsigned(row[2]); slAddHead(&list, el); } lineFileClose(&lf);