49fdfa8146be2005af76f6613c84c884d8e6e2bd kent Thu May 5 10:23:38 2011 -0700 Making wigToBigWig detect overlaps in input. diff --git src/lib/bwgCreate.c src/lib/bwgCreate.c index a2adf28..dce68a3 100644 --- src/lib/bwgCreate.c +++ src/lib/bwgCreate.c @@ -263,31 +263,31 @@ slAddHead(pSectionList, section); } lmCleanup(&lmLocal); } static void parseVariableStepSection(struct lineFile *lf, boolean clipDontDie, struct lm *lm, int itemsPerSlot, char *chrom, int chromSize, bits32 span, struct bwgSection **pSectionList) /* Read the single column data in section until get to end. */ { struct lm *lmLocal = lmInit(0); /* Stream through section until get to end of file or next section, * adding values from single column to list. */ char *words[2]; char *line; -struct bwgVariableStepItem *item, *itemList = NULL; +struct bwgVariableStepItem *item, *nextItem, *itemList = NULL; int originalSectionSize = 0; while (lineFileNextReal(lf, &line)) { if (steppedSectionEnd(line, 2)) { lineFileReuse(lf); break; } chopLine(line, words); lmAllocVar(lmLocal, item); int start = lineFileNeedNum(lf, words, 0); if (start <= 0) { errAbort("line %d of %s: zero or negative chromosome coordinate not allowed", lf->lineIx, lf->fileName); @@ -297,30 +297,44 @@ if (item->start + span > chromSize) { warn("line %d of %s: chromosome %s has %u bases, but item ends at %u", lf->lineIx, lf->fileName, chrom, chromSize, item->start + span); if (!clipDontDie) noWarnAbort(); } else { slAddHead(&itemList, item); ++originalSectionSize; } } slSort(&itemList, bwgVariableStepItemCmp); +/* Make sure no overlap between items. 
*/ +if (itemList != NULL) + { + item = itemList; + for (nextItem = item->next; nextItem != NULL; nextItem = nextItem->next) + { + if (item->start + span > nextItem->start) + errAbort("Overlap on %s between items starting at %d and %d.\n" + "Please remove overlaps and try again", + chrom, item->start, nextItem->start); + item = nextItem; + } + } + /* Break up into sections of no more than items-per-slot size. */ int sizeLeft = originalSectionSize; for (item = itemList; item != NULL; ) { /* Figure out size of this section */ int sectionSize = sizeLeft; if (sectionSize > itemsPerSlot) sectionSize = itemsPerSlot; sizeLeft -= sectionSize; /* Convert from list to array representation. */ struct bwgVariableStepPacked *packed, *p; p = lmAllocArray(lm, packed, sectionSize); int i; for (i=0; i<sectionSize; ++i) @@ -503,34 +517,46 @@ item->start, item->end, lf->lineIx, lf->fileName); if (item->end > chrom->size) { warn("bedGraph error line %d of %s: chromosome %s has size %u but item ends at %u", lf->lineIx, lf->fileName, chrom->name, chrom->size, item->end); if (!clipDontDie) noWarnAbort(); } else { slAddHead(&chrom->itemList, item); } } slSort(&chromList, bedGraphChromCmpName); +/* Loop through each chromosome and output the item list, broken into sections + * for that chrom. */ for (chrom = chromList; chrom != NULL; chrom = chrom->next) { slSort(&chrom->itemList, bwgBedGraphItemCmp); + /* Check to make sure no overlap between items. */ + struct bwgBedGraphItem *item = chrom->itemList, *nextItem; + for (nextItem = item->next; nextItem != NULL; nextItem = nextItem->next) + { + if (item->end > nextItem->start) + errAbort("Overlap between %s %d %d and %s %d %d.\nPlease remove overlaps and try again", + chrom->name, item->start, item->end, chrom->name, nextItem->start, nextItem->end); + item = nextItem; + } + /* Break up into sections of no more than items-per-slot size. 
*/ struct bwgBedGraphItem *startItem, *endItem, *nextStartItem = chrom->itemList; for (startItem = chrom->itemList; startItem != NULL; startItem = nextStartItem) { /* Find end item of this section, and start item for next section. * Terminate list at end item. */ int sectionSize = 0; int i; endItem = startItem; for (i=0; i<itemsPerSlot; ++i) { if (nextStartItem == NULL) break; endItem = nextStartItem; nextStartItem = nextStartItem->next; @@ -1028,31 +1054,31 @@ /* Parse out a bed graph line just to check numerical format. */ char *chrom = words[0]; int start = lineFileNeedNum(lf, words, 1); int end = lineFileNeedNum(lf, words, 2); double val = lineFileNeedDouble(lf, words, 3); verbose(2, "bedGraph %s:%d-%d@%g\n", chrom, start, end, val); /* Push back line and call bed parser. */ lineFileReuse(lf); parseBedGraphSection(lf, clipDontDie, chromSizeHash, lm, maxSectionSize, &sectionList); } } slSort(&sectionList, bwgSectionCmp); -/* Check for overlap. */ +/* Check for overlap at section level. */ struct bwgSection *section, *nextSection; for (section = sectionList; section != NULL; section = nextSection) { nextSection = section->next; if (nextSection != NULL) { if (sameString(section->chrom, nextSection->chrom)) { if (section->end > nextSection->start) { errAbort("There's more than one value for %s base %d (in coordinates that start with 1).\n", section->chrom, nextSection->start+1); } } }