49fdfa8146be2005af76f6613c84c884d8e6e2bd
kent
  Thu May 5 10:23:38 2011 -0700
Making wigToBigWig detect overlaps in input.
diff --git src/lib/bwgCreate.c src/lib/bwgCreate.c
index a2adf28..dce68a3 100644
--- src/lib/bwgCreate.c
+++ src/lib/bwgCreate.c
@@ -263,31 +263,31 @@
     slAddHead(pSectionList, section);
     }
 lmCleanup(&lmLocal);
 }
 
 static void parseVariableStepSection(struct lineFile *lf, boolean clipDontDie, struct lm *lm,
 	int itemsPerSlot, char *chrom, int chromSize, bits32 span, struct bwgSection **pSectionList)
 /* Read the single column data in section until get to end. */
 {
 struct lm *lmLocal = lmInit(0);
 
 /* Stream through section until get to end of file or next section,
  * adding values from single column to list. */
 char *words[2];
 char *line;
-struct bwgVariableStepItem *item, *itemList = NULL;
+struct bwgVariableStepItem *item, *nextItem, *itemList = NULL;
 int originalSectionSize = 0;
 while (lineFileNextReal(lf, &line))
     {
     if (steppedSectionEnd(line, 2))
 	{
         lineFileReuse(lf);
 	break;
 	}
     chopLine(line, words);
     lmAllocVar(lmLocal, item);
     int start = lineFileNeedNum(lf, words, 0);
     if (start <= 0)
 	{
 	errAbort("line %d of %s: zero or negative chromosome coordinate not allowed",
 	    lf->lineIx, lf->fileName);
@@ -297,30 +297,44 @@
     if (item->start + span > chromSize)
         {
 	warn("line %d of %s: chromosome %s has %u bases, but item ends at %u",
 	    lf->lineIx, lf->fileName, chrom, chromSize, item->start + span);
 	if (!clipDontDie)
 	    noWarnAbort();
 	}
     else
         {
 	slAddHead(&itemList, item);
 	++originalSectionSize;
 	}
     }
 slSort(&itemList, bwgVariableStepItemCmp);
 
+/* Make sure no overlap between items. */
+if (itemList != NULL)
+    {
+    item = itemList;
+    for (nextItem = item->next; nextItem != NULL; nextItem = nextItem->next)
+        {
+	if (item->start + span > nextItem->start)
+	    errAbort("Overlap on %s between items starting at %d and %d.\n"
+	             "Please remove overlaps and try again",
+		    chrom, item->start, nextItem->start);
+	item = nextItem;
+	}
+    }
+
 /* Break up into sections of no more than items-per-slot size. */
 int sizeLeft = originalSectionSize;
 for (item = itemList; item != NULL; )
     {
     /* Figure out size of this section  */
     int sectionSize = sizeLeft;
     if (sectionSize > itemsPerSlot)
         sectionSize = itemsPerSlot;
     sizeLeft -= sectionSize;
 
     /* Convert from list to array representation. */
     struct bwgVariableStepPacked *packed, *p;		
     p = lmAllocArray(lm, packed, sectionSize);
     int i;
     for (i=0; i<sectionSize; ++i)
@@ -503,34 +517,46 @@
 		item->start, item->end, lf->lineIx, lf->fileName);
     if (item->end > chrom->size)
 	{
         warn("bedGraph error line %d of %s: chromosome %s has size %u but item ends at %u",
 	        lf->lineIx, lf->fileName, chrom->name, chrom->size, item->end);
 	if (!clipDontDie)
 	    noWarnAbort();
 	}
     else
 	{
 	slAddHead(&chrom->itemList, item);
 	}
     }
 slSort(&chromList, bedGraphChromCmpName);
 
+/* Loop through each chromosome and output the item list, broken into sections
+ * for that chrom. */
 for (chrom = chromList; chrom != NULL; chrom = chrom->next)
     {
     slSort(&chrom->itemList, bwgBedGraphItemCmp);
 
+    /* Check to make sure no overlap between items. */
+    struct bwgBedGraphItem *item = chrom->itemList, *nextItem;
+    for (nextItem = item->next; nextItem != NULL; nextItem = nextItem->next)
+        {
+	if (item->end > nextItem->start)
+	    errAbort("Overlap between %s %d %d and %s %d %d.\nPlease remove overlaps and try again",
+	        chrom->name, item->start, item->end, chrom->name, nextItem->start, nextItem->end);
+	item = nextItem;
+	}
+
     /* Break up into sections of no more than items-per-slot size. */
     struct bwgBedGraphItem *startItem, *endItem, *nextStartItem = chrom->itemList;
     for (startItem = chrom->itemList; startItem != NULL; startItem = nextStartItem)
 	{
 	/* Find end item of this section, and start item for next section.
 	 * Terminate list at end item. */
 	int sectionSize = 0;
 	int i;
 	endItem = startItem;
 	for (i=0; i<itemsPerSlot; ++i)
 	    {
 	    if (nextStartItem == NULL)
 		break;
 	    endItem = nextStartItem;
 	    nextStartItem = nextStartItem->next;
@@ -1028,31 +1054,31 @@
 
 	/* Parse out a bed graph line just to check numerical format. */
 	char *chrom = words[0];
 	int start = lineFileNeedNum(lf, words, 1);
 	int end = lineFileNeedNum(lf, words, 2);
 	double val = lineFileNeedDouble(lf, words, 3);
 	verbose(2, "bedGraph %s:%d-%d@%g\n", chrom, start, end, val);
 
 	/* Push back line and call bed parser. */
 	lineFileReuse(lf);
 	parseBedGraphSection(lf, clipDontDie, chromSizeHash, lm, maxSectionSize, &sectionList);
 	}
     }
 slSort(&sectionList, bwgSectionCmp);
 
-/* Check for overlap. */
+/* Check for overlap at section level. */
 struct bwgSection *section, *nextSection;
 for (section = sectionList; section != NULL; section = nextSection)
     {
     nextSection = section->next;
     if (nextSection != NULL)
         {
 	if (sameString(section->chrom, nextSection->chrom))
 	    {
 	    if (section->end > nextSection->start)
 	        {
 		errAbort("There's more than one value for %s base %d (in coordinates that start with 1).\n",
 		    section->chrom, nextSection->start+1);
 		}
 	    }
 	}