dd6685988d7600e805dcf71b943d806dd10289cf
braney
  Sat Sep 30 17:05:23 2023 -0700
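Relax the constraint on sorting for bedToBigBed and bedGraphToBigWig: chromosomes no longer have to appear in ASCII order, but all lines for a chromosome must still form one contiguous block.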

diff --git src/lib/bbiWrite.c src/lib/bbiWrite.c
index 374f920..23e5e8e 100644
--- src/lib/bbiWrite.c
+++ src/lib/bbiWrite.c
@@ -45,58 +45,71 @@
 {
 const struct bbiChromInfo *a = (const struct bbiChromInfo *)va;
 const struct bbiChromInfo *b = (const struct bbiChromInfo *)vb;
 return strcmp(a->name, b->name);
 }
 
 
 void bbiWriteChromInfo(struct bbiChromUsage *usageList, int blockSize, FILE *f)
 /* Write out information on chromosomes to file. */
 {
 int chromCount = slCount(usageList);
 struct bbiChromUsage *usage;
 
 /* Allocate and fill in array from list. */
 struct bbiChromInfo *chromInfoArray = NULL;
+struct bbiChromUsage **usageArray = NULL;  /* usageArray[i] points at the i-th node of usageList (filled in the loop below) */
 int maxChromNameSize = 0;
 if (chromCount > 0)
     {
     AllocArray(chromInfoArray, chromCount);
+    AllocArray(usageArray, chromCount);
     int i;
     for (i=0, usage = usageList; i<chromCount; ++i, usage = usage->next)
 	{
 	char *chromName = usage->name;
 	int len = strlen(chromName);
 	if (len > maxChromNameSize)
 	    maxChromNameSize = len;
 	chromInfoArray[i].name = chromName;
 	chromInfoArray[i].id = usage->id;
 	chromInfoArray[i].size = usage->size;
+        usageArray[i] = usage;  /* keep the node reachable by list position for the id remap after the sort */
 	}
 
     /* Sort so the b-Tree actually works. */
     qsort(chromInfoArray, chromCount, sizeof(chromInfoArray[0]), bbiChromInfoCmp);
+    /* Now we remap the chromIds so each one equals the chrom's index in the name-sorted array; both the usageList node and the array entry get the new id. */
+    for (i=0, usage = usageList; i<chromCount; ++i, usage = usage->next)  /* 'usage' is advanced here but never read in the loop body */
+        {
+        if ( usageArray[chromInfoArray[i].id]->id != i)  /* NOTE(review): usageArray is indexed by the OLD id; assumes usage->id equals the node's position in usageList (0..chromCount-1) -- confirm for all callers */
+            {
+            usageArray[chromInfoArray[i].id]->id = i;
+            chromInfoArray[i].id = i;
+            }
+        }
     }
 
 /* Write chromosome bPlusTree */
 int chromBlockSize = min(blockSize, chromCount);
 bptFileBulkIndexToOpenFile(chromInfoArray, sizeof(chromInfoArray[0]), chromCount, chromBlockSize,
     bbiChromInfoKey, maxChromNameSize, bbiChromInfoVal, 
     sizeof(chromInfoArray[0].id) + sizeof(chromInfoArray[0].size), 
     f);
 
 freeMem(chromInfoArray);
+freeMem(usageArray);
 }
 
 void bbiWriteFloat(FILE *f, float val)
 /* Write out floating point val to file.  Mostly to convert from double... */
 {
 writeOne(f, val);
 }
 
 struct hash *bbiChromSizesFromFile(char *fileName)
 /* Read two column file into hash keyed by chrom. */
 {
 struct hash *hash = hashNew(0);
 struct lineFile *lf = netLineFileOpen(fileName);
 char *row[2];
 while (lineFileRow(lf, row))
@@ -172,64 +185,64 @@
 }
 
 struct bbiChromUsage *bbiChromUsageFromBedFileInternal(struct lineFile *lf, 
         bbiChromSizeFunc chromSizeFunc,  void *chromSizeClosure,
 	struct bbExIndexMaker *eim, int *retMinDiff, double *retAveSize, bits64 *retBedCount, boolean tabSep)
 /* Go through bed file and collect chromosomes and statistics.  If eim parameter is non-NULL
  * collect max field sizes there too. */
 {
 int maxRowSize = (eim == NULL ? 3 : bbExIndexMakerMaxIndexField(eim) + 1);
 char *row[maxRowSize];
 struct bbiChromUsage *usage = NULL, *usageList = NULL;
 int lastStart = -1;
 bits32 id = 0;
 bits64 totalBases = 0, bedCount = 0;
 int minDiff = BIGNUM;
+struct hash *usedHash = newHash(0);
 
 lineFileRemoveInitialCustomTrackLines(lf);
 
 for (;;)
     {
     int rowSize = 0;
 
     if (tabSep)
         rowSize = lineFileChopCharNext(lf, '\t', row, maxRowSize);
     else
         rowSize = lineFileChopNext(lf, row, maxRowSize);
     if (rowSize == 0)
         break;
     lineFileExpectAtLeast(lf, maxRowSize, rowSize);
     char *chrom = row[0];
     int start = lineFileNeedNum(lf, row, 1);
     int end = lineFileNeedNum(lf, row, 2);
     if (eim != NULL)
 	bbExIndexMakerUpdateMaxFieldSize(eim, row);
     if (start > end)
         {
 	    errAbort("end (%d) before start (%d) line %d of %s",
 	    	end, start, lf->lineIx, lf->fileName);
 	}
     ++bedCount;
     totalBases += (end - start);
     if (usage == NULL || differentString(usage->name, chrom))
         {
-	/* make sure chrom names are sorted in ASCII order */
-	if ((usage != NULL) && strcmp(usage->name, chrom) > 0)
+        if (hashLookup(usedHash, chrom))
             {
-	    errAbort("%s is not case-sensitive sorted at line %d.  Please use \"LC_ALL=C sort -k1,1 -k2,2n\" or bedSort and try again.",
-	    	lf->fileName, lf->lineIx);
+	    errAbort("Error: All data for each sequence needs to be sorted together in file %s.  Found sequence named %s not in single block on line %d.  Please use \"LC_ALL=C sort -k1,1 -k2,2n\" or bedSort and try again.", lf->fileName, chrom, lf->lineIx);
             }
+        hashStore(usedHash, chrom);
 	int chromSize = (*chromSizeFunc)(chromSizeClosure, chrom, lf->lineIx);
         if (chromSize == 0)
             errAbort("%s is not found in chromosome sizes file", chrom);
        
 	AllocVar(usage);
 	usage->name = cloneString(chrom);
 	usage->id = id++;
 	usage->size = chromSize;
 	slAddHead(&usageList, usage);
 	lastStart = -1;
 	}
     if (end > usage->size)
         errAbort("End coordinate %d bigger than %s size of %d line %d of %s", end, usage->name, usage->size, lf->lineIx, lf->fileName);
     usage->itemCount += 1;
     if (lastStart >= 0)