3ff9f3d461ea3fc6d42923658c9ea6bf319cbd7b
kent
  Mon Mar 4 21:36:27 2013 -0800
Starting the move from allowing only one extra index (on the name field) to allowing multiple extra indexes.  Also defining a header extension block for the file, since the header is running out of space.
diff --git src/lib/bbiWrite.c src/lib/bbiWrite.c
index 811d116..aa9730e 100644
--- src/lib/bbiWrite.c
+++ src/lib/bbiWrite.c
@@ -120,62 +120,84 @@
 }
 
 void bbiChromUsageFreeList(struct bbiChromUsage **pList)
 /* free a list of bbiChromUsage structures */
 {
 struct bbiChromUsage *el, *next;
 
 for (el = *pList; el != NULL; el = next)
     {
     next = el->next;
     bbiChromUsageFree(&el);
     }
 *pList = NULL;
 }
 
+int bbExIndexMakerMaxIndexField(struct bbExIndexMaker *eim)
+/* Return the largest field index stored in eim->indexFields, or 0 if indexCount is 0. */
+{
+int maxIx = 0;
+int i;
+for (i=0; i<eim->indexCount; ++i)
+    {
+    int ix = eim->indexFields[i];
+    if (ix > maxIx)
+        maxIx = ix;
+    }
+return maxIx;
+}
+
+void bbExIndexMakerUpdateMaxFieldSize(struct bbExIndexMaker *eim, char **row)
+/* For each indexed field, raise eim->maxFieldSize[i] to the length of that field in row if it is longer. */
+{
+int i;
+for (i=0; i<eim->indexCount; ++i)
+    {
+    int rowIx = eim->indexFields[i];
+    int size = strlen(row[rowIx]); /* row must hold more than rowIx entries; not checked here */
+    if (size > eim->maxFieldSize[i])
+        eim->maxFieldSize[i] = size;
+    }
+}
+
 struct bbiChromUsage *bbiChromUsageFromBedFile(struct lineFile *lf, struct hash *chromSizesHash, 
-	int *retMinDiff, double *retAveSize, bits64 *retBedCount, int *retMaxNameSize)
-/* Go through bed file and collect chromosomes and statistics. */
+	struct bbExIndexMaker *eim, int *retMinDiff, double *retAveSize, bits64 *retBedCount)
+/* Go through bed file and collect chromosomes and statistics.  If eim parameter is non-NULL
+ * collect max field sizes there too. */
 {
-char *row[4];
-int maxRowSize = (retMaxNameSize != NULL ? 4 : 3);
+int maxRowSize = (eim == NULL ? 3 : bbExIndexMakerMaxIndexField(eim) + 1);
+char *row[maxRowSize];
 struct hash *uniqHash = hashNew(0);
 struct bbiChromUsage *usage = NULL, *usageList = NULL;
 int lastStart = -1;
 bits32 id = 0;
 bits64 totalBases = 0, bedCount = 0;
 int minDiff = BIGNUM;
-int maxNameSize = 0;
 
 lineFileRemoveInitialCustomTrackLines(lf);
 
 for (;;)
     {
     int rowSize = lineFileChopNext(lf, row, maxRowSize);
     if (rowSize == 0)
         break;
     lineFileExpectAtLeast(lf, maxRowSize, rowSize);
     char *chrom = row[0];
     int start = lineFileNeedNum(lf, row, 1);
     int end = lineFileNeedNum(lf, row, 2);
-    if (rowSize > 3)
-        {
-	char *name = row[3];
-	int nameSize = strlen(name);
-	if (nameSize > maxNameSize)
-	    maxNameSize = nameSize;
-	}
+    if (eim != NULL)
+	bbExIndexMakerUpdateMaxFieldSize(eim, row);
     if (start > end)
         {
 	    errAbort("end (%d) before start (%d) line %d of %s",
 	    	end, start, lf->lineIx, lf->fileName);
 	}
     ++bedCount;
     totalBases += (end - start);
     if (usage == NULL || differentString(usage->name, chrom))
         {
 	if (hashLookup(uniqHash, chrom))
 	    {
 	    errAbort("%s is not sorted at line %d.  Please use \"sort -k1,1 -k2,2n\" or bedSort and try again.",
 	    	lf->fileName, lf->lineIx);
 	    }
 	hashAdd(uniqHash, chrom, NULL);
@@ -198,32 +220,30 @@
 	int diff = start - lastStart;
 	if (diff < minDiff)
 	    {
 	    if (diff < 0)
 		errAbort("%s is not sorted at line %d.  Please use \"sort -k1,1 -k2,2n\" or bedSort and try again.",
 		    lf->fileName, lf->lineIx);
 	    minDiff = diff;
 	    }
 	}
     lastStart = start;
     }
 slReverse(&usageList);
 *retMinDiff = minDiff;
 *retAveSize = (double)totalBases/bedCount;
 *retBedCount = bedCount;
-if (retMaxNameSize != NULL)
-    *retMaxNameSize = maxNameSize;
 freeHash(&uniqHash);
 return usageList;
 }
 
 int bbiCountSectionsNeeded(struct bbiChromUsage *usageList, int itemsPerSlot)
 /* Count up number of sections needed for data. */
 {
 struct bbiChromUsage *usage;
 int count = 0;
 for (usage = usageList; usage != NULL; usage = usage->next)
     {
     int countOne = (usage->itemCount + itemsPerSlot - 1)/itemsPerSlot;
     count += countOne;
     verbose(2, "%s %d, %d blocks of %d\n", usage->name, usage->itemCount, countOne, itemsPerSlot);
     }