src/inc/bbiFile.h cb217f20b4a701edfa88f4db623266a9bf25202e

cb217f20b4a701edfa88f4db623266a9bf25202e
kent
  Mon Mar 4 22:58:07 2013 -0800
Some more steps towards supporting multiple extra indexes in bigBed files.
diff --git src/inc/bbiFile.h src/inc/bbiFile.h
index 22d585a..fbbc150 100644
--- src/inc/bbiFile.h
+++ src/inc/bbiFile.h
@@ -24,31 +24,31 @@
  *     zoomHeaders		there are zoomLevels number of these
  *         reductionLevel	4 bytes
  *	   reserved		4 bytes
  *	   dataOffset		8 bytes
  *         indexOffset          8 bytes
  *     autoSql string (zero terminated - only present if autoSqlOffset non-zero)
  *     totalSummary - summary of all data in file - only present if totalSummaryOffset non-zero
  *         basesCovered        8 bytes
  *         minVal              8 bytes float (for bigBed minimum depth of coverage)
  *         maxVal              8 bytes float (for bigBed maximum depth of coverage)
  *         sumData             8 bytes float (for bigBed sum of coverage)
  *         sumSquared          8 bytes float (for bigBed sum of coverage squared)
  *     extendedHeader
  *         extensionSize       2 size of extended header in bytes - currently 64
  *         extraIndexCount     2 number of extra fields we will be indexing
- *         extraIndexOffset    8 Offset to list of non-chrom/start/end indexes
+ *         extraIndexListOffset 8 Offset to list of non-chrom/start/end indexes
  *         reserved            48 All zeroes for now
  *     extraIndexList - one of these for each extraIndex 
  *         type                2 Type of index.  Always 0 for bPlusTree now
  *         fieldCount          2 Number of fields used in this index.  Always 1 for now
  *         reserved            12 All zeroes for now
  *         fieldList - one of these for each field being used in _this_ index
  *            fieldId          2 index of field within record
  *            reserved         2 All zeroes for now
  *     chromosome b+ tree       bPlusTree index
  *     full data
  *         sectionCount		8 bytes (item count for bigBeds)
  *         section data		section count sections, of three types (bed data for bigBeds)
  *     full index               cirTree index
  *     zoom info             one of these for each zoom level
  *         zoom data
@@ -105,36 +105,43 @@
     {
     struct bbiFile *next;	/* Next in list. */
     char *fileName;		/* Name of file - for better error reporting. */
     struct udcFile *udc;	/* Open UDC file handle. */
     bits32 typeSig;		/* bigBedSig or bigWigSig for now. */
     boolean isSwapped;		/* If TRUE need to byte swap everything. */
     struct bptFile *chromBpt;	/* Index of chromosomes. */
     bits16 version;		/* Version number - initially 1. */
     bits16 zoomLevels;		/* Number of zoom levels. */
     bits64 chromTreeOffset;	/* Offset to chromosome index. */
     bits64 unzoomedDataOffset;	/* Start of unzoomed data. */
     bits64 unzoomedIndexOffset;	/* Start of unzoomed index. */
     bits16 fieldCount;		/* Number of columns in bed version. */
     bits16 definedFieldCount;   /* Number of columns using bed standard definitions. */
     bits64 asOffset;		/* Offset to embedded null-terminated AutoSQL file. */
-    bits64 totalSummaryOffset;	/* Offset to total summary information if any.  (On older files have to calculate) */
+    bits64 totalSummaryOffset;	/* Offset to total summary information if any.  
+				   (On older files have to calculate) */
     bits32 uncompressBufSize;	/* Size of uncompression buffer, 0 if uncompressed */
-    bits64 nameIndexOffset;	/* Start of name index or zero if none. */
+    bits64 extensionOffset;	/* Start of header extension block or 0 if none. */
     struct cirTreeFile *unzoomedCir;	/* Unzoomed data index in memory - may be NULL. */
     struct bbiZoomLevel *levelList;	/* List of zoom levels. */
-    struct bptFile *nameBpt;	/* Index of names, may be NULL */
+
+    /* Fields based on extension block. */
+    bits16 extensionSize;   /* Size of extension block */
+    bits16 extraIndexCount; /* Number of extra indexes (on fields other than chrom,start,end) */
+    bits64 extraIndexListOffset;    /* Offset to list of extra indexes */
+
+    struct bptFile *nameBpt;	/* Index of names, may be NULL */ // uglyf - remove
     };
 
 
 struct bbiFile *bbiFileOpen(char *fileName, bits32 sig, char *typeName);
 /* Open up big wig or big bed file. */
 
 void bbiFileClose(struct bbiFile **pBwf);
 /* Close down a big wig/big bed file. */
 
 struct fileOffsetSize *bbiOverlappingBlocks(struct bbiFile *bbi, struct cirTreeFile *ctf,
 	char *chrom, bits32 start, bits32 end, bits32 *retChromId);
 /* Fetch list of file blocks that contain items overlapping chromosome range. */
  
 struct bbiChromIdSize
 /* We store an id/size pair in chromBpt bPlusTree */
@@ -338,30 +345,42 @@
 void bbiWriteFloat(FILE *f, float val);
 /* Write out floating point val to file.  Mostly to convert from double... */
 
 struct hash *bbiChromSizesFromFile(char *fileName);
 /* Read two column file into hash keyed by chrom. */
 
 bits64 bbiTotalSummarySize(struct bbiSummary *list);
 /* Return size on disk of all summaries. */
 
 void bbiChromUsageFree(struct bbiChromUsage **pUsage);
 /* free a single bbiChromUsage structure */
 
 void bbiChromUsageFreeList(struct bbiChromUsage **pList);
 /* free a list of bbiChromUsage structures */
 
+struct bbExIndexMaker
+/* A helper structure used to make the extra indexes, i.e. those beyond the primary one. */
+    {
+    bits16 indexCount;          /* Number of extra indexes. */
+        /* Kind of wish the next four fields, all of which are arrays indexed by the same
+         * thing (presumably the extra index number - confirm), were a single array of a structure instead. */
+    bits16 *indexFields;        /* Array of field ids, one for each extra index. */
+    int *maxFieldSize;          /* Array of maximum sizes seen for each indexed field. */
+    struct bbNamedFileChunk **chunkArrayArray; /* Where we keep the name/start/size triples. */
+    bits64 *fileOffsets;        /* Array of file offsets where each index starts. */
+    int recordCount;            /* Number of records in file. */
+    };
 
 struct bbiChromUsage *bbiChromUsageFromBedFile(struct lineFile *lf, struct hash *chromSizesHash, 
 	struct bbExIndexMaker *eim, int *retMinDiff, double *retAveSize, bits64 *retBedCount);
 /* Go through bed file and collect chromosomes and statistics.  If eim parameter is non-NULL
  * collect max field sizes there too. */
 
 int bbiCountSectionsNeeded(struct bbiChromUsage *usageList, int itemsPerSlot);
 /* Count up number of sections needed for data. */
 
 void bbiAddToSummary(bits32 chromId, bits32 chromSize, bits32 start, bits32 end, 
 	bits32 validCount, double minVal, double maxVal, double sumData, double sumSquares,  
 	int reduction, struct bbiSummary **pOutList);
 /* Add data range to summary - putting it onto top of list if possible, otherwise
  * expanding list. */