3ff9f3d461ea3fc6d42923658c9ea6bf319cbd7b kent Mon Mar 4 21:36:27 2013 -0800 Starting move from only one extra index (on the name field) being allowed to allowing multiple extra indexes. Also defining a header extension block for the file since the header is running out of space. diff --git src/inc/bbiFile.h src/inc/bbiFile.h index 7937255..22d585a 100644 --- src/inc/bbiFile.h +++ src/inc/bbiFile.h @@ -8,62 +8,74 @@ #include "localmem.h" /* bigWig/bigBed file structure: * fixedWidthHeader * magic# 4 bytes * version 2 bytes * zoomLevels 2 bytes * chromosomeTreeOffset 8 bytes * fullDataOffset 8 bytes * fullIndexOffset 8 bytes * fieldCount 2 bytes (for bigWig 0) * definedFieldCount 2 bytes (for bigWig 0) * autoSqlOffset 8 bytes (for bigWig 0) (0 if no autoSql information) * totalSummaryOffset 8 bytes (0 in earlier versions of file lacking totalSummary) * uncompressBufSize 4 bytes (Size of uncompression buffer. 0 if uncompressed.) - * nameIndexOffset 8 bytes (Offset to name index, 0 if no such index) + * extensionOffset 8 bytes (Offset to header extension, 0 if no such extension) * zoomHeaders there are zoomLevels number of these * reductionLevel 4 bytes * reserved 4 bytes * dataOffset 8 bytes * indexOffset 8 bytes * autoSql string (zero terminated - only present if autoSqlOffset non-zero) * totalSummary - summary of all data in file - only present if totalSummaryOffset non-zero * basesCovered 8 bytes * minVal 8 bytes float (for bigBed minimum depth of coverage) * maxVal 8 bytes float (for bigBed maximum depth of coverage) * sumData 8 bytes float (for bigBed sum of coverage) * sumSquared 8 bytes float (for bigBed sum of coverage squared) + * extendedHeader + * extensionSize 2 size of extended header in bytes - currently 64 + * extraIndexCount 2 number of extra fields we will be indexing + * extraIndexOffset 8 Offset to list of non-chrom/start/end indexes + * reserved 48 All zeroes for now + * extraIndexList - one of these for each extraIndex + * type 2 Type of index. 
Always 0 for bPlusTree now + * fieldCount 2 Number of fields used in this index. Always 1 for now + * reserved 12 All zeroes for now + * fieldList - one of these for each field being used in _this_ index + * fieldId 2 index of field within record + * reserved 2 All zeroes for now * chromosome b+ tree bPlusTree index * full data * sectionCount 8 bytes (item count for bigBeds) * section data section count sections, of three types (bed data for bigBeds) * full index cirTree index * zoom info one of these for each zoom level * zoom data * zoomCount 4 bytes * zoom data there are zoomCount of these items * chromId 4 bytes * chromStart 4 bytes * chromEnd 4 bytes * validCount 4 bytes * minVal 4 bytes float * maxVal 4 bytes float * sumData 4 bytes float * sumSquares 4 bytes float * zoom index cirTree index - * name index [optional] bPlusTree index + * extraIndexes [optional] bPlusTreeIndex for each extra field that is indexed * magic# 4 bytes - same as magic number at start of header */ #ifndef CIRTREE_H #include "cirTree.h" #endif #define bbiCurrentVersion 4 /* Version history (of file format, not utilities - corresponds to version field in header) * 1 - Initial release * 1 - Unfortunately when attempting a transparent change to encoders, made the sectionCount * field inconsistent, sometimes not present, sometimes 32 bits. Since offset positions * in index were still accurate this did not break most applications, but it did show * up in the summary section of the Table Browser. * 2 - Made sectionCount consistently 64 bits. Also fixed missing zoomCount in first level of @@ -326,33 +338,35 @@ void bbiWriteFloat(FILE *f, float val); /* Write out floating point val to file. Mostly to convert from double... */ struct hash *bbiChromSizesFromFile(char *fileName); /* Read two column file into hash keyed by chrom. */ bits64 bbiTotalSummarySize(struct bbiSummary *list); /* Return size on disk of all summaries. 
*/ void bbiChromUsageFree(struct bbiChromUsage **pUsage); /* free a single bbiChromUsage structure */ void bbiChromUsageFreeList(struct bbiChromUsage **pList); /* free a list of bbiChromUsage structures */ + struct bbiChromUsage *bbiChromUsageFromBedFile(struct lineFile *lf, struct hash *chromSizesHash, - int *retMinDiff, double *retAveSize, bits64 *retBedCount, int *retMaxNameSize); -/* Go through bed file and collect chromosomes and statistics. */ + struct bbExIndexMaker *eim, int *retMinDiff, double *retAveSize, bits64 *retBedCount); +/* Go through bed file and collect chromosomes and statistics. If eim parameter is non-NULL + * collect max field sizes there too. */ int bbiCountSectionsNeeded(struct bbiChromUsage *usageList, int itemsPerSlot); /* Count up number of sections needed for data. */ void bbiAddToSummary(bits32 chromId, bits32 chromSize, bits32 start, bits32 end, bits32 validCount, double minVal, double maxVal, double sumData, double sumSquares, int reduction, struct bbiSummary **pOutList); /* Add data range to summary - putting it onto top of list if possible, otherwise * expanding list. */ void bbiAddRangeToSummary(bits32 chromId, bits32 chromSize, bits32 start, bits32 end, double val, int reduction, struct bbiSummary **pOutList); /* Add chromosome range to summary - putting it onto top of list if possible, otherwise * expanding list. */