3d890482f243af7cdb51d56b5efde7c43e2ed59e
braney
  Wed Dec 15 13:13:52 2010 -0800
add routine to let validateFiles check for the signature at the beginning and end of bigWigs
diff --git src/inc/bbiFile.h src/inc/bbiFile.h
index 88cd288..803a0f6 100644
--- src/inc/bbiFile.h
+++ src/inc/bbiFile.h
@@ -1,355 +1,358 @@
 /* bbiFile - Big Binary Indexed file.  Stuff that's common between bigWig and bigBed. */
 
 #ifndef BBIFILE_H
 #define BBIFILE_H
 
 #include "cirTree.h"
 
 /* bigWig/bigBed file structure:
  *     fixedWidthHeader
  *         magic# 		4 bytes
  *         version              2 bytes
  *	   zoomLevels		2 bytes
  *         chromosomeTreeOffset	8 bytes
  *         fullDataOffset	8 bytes
  *	   fullIndexOffset	8 bytes
  *         fieldCount           2 bytes (for bigWig 0)
  *         definedFieldCount    2 bytes (for bigWig 0)
  *         autoSqlOffset        8 bytes (for bigWig 0) (0 if no autoSql information)
  *         totalSummaryOffset   8 bytes (0 in earlier versions of file lacking totalSummary)
  *         uncompressBufSize    4 bytes (Size of uncompression buffer.  0 if uncompressed.)
  *         reserved             8 bytes (0 for now)
  *     zoomHeaders		there are zoomLevels number of these
  *         reductionLevel	4 bytes
  *	   reserved		4 bytes
  *	   dataOffset		8 bytes
  *         indexOffset          8 bytes
  *     autoSql string (zero terminated - only present if autoSqlOffset non-zero)
  *     totalSummary - summary of all data in file - only present if totalSummaryOffset non-zero
  *         basesCovered        8 bytes
  *         minVal              8 bytes float (for bigBed minimum depth of coverage)
  *         maxVal              8 bytes float (for bigBed maximum depth of coverage)
  *         sumData             8 bytes float (for bigBed sum of coverage)
  *         sumSquared          8 bytes float (for bigBed sum of coverage squared)
  *     chromosome b+ tree       bPlusTree index
  *     full data
  *         sectionCount		8 bytes (item count for bigBeds)
  *         section data		section count sections, of three types (bed data for bigBeds)
  *     full index               cirTree index
  *     zoom info             one of these for each zoom level
  *         zoom data
  *             zoomCount	4 bytes
  *             zoom data	there are zoomCount of these items
  *                 chromId	4 bytes
  *	           chromStart	4 bytes
  *                 chromEnd     4 bytes
  *                 validCount	4 bytes
  *                 minVal       4 bytes float 
  *                 maxVal       4 bytes float
  *                 sumData      4 bytes float
  *                 sumSquares   4 bytes float
  *         zoom index        	cirTree index
  *     magic# 		4 bytes - same as magic number at start of header
  */
 
 #ifndef CIRTREE_H
 #include "cirTree.h"
 #endif
 
 #define bbiCurrentVersion 4
 /* Version history (of file format, not utilities - corresponds to version field in header)
  *    1 - Initial release
  *    1 - (Still version 1.) Unfortunately when attempting a transparent change to encoders, made the sectionCount
  *        field inconsistent, sometimes not present, sometimes 32 bits.  Since offset positions
  *        in index were still accurate this did not break most applications, but it did show
  *        up in the summary section of the Table Browser.
  *    2 - Made sectionCount consistently 64 bits. Also fixed missing zoomCount in first level of
  *        zoom in files made by bedToBigBed and bedGraphToBigWig.  (The older wigToBigWig was fine.)
  *        Added totalSummary section.
  *    3 - Adding zlib compression.  Only active if uncompressBufSize is non-zero in header.
  *    4 - Fixed problem in encoder for the max field in zoom levels higher than the first one.
  *        Added an extra sig at end of file.
  */
 
 struct bbiZoomLevel
 /* One zoom level of a bigWig/bigBed file - same fields as the zoomHeader in the file. */
     {
     struct bbiZoomLevel *next;		/* Next in list. */
     bits32 reductionLevel;		/* How many bases per item. */
     bits32 reserved;			/* Zero for now. */
     bits64 dataOffset;			/* Offset of zoom data for this level in file. */
     bits64 indexOffset;			/* Offset of zoom index for this level in file. */
     };
 
 struct bbiZoomLevel *bbiBestZoom(struct bbiZoomLevel *levelList, int desiredReduction);
 /* Return the zoom level in levelList that is closest to desiredReduction without
  * exceeding it.  NOTE(review): result when no level qualifies is not stated here. */
 
 struct bbiFile 
 /* An open bigWig or bigBed file.  Most fields mirror the fixedWidthHeader described above. */
     {
     struct bbiFile *next;	/* Next in list. */
     char *fileName;		/* Name of file - for better error reporting. */
     struct udcFile *udc;	/* Open UDC file handle. */
     bits32 typeSig;		/* bigBedSig or bigWigSig for now. */
     boolean isSwapped;		/* If TRUE need to byte swap everything. */
     struct bptFile *chromBpt;	/* Index of chromosomes. */
     bits16 version;		/* File format version - see version history above. */
     bits16 zoomLevels;		/* Number of zoom levels. */
     bits64 chromTreeOffset;	/* Offset to chromosome index. */
     bits64 unzoomedDataOffset;	/* Start of unzoomed (full) data. */
     bits64 unzoomedIndexOffset;	/* Start of unzoomed (full) index. */
     bits16 fieldCount;		/* Number of columns in bed version (0 for bigWig). */
     bits16 definedFieldCount;   /* Number of columns using bed standard definitions (0 for bigWig). */
     bits64 asOffset;		/* Offset to embedded null-terminated AutoSQL file, 0 if none. */
     bits64 totalSummaryOffset;	/* Offset to total summary, 0 in older files lacking it (then have to calculate). */
     bits32 uncompressBufSize;	/* Size of uncompression buffer, 0 if uncompressed. */
     struct cirTreeFile *unzoomedCir;	/* Unzoomed data index in memory - may be NULL. */
     struct bbiZoomLevel *levelList;	/* List of zoom levels. */
     };
 
 
 struct bbiFile *bbiFileOpen(char *fileName, bits32 sig, char *typeName);
 /* Open up bigWig or bigBed file fileName.  sig is presumably the expected signature - confirm. */
 
 void bbiFileClose(struct bbiFile **pBwf);
 /* Close down a bigWig/bigBed file.  Takes the address of the caller's bbiFile pointer. */
 
 struct fileOffsetSize *bbiOverlappingBlocks(struct bbiFile *bbi, struct cirTreeFile *ctf,
 	char *chrom, bits32 start, bits32 end, bits32 *retChromId);
 /* Fetch list of file blocks that contain items overlapping the range start-end on chrom. */
  
 struct bbiChromIdSize
 /* The id/size pair stored for a chromosome in the chromBpt bPlusTree. */
     {
     bits32 chromId;	/* Chromosome ID */
     bits32 chromSize;	/* Chromosome size */
     };
 
 struct bbiChromInfo
 /* Name, ID and size of a chromosome. Used to assign IDs to chromosomes. */
     {
     struct bbiChromInfo *next;	/* Next in list. */
     char *name;		/* Chromosome name */
     bits32 id;		/* Chromosome ID - a small number usually */
     bits32 size;	/* Chromosome size in bases */
     };
 
 struct bbiChromInfo *bbiChromList(struct bbiFile *bbi);
 /* Return list of all chromosomes in file.  Dispose of this with bbiChromInfoFreeList. */
 
 void bbiChromInfoFreeList(struct bbiChromInfo **pList);
 /* Free a list of bbiChromInfo, such as the one returned by bbiChromList. */
 
 bits32 bbiChromSize(struct bbiFile *bbi, char *chrom);
 /* Return size of chromosome chrom in bbi. */
 
 void bbiChromInfoKey(const void *va, char *keyBuf);
 /* Get key field out of bbiChromInfo va - presumably copied into keyBuf (confirm). */
 
 void *bbiChromInfoVal(const void *va);
 /* Get val field out of bbiChromInfo va. */
 
 struct bbiChromUsage
 /* Information on how many items per chromosome etc.  Used by multipass bbiFile writers. */
     {
     struct bbiChromUsage *next;	/* Next in list. */
     char *name;	/* Chromosome name. */
     bits32 itemCount;	/* Number of items for this chromosome. */
     bits32 id;	/* Unique ID for chromosome. */
     bits32 size;	/* Size of chromosome. */
     };
 
 
 enum bbiSummaryType
 /* Ways to summarize data.  Convert to/from strings with bbiSummaryTypeToString/FromString. */
     {
     bbiSumMean = 0,	/* Average value */
     bbiSumMax = 1,	/* Maximum value */
     bbiSumMin = 2,	/* Minimum value */
     bbiSumCoverage = 3,  /* Bases in region containing actual data. */
     bbiSumStandardDeviation = 4, /* Standard deviation in window. */
     };
 
 enum bbiSummaryType bbiSummaryTypeFromString(char *string);
 /* Return summary type given a descriptive string.  NOTE(review): unrecognized strings - confirm. */
 
 char *bbiSummaryTypeToString(enum bbiSummaryType type);
 /* Convert summary type from enum to its string representation. */
 
 struct bbiSummary
 /* A summary item in memory - fields of bbiSummaryOnDisk plus list link and file offset. */
     {
     struct bbiSummary *next;	/* Next in list. */
     bits32 chromId;		/* ID of associated chromosome. */
     bits32 start,end;		/* Range of chromosome covered. */
     bits32 validCount;		/* Count of (bases) with actual data. */
     float minVal;		/* Minimum value of items */
     float maxVal;		/* Maximum value of items */
     float sumData;		/* sum of values for each base. */
     float sumSquares;		/* sum of squares for each base. */
     bits64 fileOffset;		/* Offset of summary in file. */
     };
 
 #define bbiSummaryFreeList slFreeList	/* bbiSummary lists are freed with plain slFreeList. */
 
 
 struct bbiSummaryOnDisk
 /* The part of bbiSummary that ends up on disk - fields in the same order as written to disk. */
     {
     bits32 chromId;		/* ID of associated chromosome. */
     bits32 start,end;		/* Range of chromosome covered. */
     bits32 validCount;		/* Count of (bases) with actual data. */
     float minVal;		/* Minimum value of items */
     float maxVal;		/* Maximum value of items */
     float sumData;		/* sum of values for each base. */
     float sumSquares;		/* sum of squares for each base. */
     };
 
 struct bbiInterval
 /* A single interval on a chromosome and the value associated with it. */
     {
     struct bbiInterval *next;	/* Next in list. */
     bits32 start, end;			/* Position in chromosome, half open. */
     double val;				/* Value at that position. */
     };
 
 typedef struct bbiInterval *(*BbiFetchIntervals)(struct bbiFile *bbi, char *chrom, 
 					    bits32 start, bits32 end, struct lm *lm);
 /* A callback function that returns a bbiInterval list for a chromosome range of bbi. */
 
 void bbiAttachUnzoomedCir(struct bbiFile *bbi);
 /* Make sure unzoomed cir (bbi->unzoomedCir) is attached. */
 
 struct bbiSummaryElement
 /* An element of a summary from the user side - wider fields than the bbiSummary equivalents. */
     {
     bits64 validCount;		/* Count of (bases) with actual data. */
     double minVal;		/* Minimum value of items */
     double maxVal;		/* Maximum value of items */
     double sumData;		/* sum of values for each base. */
     double sumSquares;		/* sum of squares for each base. */
     };
 
 boolean bbiSummaryArrayExtended(struct bbiFile *bbi, char *chrom, bits32 start, bits32 end,
 	BbiFetchIntervals fetchIntervals,
 	int summarySize, struct bbiSummaryElement *summary);
 /* Fill in summary with data from indicated chromosome range in bigWig/bigBed file.
  * fetchIntervals is a callback returning a bbiInterval list.  Returns FALSE if no data at that position. */
 
 boolean bbiSummaryArray(struct bbiFile *bbi, char *chrom, bits32 start, bits32 end,
 	BbiFetchIntervals fetchIntervals,
 	enum bbiSummaryType summaryType, int summarySize, double *summaryValues);
 /* Fill in summaryValues with data of type summaryType from indicated chromosome range in
  * bigWig file.  Be sure to initialize summaryValues to a default value, which will not be
  * touched for regions without data in file.  (Generally you want the default value to
  * either be 0.0 or nan("") depending on the application.)  Returns FALSE if no data
  * at that position. */
 
 struct bbiSummaryElement bbiTotalSummary(struct bbiFile *bbi);
 /* Return summary of entire file, by value. */
 
 /****** Write side of things - implemented in bbiWrite.c ********/
 
 struct bbiBoundsArray
 /* Minimum info needed for r-tree indexer - where a section lives on disk and the
  * range it covers. */
     {
     bits64 offset;		/* Offset of section within file. */
     struct cirTreeRange range;	/* Range the section covers. */
     };
 
 struct cirTreeRange bbiBoundsArrayFetchKey(const void *va, void *context);
 /* Fetch the range (r-tree key) of bbiBoundsArray element va. */
 
 bits64 bbiBoundsArrayFetchOffset(const void *va, void *context);
 /* Fetch the file offset of bbiBoundsArray element va for r-tree. */
 
 struct bbiSumOutStream
 /* Buffer output to file so have a chance to compress. */
     {
     struct bbiSummaryOnDisk *array;	/* Buffer of summaries in on-disk form. */
     int elCount;			/* Presumably number of elements in use in array - confirm. */
     int allocCount;			/* Presumably number of elements allocated in array - confirm. */
     FILE *f;				/* File that output goes to. */
     boolean doCompress;		/* Presumably TRUE to compress output - confirm. */
     };
 
 struct bbiSumOutStream *bbiSumOutStreamOpen(int allocCount, FILE *f, boolean doCompress);
 /* Open new bbiSumOutStream on f.  Free with bbiSumOutStreamClose. */
 
 void bbiSumOutStreamClose(struct bbiSumOutStream **pStream);
 /* Free up bbiSumOutStream.  NOTE(review): whether pending items are flushed first - confirm. */
 
 void bbiSumOutStreamWrite(struct bbiSumOutStream *stream, struct bbiSummary *sum);
 /* Write out next summary, sum, to stream. */
 
 void bbiOutputOneSummaryFurtherReduce(struct bbiSummary *sum, 
 	struct bbiSummary **pTwiceReducedList, 
 	int doubleReductionSize, struct bbiBoundsArray **pBoundsPt, 
 	struct bbiBoundsArray *boundsEnd, bits32 chromSize, struct lm *lm, 
 	struct bbiSumOutStream *stream);
 /* Write out sum to stream, keeping track of minimal info on it in *pBoundsPt, and also adding
  * it to second level summary (presumably *pTwiceReducedList - confirm). */
 
 struct bbiSummary *bbiSummarySimpleReduce(struct bbiSummary *list, int reduction, struct lm *lm);
 /* Do a simple reduction of list - where among other things the reduction level is an integral
  * multiple of the previous reduction level, and the list is sorted. Allocate result out of lm. */
 
 #define bbiMaxZoomLevels 10	/* Maximum number of zoom levels produced by writers. */
 
 void bbiWriteDummyHeader(FILE *f);
 /* Write out all-zero header to f, just to reserve space for it. */
 
 void bbiWriteDummyZooms(FILE *f);
 /* Write out zeroes to f to reserve space for ten zoom levels (cf. bbiMaxZoomLevels). */
 
 void bbiSummaryElementWrite(FILE *f, struct bbiSummaryElement *sum);
 /* Write out summary element sum to file f. */
 
 void bbiWriteChromInfo(struct bbiChromUsage *usageList, int blockSize, FILE *f);
 /* Write out information on chromosomes in usageList to file f. */
 
 void bbiWriteFloat(FILE *f, float val);
 /* Write out floating point val to file f.  Mostly to convert from double... */
 
 struct hash *bbiChromSizesFromFile(char *fileName);
 /* Read two column file (presumably chrom, size - confirm) into hash keyed by chrom. */
 
 bits64 bbiTotalSummarySize(struct bbiSummary *list);
 /* Return size on disk of all summaries in list. */
 
 void bbiChromUsageFree(struct bbiChromUsage **pUsage);
 /* Free a single bbiChromUsage structure. */
 
 void bbiChromUsageFreeList(struct bbiChromUsage **pList);
 /* Free a list of bbiChromUsage structures. */
 
 struct bbiChromUsage *bbiChromUsageFromBedFile(struct lineFile *lf, 
 	struct hash *chromSizesHash, int *retMinDiff, double *retAveSize, bits64 *retBedCount);
 /* Go through bed file lf and collect chromosomes and statistics.  Free with bbiChromUsageFreeList. */
 
 int bbiCountSectionsNeeded(struct bbiChromUsage *usageList, int itemsPerSlot);
 /* Count up number of sections needed for data (presumably itemsPerSlot items per section - confirm). */
 
 void bbiAddToSummary(bits32 chromId, bits32 chromSize, bits32 start, bits32 end, 
 	bits32 validCount, double minVal, double maxVal, double sumData, double sumSquares,  
 	int reduction, struct bbiSummary **pOutList);
 /* Add data range to summary *pOutList - putting it onto top of list if possible, otherwise
  * expanding list. */
 
 void bbiAddRangeToSummary(bits32 chromId, bits32 chromSize, bits32 start, bits32 end, 
 	double val, int reduction, struct bbiSummary **pOutList);
 /* Add chromosome range with value val to summary *pOutList - putting it onto top of list if
  * possible, otherwise expanding list. */
 
 struct bbiSummary *bbiReduceSummaryList(struct bbiSummary *inList, 
 	struct bbiChromInfo *chromInfoArray, int reduction);
 /* Reduce summary list inList to another summary list, which is returned. */
 
 bits64 bbiWriteSummaryAndIndex(struct bbiSummary *summaryList, 
 	int blockSize, int itemsPerSlot, boolean doCompress, FILE *f);
 /* Write out summaryList and an index to it to f, returning start position of
  * summary index. */
 
+boolean bbiFileCheckSigs(char *fileName, bits32 sig, char *typeName);
+/* Check file signatures at beginning and end of file.  Presumably compared with sig - confirm. */
+
 #endif /* BBIFILE_H */