6648b8b2419629b3d401c01f38086be43a2eeff8 kent Fri Mar 8 16:27:46 2013 -0800 Making bedToBigBed and bedGraphToBigWig share more code. Cure is only half as bad as disease (trading sharing 64 lines for having a function with 16 parameters.) diff --git src/inc/bbiFile.h src/inc/bbiFile.h index e492410..c8874ca 100644 --- src/inc/bbiFile.h +++ src/inc/bbiFile.h @@ -268,31 +268,31 @@ /* Fill in summary with data from indicated chromosome range in bigWig/bigBed file. * Returns FALSE if no data at that position. */ boolean bbiSummaryArray(struct bbiFile *bbi, char *chrom, bits32 start, bits32 end, BbiFetchIntervals fetchIntervals, enum bbiSummaryType summaryType, int summarySize, double *summaryValues); /* Fill in summaryValues with data from indicated chromosome range in bigWig file. * Be sure to initialize summaryValues to a default value, which will not be touched * for regions without data in file. (Generally you want the default value to either * be 0.0 or nan("") depending on the application.) Returns FALSE if no data * at that position. */ struct bbiSummaryElement bbiTotalSummary(struct bbiFile *bbi); /* Return summary of entire file! */ -/****** Write side of things - implemented in bbiWrite.c ********/ +/****** Write side of things - implemented in bbiWrite.c. Few people need this. ********/ struct bbiBoundsArray /* Minimum info needed for r-tree indexer - where a section lives on disk and the * range it covers. */ { bits64 offset; /* Offset within file. */ struct cirTreeRange range; /* What is covered. */ }; struct cirTreeRange bbiBoundsArrayFetchKey(const void *va, void *context); /* Fetch bbiBoundsArray key for r-tree */ bits64 bbiBoundsArrayFetchOffset(const void *va, void *context); /* Fetch bbiBoundsArray file offset for r-tree */ @@ -315,32 +315,30 @@ void bbiSumOutStreamWrite(struct bbiSumOutStream *stream, struct bbiSummary *sum); /* Write out next one to stream. 
*/ void bbiOutputOneSummaryFurtherReduce(struct bbiSummary *sum, struct bbiSummary **pTwiceReducedList, int doubleReductionSize, struct bbiBoundsArray **pBoundsPt, struct bbiBoundsArray *boundsEnd, bits32 chromSize, struct lm *lm, struct bbiSumOutStream *stream); /* Write out sum to file, keeping track of minimal info on it in *pBoundsPt, and also adding * it to second level summary. */ struct bbiSummary *bbiSummarySimpleReduce(struct bbiSummary *list, int reduction, struct lm *lm); /* Do a simple reduction - where among other things the reduction level is an integral * multiple of the previous reduction level, and the list is sorted. Allocate result out of lm. */ -#define bbiMaxZoomLevels 10 /* Maximum zoom levels produced by writers. */ - void bbiWriteDummyHeader(FILE *f); /* Write out all-zero header, just to reserve space for it. */ void bbiWriteDummyZooms(FILE *f); /* Write out zeroes to reserve space for ten zoom levels. */ void bbiSummaryElementWrite(FILE *f, struct bbiSummaryElement *sum); /* Write out summary element to file. */ void bbiWriteChromInfo(struct bbiChromUsage *usageList, int blockSize, FILE *f); /* Write out information on chromosomes to file. */ void bbiWriteFloat(FILE *f, float val); /* Write out floating point val to file. Mostly to convert from double... */ @@ -379,30 +377,59 @@ }; struct bbiChromUsage *bbiChromUsageFromBedFile(struct lineFile *lf, struct hash *chromSizesHash, struct bbExIndexMaker *eim, int *retMinDiff, double *retAveSize, bits64 *retBedCount); /* Go through bed file and collect chromosomes and statistics. If eim parameter is non-NULL * collect max field sizes there too. */ #define bbiMaxZoomLevels 10 /* Max number of zoom levels */ #define bbiResIncrement 4 /* Amount to reduce at each zoom level */ int bbiCalcResScalesAndSizes(int aveSize, int resScales[bbiMaxZoomLevels], int resSizes[bbiMaxZoomLevels]); /* Fill in resScales with amount to zoom at each level, and zero out resSizes based * on average span. 
Returns the number of zoom levels we actually will use. */ +typedef struct bbiSummary *bbiWriteReducedOnceReturnReducedTwice( + struct bbiChromUsage *usageList, int fieldCount, + struct lineFile *lf, bits32 initialReduction, bits32 initialReductionCount, + int zoomIncrement, int blockSize, int itemsPerSlot, boolean doCompress, + struct lm *lm, FILE *f, bits64 *retDataStart, bits64 *retIndexStart, + struct bbiSummaryElement *totalSum); +/* Typedef for a function that writes out data reduced by factor of initial reduction, and + * also returns an array of bbiSummaries for the next reduction level. */ + +int bbiWriteZoomLevels( + struct lineFile *lf, /* Input file. */ + FILE *f, /* Output. */ + int blockSize, /* Size of index block */ + int itemsPerSlot, /* Number of data points bundled at lowest level. */ + bbiWriteReducedOnceReturnReducedTwice writeReducedOnceReturnReducedTwice, /* callback */ + int fieldCount, /* Number of fields in bed (4 for bedGraph) */ + boolean doCompress, /* Do we compress. Answer really should be yes! */ + bits64 dataSize, /* Size of data on disk (after compression if any). */ + struct bbiChromUsage *usageList, /* Result from bbiChromUsageFromBedFile */ + int resTryCount, int resScales[], int resSizes[], /* How much to zoom at each level */ + bits32 zoomAmounts[bbiMaxZoomLevels], /* Fills in amount zoomed at each level. */ + bits64 zoomDataOffsets[bbiMaxZoomLevels], /* Fills in where data starts for each zoom level. */ + bits64 zoomIndexOffsets[bbiMaxZoomLevels], /* Fills in where index starts for each level. */ + struct bbiSummaryElement *totalSum); +/* Write out all the zoom levels and return the number of levels written. Writes + * actual zoom amount and the offsets of the zoomed data and index in the zoomAmounts, + * zoomDataOffsets and zoomIndexOffsets parameters. Sorry for all the parameters - it was this or duplicate a big chunk of + * code between bedToBigBed and bedGraphToBigWig. 
*/ + int bbiCountSectionsNeeded(struct bbiChromUsage *usageList, int itemsPerSlot); /* Count up number of sections needed for data. */ void bbiAddToSummary(bits32 chromId, bits32 chromSize, bits32 start, bits32 end, bits32 validCount, double minVal, double maxVal, double sumData, double sumSquares, int reduction, struct bbiSummary **pOutList); /* Add data range to summary - putting it onto top of list if possible, otherwise * expanding list. */ void bbiAddRangeToSummary(bits32 chromId, bits32 chromSize, bits32 start, bits32 end, double val, int reduction, struct bbiSummary **pOutList); /* Add chromosome range to summary - putting it onto top of list if possible, otherwise * expanding list. */ struct bbiSummary *bbiReduceSummaryList(struct bbiSummary *inList,