8fdef9e866ecc228bfe4ea54102d0079818f0366 braney Fri Apr 8 14:49:38 2022 -0700 let bedGraphToBigWig use chromAlias.bb as chromSizes file. Add tests to both bedGraphToBigWig and bedToBigBed diff --git src/lib/bbiWrite.c src/lib/bbiWrite.c index 37bfdd9..fc31ab1 100644 --- src/lib/bbiWrite.c +++ src/lib/bbiWrite.c @@ -1,32 +1,33 @@ /* bbiWrite.c - Routines to help write bigWig and bigBed files. See also bbiFile.h */ /* Copyright (C) 2014 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "limits.h" #include "common.h" #include "hash.h" #include "linefile.h" #include "sqlNum.h" #include "zlibFace.h" #include "cirTree.h" #include "bPlusTree.h" #include "bbiFile.h" -//#include "bbiAlias.h" +#include "bbiAlias.h" #include "net.h" #include "obscure.h" +#include "bigBed.h" void bbiWriteDummyHeader(FILE *f) /* Write out all-zero header, just to reserve space for it. */ { repeatCharOut(f, 0, 64); } void bbiWriteDummyZooms(FILE *f) /* Write out zeroes to reserve space for ten zoom levels. */ { repeatCharOut(f, 0, bbiMaxZoomLevels * 24); } void bbiSummaryElementWrite(FILE *f, struct bbiSummaryElement *sum) /* Write out summary element to file. */ @@ -158,31 +159,31 @@ } void bbExIndexMakerUpdateMaxFieldSize(struct bbExIndexMaker *eim, char **row) /* Fold in information about row into bbExIndexMaker into eim->maxFieldSize */ { int i; for (i=0; i<eim->indexCount; ++i) { int rowIx = eim->indexFields[i]; int size = strlen(row[rowIx]); if (size > eim->maxFieldSize[i]) eim->maxFieldSize[i] = size; } } -struct bbiChromUsage *bbiChromUsageFromBedFileAlias(struct lineFile *lf, +struct bbiChromUsage *bbiChromUsageFromBedFileInternal(struct lineFile *lf, bbiChromSizeFunc chromSizeFunc, void *chromSizeClosure, struct bbExIndexMaker *eim, int *retMinDiff, double *retAveSize, bits64 *retBedCount, boolean tabSep) /* Go through bed file and collect chromosomes and statistics. If eim parameter is non-NULL * collect max field sizes there too. */ { int maxRowSize = (eim == NULL ? 3 : bbExIndexMakerMaxIndexField(eim) + 1); char *row[maxRowSize]; struct bbiChromUsage *usage = NULL, *usageList = NULL; int lastStart = -1; bits32 id = 0; bits64 totalBases = 0, bedCount = 0; int minDiff = BIGNUM; lineFileRemoveInitialCustomTrackLines(lf); @@ -205,31 +206,31 @@ if (start > end) { errAbort("end (%d) before start (%d) line %d of %s", end, start, lf->lineIx, lf->fileName); } ++bedCount; totalBases += (end - start); if (usage == NULL || differentString(usage->name, chrom)) { /* make sure chrom names are sorted in ASCII order */ if ((usage != NULL) && strcmp(usage->name, chrom) > 0) { errAbort("%s is not case-sensitive sorted at line %d. Please use \"sort -k1,1 -k2,2n\" with LC_COLLATE=C, or bedSort and try again.", lf->fileName, lf->lineIx); } - int chromSize = (*chromSizeFunc)(chromSizeClosure, chrom); + int chromSize = (*chromSizeFunc)(chromSizeClosure, chrom, lf->lineIx); if (chromSize == 0) errAbort("%s is not found in chromosome sizes file", chrom); AllocVar(usage); usage->name = cloneString(chrom); usage->id = id++; usage->size = chromSize; slAddHead(&usageList, usage); lastStart = -1; } if (end > usage->size) errAbort("End coordinate %d bigger than %s size of %d line %d of %s", end, usage->name, usage->size, lf->lineIx, lf->fileName); usage->itemCount += 1; if (lastStart >= 0) { @@ -242,45 +243,87 @@ minDiff = diff; } } lastStart = start; } slReverse(&usageList); double aveSize = 0; if (bedCount > 0) aveSize = (double)totalBases/bedCount; *retMinDiff = minDiff; *retAveSize = aveSize; *retBedCount = bedCount; return usageList; } -static int chromHashSizeFunc(void *closure, char *chrom) +struct chromSizeClosure // a structure that contains the data we need to get a chromosome size from a bigBed +{ + struct bbiFile *bbi; + struct bptIndex *bptIndex; + struct lm *lm; + struct hash *usedAlias; +}; + +static int bbChromSizeFunc(void *closure, char *chrom, int lineIx) +/* A function to return the size of a given sequence. */ +{ +struct chromSizeClosure *bbChromSizeClosure = (struct chromSizeClosure *)closure; + +return bbiAliasChromSizeExt(bbChromSizeClosure->bbi, bbChromSizeClosure->bptIndex, bbChromSizeClosure->lm, chrom, bbChromSizeClosure->usedAlias, lineIx); +} + +struct bbiChromUsage *bbiChromUsageFromBedFileAlias(struct lineFile *lf, char *chromAliasBb, + struct bbExIndexMaker *eim, int *retMinDiff, double *retAveSize, bits64 *retBedCount, boolean tabSep) +/* A wrapper for bbiChromUsageFromBedFile that uses a bigBed to find chromosome sizes. */ +{ +struct chromSizeClosure *bbChromSizeClosure = NULL; + +AllocVar(bbChromSizeClosure); +bbChromSizeClosure->bbi = bigBedFileOpen(chromAliasBb); +bbChromSizeClosure->bptIndex = bbiAliasOpenExtra(bbChromSizeClosure->bbi); +bbChromSizeClosure->lm = lmInit(0); +bbChromSizeClosure->usedAlias = hashNew(0); + +struct bbiChromUsage *usageList = bbiChromUsageFromBedFileInternal(lf, bbChromSizeFunc, bbChromSizeClosure, eim, retMinDiff, retAveSize, retBedCount, tabSep); + +bbiFileClose(&bbChromSizeClosure->bbi); +struct bptIndex *next, *bptIndex = bbChromSizeClosure->bptIndex; +for(; bptIndex; bptIndex = next) + { + next = bptIndex->next; + freez(&bptIndex); + } +lmCleanup(&bbChromSizeClosure->lm); + +return usageList; +} + +static int chromHashSizeFunc(void *closure, char *chrom, int lineIx) /* Function to find the size of sequence using a hash passed in as a closure. */ { struct hash *chromSizesHash = (struct hash *)closure; struct hashEl *chromHashEl = hashLookup(chromSizesHash, chrom); if (chromHashEl == NULL) errAbort("%s is not found in chromosome sizes file", chrom); return ptToInt(chromHashEl->val); } struct bbiChromUsage *bbiChromUsageFromBedFile(struct lineFile *lf, struct hash *chromSizesHash, struct bbExIndexMaker *eim, int *retMinDiff, double *retAveSize, bits64 *retBedCount, boolean tabSep) /* A wrapper for bbiChromUsageFromBedFile that uses a hash to find chromosome sizes. */ { -return bbiChromUsageFromBedFileAlias(lf, chromHashSizeFunc, chromSizesHash, +return bbiChromUsageFromBedFileInternal(lf, chromHashSizeFunc, chromSizesHash, eim, retMinDiff, retAveSize, retBedCount, tabSep); } int bbiCalcResScalesAndSizes(int aveSize, int resScales[bbiMaxZoomLevels], int resSizes[bbiMaxZoomLevels]) /* Fill in resScales with amount to zoom at each level, and zero out resSizes based * on average span. Returns the number of zoom levels we actually will use. */ { int resTryCount = bbiMaxZoomLevels, resTry; int resIncrement = bbiResIncrement; int minZoom = 10; int res = aveSize; if (res < minZoom) res = minZoom; for (resTry = 0; resTry < resTryCount; ++resTry)