7df99795b147931dfea8220ed5ab305d11fde6b4 braney Thu Mar 10 15:41:46 2022 -0800 move some chromAlias stuff around so bedToBigBed can use a chromAlias bigBed as a chromAlias file. diff --git src/lib/bbiWrite.c src/lib/bbiWrite.c index 5469ce7..37bfdd9 100644 --- src/lib/bbiWrite.c +++ src/lib/bbiWrite.c @@ -1,29 +1,30 @@ /* bbiWrite.c - Routines to help write bigWig and bigBed files. See also bbiFile.h */ /* Copyright (C) 2014 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "limits.h" #include "common.h" #include "hash.h" #include "linefile.h" #include "sqlNum.h" #include "zlibFace.h" #include "cirTree.h" #include "bPlusTree.h" #include "bbiFile.h" +//#include "bbiAlias.h" #include "net.h" #include "obscure.h" void bbiWriteDummyHeader(FILE *f) /* Write out all-zero header, just to reserve space for it. */ { repeatCharOut(f, 0, 64); } void bbiWriteDummyZooms(FILE *f) /* Write out zeroes to reserve space for ten zoom levels. */ { repeatCharOut(f, 0, bbiMaxZoomLevels * 24); } @@ -157,31 +158,32 @@ } void bbExIndexMakerUpdateMaxFieldSize(struct bbExIndexMaker *eim, char **row) /* Fold in information about row into bbExIndexMaker into eim->maxFieldSize */ { int i; for (i=0; i<eim->indexCount; ++i) { int rowIx = eim->indexFields[i]; int size = strlen(row[rowIx]); if (size > eim->maxFieldSize[i]) eim->maxFieldSize[i] = size; } } -struct bbiChromUsage *bbiChromUsageFromBedFile(struct lineFile *lf, struct hash *chromSizesHash, +struct bbiChromUsage *bbiChromUsageFromBedFileAlias(struct lineFile *lf, + bbiChromSizeFunc chromSizeFunc, void *chromSizeClosure, struct bbExIndexMaker *eim, int *retMinDiff, double *retAveSize, bits64 *retBedCount, boolean tabSep) /* Go through bed file and collect chromosomes and statistics. If eim parameter is non-NULL * collect max field sizes there too. */ { int maxRowSize = (eim == NULL ? 
3 : bbExIndexMakerMaxIndexField(eim) + 1); char *row[maxRowSize]; struct bbiChromUsage *usage = NULL, *usageList = NULL; int lastStart = -1; bits32 id = 0; bits64 totalBases = 0, bedCount = 0; int minDiff = BIGNUM; lineFileRemoveInitialCustomTrackLines(lf); for (;;) @@ -203,34 +205,34 @@ if (start > end) { errAbort("end (%d) before start (%d) line %d of %s", end, start, lf->lineIx, lf->fileName); } ++bedCount; totalBases += (end - start); if (usage == NULL || differentString(usage->name, chrom)) { /* make sure chrom names are sorted in ASCII order */ if ((usage != NULL) && strcmp(usage->name, chrom) > 0) { errAbort("%s is not case-sensitive sorted at line %d. Please use \"sort -k1,1 -k2,2n\" with LC_COLLATE=C, or bedSort and try again.", lf->fileName, lf->lineIx); } - struct hashEl *chromHashEl = hashLookup(chromSizesHash, chrom); - if (chromHashEl == NULL) + int chromSize = (*chromSizeFunc)(chromSizeClosure, chrom); + if (chromSize == 0) errAbort("%s is not found in chromosome sizes file", chrom); - int chromSize = ptToInt(chromHashEl->val); + AllocVar(usage); usage->name = cloneString(chrom); usage->id = id++; usage->size = chromSize; slAddHead(&usageList, usage); lastStart = -1; } if (end > usage->size) errAbort("End coordinate %d bigger than %s size of %d line %d of %s", end, usage->name, usage->size, lf->lineIx, lf->fileName); usage->itemCount += 1; if (lastStart >= 0) { int diff = start - lastStart; if (diff < minDiff) { @@ -240,30 +242,48 @@ minDiff = diff; } } lastStart = start; } slReverse(&usageList); double aveSize = 0; if (bedCount > 0) aveSize = (double)totalBases/bedCount; *retMinDiff = minDiff; *retAveSize = aveSize; *retBedCount = bedCount; return usageList; } +static int chromHashSizeFunc(void *closure, char *chrom) +/* Function to find the size of sequence using a hash passed in as a closure. 
*/ +{ +struct hash *chromSizesHash = (struct hash *)closure; +struct hashEl *chromHashEl = hashLookup(chromSizesHash, chrom); +if (chromHashEl == NULL) + errAbort("%s is not found in chromosome sizes file", chrom); +return ptToInt(chromHashEl->val); +} + +struct bbiChromUsage *bbiChromUsageFromBedFile(struct lineFile *lf, struct hash *chromSizesHash, + struct bbExIndexMaker *eim, int *retMinDiff, double *retAveSize, bits64 *retBedCount, boolean tabSep) +/* A wrapper for bbiChromUsageFromBedFileAlias that uses a hash to find chromosome sizes. */ +{ +return bbiChromUsageFromBedFileAlias(lf, chromHashSizeFunc, chromSizesHash, + eim, retMinDiff, retAveSize, retBedCount, tabSep); +} + int bbiCalcResScalesAndSizes(int aveSize, int resScales[bbiMaxZoomLevels], int resSizes[bbiMaxZoomLevels]) /* Fill in resScales with amount to zoom at each level, and zero out resSizes based * on average span. Returns the number of zoom levels we actually will use. */ { int resTryCount = bbiMaxZoomLevels, resTry; int resIncrement = bbiResIncrement; int minZoom = 10; int res = aveSize; if (res < minZoom) res = minZoom; for (resTry = 0; resTry < resTryCount; ++resTry) { resSizes[resTry] = 0; resScales[resTry] = res;