8fdef9e866ecc228bfe4ea54102d0079818f0366 braney Fri Apr 8 14:49:38 2022 -0700 let bedGraphToBigWig use chromAlias.bb as chromSizes file. Add tests to both bedGraphToBigWig and bedToBigBed diff --git src/utils/bedToBigBed/bedToBigBed.c src/utils/bedToBigBed/bedToBigBed.c index c6fdb84..723ab13 100644 --- src/utils/bedToBigBed/bedToBigBed.c +++ src/utils/bedToBigBed/bedToBigBed.c @@ -8,32 +8,33 @@ #include "options.h" #include "dystring.h" #include "obscure.h" #include "asParse.h" #include "basicBed.h" #include "memalloc.h" #include "sig.h" #include "rangeTree.h" #include "zlibFace.h" #include "sqlNum.h" #include "bPlusTree.h" #include "bigBed.h" #include "bbiAlias.h" #include "twoBit.h" -char *version = "2.8"; // when changing, change in bedToBigBed, bedGraphToBigWig, and wigToBigWig +char *version = "2.9"; // when changing, change in bedToBigBed, bedGraphToBigWig, and wigToBigWig /* Version history from 2.6 on at least - + * 2.9 - ability to specify chromAlias bigBed as chromSizes file * 2.8 - Various changes where developer didn't increment version id * 2.7 - Added check for duplicate field names in asParse.c * 2.6 - Made it not crash on empty input. * */ /* Things set directly or indirectly by command lne in main() routine. */ int blockSize = 256; int itemsPerSlot = 512; char *extraIndex = NULL; int bedN = 0; /* number of standard bed fields */ int bedP = 0; /* number of bed plus fields */ char *asFile = NULL; char *asText = NULL; char *udcDir = NULL; static boolean doCompress = FALSE; @@ -557,85 +558,61 @@ { if (eim->chunkArrayArray != NULL) { int i; for (i=0; i < eim->indexCount; ++i) freeMem(eim->chunkArrayArray[i]); } freeMem(eim->indexFields); freeMem(eim->maxFieldSize); freeMem(eim->chunkArrayArray); freeMem(eim->fileOffsets); freez(pEim); } } -struct chromSizeClosure // a structure that contains the data we need to get a chromosome size from a bigBed -{ - struct bbiFile *bbi; - struct bptIndex *bptIndex; - struct lm *lm; -}; - -static int ourChromSizeFunc(void *closure, char *chrom) -/* A function to return the size of a given sequence. */ -{ -struct chromSizeClosure *ourClosure = (struct chromSizeClosure *)closure; - -return bbiAliasChromSize(ourClosure->bbi, ourClosure->bptIndex, ourClosure->lm, chrom); -} - void bbFileCreate( char *inName, /* Input file in a tabular bed format <chrom><start><end> + whatever. */ char *chromSizes, /* Two column tab-separated file: <chromosome> <size>. */ int blockSize, /* Number of items to bundle in r-tree. 1024 is good. */ int itemsPerSlot, /* Number of items in lowest level of tree. 64 is good. */ char *asText, /* Field definitions in a string */ struct asObject *as, /* Field definitions parsed out */ boolean doCompress, /* If TRUE then compress data. */ struct slName *extraIndexList, /* List of extra indexes to add */ char *outName) /* BigBed output file name. */ /* Convert tab-separated bed file to binary indexed, zoomed bigBed version. */ { /* Set up timing measures. */ verboseTimeInit(); struct lineFile *lf = lineFileOpen(inName, TRUE); bits16 fieldCount = slCount(as->columnList); bits16 extraIndexCount = slCount(extraIndexList); struct bbExIndexMaker *eim = NULL; if (extraIndexList != NULL) eim = bbExIndexMakerNew(extraIndexList, as); /* Do first pass, mostly just scanning file and counting hits per chromosome. */ int minDiff = 0; double aveSize = 0; bits64 bedCount = 0; bits32 uncompressBufSize = 0; struct bbiChromUsage *usageList = NULL; if (sizesIsBb) - { - struct chromSizeClosure *ourClosure = NULL; - - AllocVar(ourClosure); - ourClosure->bbi = bigBedFileOpen(chromSizes); - ourClosure->bptIndex = bbiAliasOpenExtra(ourClosure->bbi); - ourClosure->lm = lmInit(0); - usageList = bbiChromUsageFromBedFileAlias(lf, ourChromSizeFunc, ourClosure, eim, &minDiff, &aveSize, &bedCount, tabSep); - // should close and free the closure contents - } + usageList = bbiChromUsageFromBedFileAlias(lf, chromSizes, eim, &minDiff, &aveSize, &bedCount, tabSep); else { struct hash *chromSizesHash = NULL; if (sizesIs2Bit) chromSizesHash = twoBitChromHash(chromSizes); else chromSizesHash = bbiChromSizesFromFile(chromSizes); verbose(2, "Read %d chromosomes and sizes from %s\n", chromSizesHash->elCount, chromSizes); usageList = bbiChromUsageFromBedFile(lf, chromSizesHash, eim, &minDiff, &aveSize, &bedCount, tabSep); freeHash(&chromSizesHash); } verboseTime(1, "pass1 - making usageList (%d chroms)", slCount(usageList)); verbose(2, "%d chroms in %s. Average span of beds %f\n", slCount(usageList), inName, aveSize); /* Open output file and write dummy header. */