src/lib/bwgCreate.c 1.25
1.25 2010/05/29 22:28:44 kent
Moving some stuff from bwgCreate to bwgInternal.h where it can be shared a bit more. Tweaking things so first zoom level is a better size, which speeds up 100k views.
Index: src/lib/bwgCreate.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/bwgCreate.c,v
retrieving revision 1.24
retrieving revision 1.25
diff -b -B -U 4 -r1.24 -r1.25
--- src/lib/bwgCreate.c 25 May 2010 19:24:16 -0000 1.24
+++ src/lib/bwgCreate.c 29 May 2010 22:28:44 -0000 1.25
@@ -16,71 +16,8 @@
#include "bigWig.h"
static char const rcsid[] = "$Id$";
-struct bwgBedGraphItem
-/* A bedGraph-type item in a bwgSection. */
- {
- struct bwgBedGraphItem *next; /* Next in list. */
- bits32 start,end; /* Range of chromosome covered. */
- float val; /* Value. */
- };
-
-struct bwgVariableStepItem
-/* A variableStep type item in a bwgSection. */
- {
- struct bwgVariableStepItem *next; /* Next in list. */
- bits32 start; /* Start position in chromosome. */
- float val; /* Value. */
- };
-
-struct bwgVariableStepPacked
-/* A variableStep type item in a bwgSection. */
- {
- bits32 start; /* Start position in chromosome. */
- float val; /* Value. */
- };
-
-struct bwgFixedStepItem
-/* A fixedStep type item in a bwgSection. */
- {
- struct bwgFixedStepItem *next; /* Next in list. */
- float val; /* Value. */
- };
-
-struct bwgFixedStepPacked
-/* An fixedStep type item in a bwgSection. */
- {
- float val; /* Value. */
- };
-
-union bwgItem
-/* Union of item pointers for all possible section types. */
- {
- struct bwgBedGraphItem *bedGraphList; /* A linked list */
- struct bwgFixedStepPacked *fixedStepPacked; /* An array */
- struct bwgVariableStepPacked *variableStepPacked; /* An array */
- /* No packed format for bedGraph... */
- };
-
-struct bwgSection
-/* A section of a bigWig file - all on same chrom. This is a somewhat fat data
- * structure used by the bigWig creation code. See also bwgSection for the
- * structure returned by the bigWig reading code. */
- {
- struct bwgSection *next; /* Next in list. */
- char *chrom; /* Chromosome name. */
- bits32 start,end; /* Range of chromosome covered. */
- enum bwgSectionType type;
- union bwgItem items; /* List/array of items in this section. */
- bits32 itemStep; /* Step within item if applicable. */
- bits32 itemSpan; /* Item span if applicable. */
- bits16 itemCount; /* Number of items in section. */
- bits32 chromId; /* Unique small integer value for chromosome. */
- bits64 fileOffset; /* Offset of section in file. */
- };
-
-
static int bwgBedGraphItemCmp(const void *va, const void *vb)
/* Compare to sort based on query start. */
{
const struct bwgBedGraphItem *a = *((struct bwgBedGraphItem **)va);
@@ -199,9 +136,9 @@
return bufSize;
}
-static int bwgSectionCmp(const void *va, const void *vb)
+int bwgSectionCmp(const void *va, const void *vb)
/* Compare to sort based on chrom,start,end. */
{
const struct bwgSection *a = *((struct bwgSection **)va);
const struct bwgSection *b = *((struct bwgSection **)vb);
@@ -467,9 +404,9 @@
/* Check that we have all that are required and no more, and call type-specific routine to parse
* rest of section. */
if (chrom == NULL)
errAbort("Missing chrom= setting line %d of %s\n", lf->lineIx, lf->fileName);
-bits32 chromSize = hashIntVal(chromSizeHash, chrom);
+bits32 chromSize = (chromSizeHash ? hashIntVal(chromSizeHash, chrom) : BIGNUM);
if (start >= chromSize)
{
warn("line %d of %s: chromosome %s has %u bases, but item starts at %u",
lf->lineIx, lf->fileName, chrom, chromSize, start);
@@ -549,9 +486,9 @@
if (chrom == NULL)
{
lmAllocVar(chromHash->lm, chrom);
hashAddSaveName(chromHash, chromName, chrom, &chrom->name);
- chrom->size = hashIntVal(chromSizeHash, chromName);
+ chrom->size = (chromSizeHash ? hashIntVal(chromSizeHash, chromName) : BIGNUM);
slAddHead(&chromList, chrom);
}
/* Convert to item and add to chromosome list. */
@@ -864,14 +801,14 @@
struct bbiChromInfo *chromInfoArray;
int chromCount, maxChromNameSize;
bwgMakeChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize);
-/* Figure out initial summary level - starting with a summary 20 times the amount
+/* Figure out initial summary level - starting with a summary 10 times the amount
* of the smallest item. See if summarized data is smaller than half input data, if
* not bump up reduction by a factor of 2 until it is, or until further summarizing
* yields no size reduction. */
int minRes = bwgAverageResolution(sectionList);
-int initialReduction = minRes*20;
+int initialReduction = minRes*10;
bits64 fullSize = bwgTotalSectionSize(sectionList);
bits64 maxReducedSize = fullSize/2;
struct bbiSummary *firstSummaryList = NULL, *summaryList = NULL;
bits64 lastSummarySize = 0, summarySize;
@@ -880,10 +817,9 @@
summaryList = bwgReduceSectionList(sectionList, chromInfoArray, initialReduction);
bits64 summarySize = bbiTotalSummarySize(summaryList);
if (doCompress)
{
- summarySize *= 4; // Compensate for summary not compressing as well as primary data
- initialReduction *= 4;
+ summarySize *= 2; // Compensate for summary not compressing as well as primary data
}
if (summarySize >= maxReducedSize && summarySize != lastSummarySize)
{
/* Need to do more reduction. First scale reduction by amount that it missed
@@ -1060,10 +996,14 @@
freez(&chromInfoArray);
carefulClose(&f);
}
-struct bwgSection *bwgParseWig(char *fileName, boolean clipDontDie, struct hash *chromSizeHash,
- int maxSectionSize, struct lm *lm)
+struct bwgSection *bwgParseWig(
+ char *fileName, /* Name of ascii wig file. */
+ boolean clipDontDie, /* Skip items outside chromosome rather than aborting. */
+ struct hash *chromSizeHash, /* If non-NULL items checked to be inside chromosome. */
+ int maxSectionSize, /* Biggest size of a section. 100 - 100,000 is usual range. */
+ struct lm *lm) /* Memory pool to allocate from. */
/* Parse out ascii wig file - allocating memory in lm. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;