src/lib/bwgCreate.c 1.25

1.25 2010/05/29 22:28:44 kent
Moving the bwgSection and item struct definitions from bwgCreate.c to bwgInternal.h, where they can be shared more widely. Tweaking the initial reduction so the first zoom level is a better size, which speeds up 100k views.
Index: src/lib/bwgCreate.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/bwgCreate.c,v
retrieving revision 1.24
retrieving revision 1.25
diff -b -B -U 4 -r1.24 -r1.25
--- src/lib/bwgCreate.c	25 May 2010 19:24:16 -0000	1.24
+++ src/lib/bwgCreate.c	29 May 2010 22:28:44 -0000	1.25
@@ -16,71 +16,8 @@
 #include "bigWig.h"
 
 static char const rcsid[] = "$Id$";
 
-struct bwgBedGraphItem
-/* An bedGraph-type item in a bwgSection. */
-    {
-    struct bwgBedGraphItem *next;	/* Next in list. */
-    bits32 start,end;		/* Range of chromosome covered. */
-    float val;			/* Value. */
-    };
-
-struct bwgVariableStepItem
-/* An variableStep type item in a bwgSection. */
-    {
-    struct bwgVariableStepItem *next;	/* Next in list. */
-    bits32 start;		/* Start position in chromosome. */
-    float val;			/* Value. */
-    };
-
-struct bwgVariableStepPacked
-/* An variableStep type item in a bwgSection. */
-    {
-    bits32 start;		/* Start position in chromosome. */
-    float val;			/* Value. */
-    };
-
-struct bwgFixedStepItem
-/* An fixedStep type item in a bwgSection. */
-    {
-    struct bwgFixedStepItem *next;	/* Next in list. */
-    float val;			/* Value. */
-    };
-
-struct bwgFixedStepPacked
-/* An fixedStep type item in a bwgSection. */
-    {
-    float val;			/* Value. */
-    };
-
-union bwgItem
-/* Union of item pointers for all possible section types. */
-    {
-    struct bwgBedGraphItem *bedGraphList;		/* A linked list */
-    struct bwgFixedStepPacked *fixedStepPacked;		/* An array */
-    struct bwgVariableStepPacked *variableStepPacked;	/* An array */
-    /* No packed format for bedGraph... */
-    };
-
-struct bwgSection
-/* A section of a bigWig file - all on same chrom.  This is a somewhat fat data
- * structure used by the bigWig creation code.  See also bwgSection for the
- * structure returned by the bigWig reading code. */
-    {
-    struct bwgSection *next;		/* Next in list. */
-    char *chrom;			/* Chromosome name. */
-    bits32 start,end;			/* Range of chromosome covered. */
-    enum bwgSectionType type;
-    union bwgItem items;		/* List/array of items in this section. */
-    bits32 itemStep;			/* Step within item if applicable. */
-    bits32 itemSpan;			/* Item span if applicable. */
-    bits16 itemCount;			/* Number of items in section. */
-    bits32 chromId;			/* Unique small integer value for chromosome. */
-    bits64 fileOffset;			/* Offset of section in file. */
-    };
-
-
 static int bwgBedGraphItemCmp(const void *va, const void *vb)
 /* Compare to sort based on query start. */
 {
 const struct bwgBedGraphItem *a = *((struct bwgBedGraphItem **)va);
@@ -199,9 +136,9 @@
 return bufSize;
 }
 
 
-static int bwgSectionCmp(const void *va, const void *vb)
+int bwgSectionCmp(const void *va, const void *vb)
 /* Compare to sort based on chrom,start,end.  */
 {
 const struct bwgSection *a = *((struct bwgSection **)va);
 const struct bwgSection *b = *((struct bwgSection **)vb);
@@ -467,9 +404,9 @@
 /* Check that we have all that are required and no more, and call type-specific routine to parse
  * rest of section. */
 if (chrom == NULL)
     errAbort("Missing chrom= setting line %d of %s\n", lf->lineIx, lf->fileName);
-bits32 chromSize = hashIntVal(chromSizeHash, chrom);
+bits32 chromSize = (chromSizeHash ? hashIntVal(chromSizeHash, chrom) : BIGNUM);
 if (start >= chromSize)
     {
     warn("line %d of %s: chromosome %s has %u bases, but item starts at %u",
     	lf->lineIx, lf->fileName, chrom, chromSize, start);
@@ -549,9 +486,9 @@
     if (chrom == NULL)
         {
 	lmAllocVar(chromHash->lm, chrom);
 	hashAddSaveName(chromHash, chromName, chrom, &chrom->name);
-	chrom->size = hashIntVal(chromSizeHash, chromName);
+	chrom->size = (chromSizeHash ? hashIntVal(chromSizeHash, chromName) : BIGNUM);
 	slAddHead(&chromList, chrom);
 	}
 
     /* Convert to item and add to chromosome list. */
@@ -864,14 +801,14 @@
 struct bbiChromInfo *chromInfoArray;
 int chromCount, maxChromNameSize;
 bwgMakeChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize);
 
-/* Figure out initial summary level - starting with a summary 20 times the amount
+/* Figure out initial summary level - starting with a summary 10 times the amount
  * of the smallest item.  See if summarized data is smaller than half input data, if
  * not bump up reduction by a factor of 2 until it is, or until further summarying
  * yeilds no size reduction. */
 int  minRes = bwgAverageResolution(sectionList);
-int initialReduction = minRes*20;
+int initialReduction = minRes*10;
 bits64 fullSize = bwgTotalSectionSize(sectionList);
 bits64 maxReducedSize = fullSize/2;
 struct bbiSummary *firstSummaryList = NULL, *summaryList = NULL;
 bits64 lastSummarySize = 0, summarySize;
@@ -880,10 +817,9 @@
     summaryList = bwgReduceSectionList(sectionList, chromInfoArray, initialReduction);
     bits64 summarySize = bbiTotalSummarySize(summaryList);
     if (doCompress)
 	{
-        summarySize *= 4;	// Compensate for summary not compressing as well as primary data
-	initialReduction *= 4;
+        summarySize *= 2;	// Compensate for summary not compressing as well as primary data
 	}
     if (summarySize >= maxReducedSize && summarySize != lastSummarySize)
         {
 	/* Need to do more reduction.  First scale reduction by amount that it missed
@@ -1060,10 +996,14 @@
 freez(&chromInfoArray);
 carefulClose(&f);
 }
 
-struct bwgSection *bwgParseWig(char *fileName, boolean clipDontDie, struct hash *chromSizeHash,
-	int maxSectionSize, struct lm *lm)
+struct bwgSection *bwgParseWig(
+	char *fileName,       /* Name of ascii wig file. */
+	boolean clipDontDie,  /* Skip items outside chromosome rather than aborting. */
+	struct hash *chromSizeHash,  /* If non-NULL, items are checked to be inside chromosome. */
+	int maxSectionSize,   /* Biggest size of a section.  100 - 100,000 is usual range. */
+	struct lm *lm)	      /* Memory pool to allocate from. */
 /* Parse out ascii wig file - allocating memory in lm. */
 {
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 char *line;