src/hg/hgTables/bigWig.c 1.1

1.1 2009/03/12 16:45:14 kent
Making filter and non-base-pair intersections work with bigWig. (Base-by-base intersections work 80%, but going to have to change design to get last 20% working.) Fixed bug I introduced in correlation of regular wig that Kayla spotted. Moving bigWig stuff to it's own module.
Index: src/hg/hgTables/bigWig.c
===================================================================
RCS file: src/hg/hgTables/bigWig.c
diff -N src/hg/hgTables/bigWig.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/hg/hgTables/bigWig.c	12 Mar 2009 16:45:14 -0000	1.1
@@ -0,0 +1,266 @@
+/* bigWig - stuff to handle bigWig in the Table Browser. */
+
+#include "common.h"
+#include "hash.h"
+#include "linefile.h"
+#include "dystring.h"
+#include "localmem.h"
+#include "jksql.h"
+#include "cheapcgi.h"
+#include "cart.h"
+#include "web.h"
+#include "bed.h"
+#include "hdb.h"
+#include "trackDb.h"
+#include "customTrack.h"
+#include "wiggle.h"
+#include "hmmstats.h"
+#include "correlate.h"
+#include "bbiFile.h"
+#include "bigWig.h"
+#include "hgTables.h"
+
+static char const rcsid[] = "$Id$";
+
+boolean isBigWig(char *table)
+/* Return TRUE if table corresponds to a bigWig file. */
+{
+return trackIsType(table, "bigWig");
+}
+
+char *bigWigFileName(char *table, struct sqlConnection *conn)
+/* Return file name associated with bigWig.  This handles differences whether it's
+ * a custom or built-in track.  Do a freeMem on returned string when done. */
+{
+char *fileName = NULL;
+if (isCustomTrack(table))
+    {
+    struct customTrack *ct = lookupCt(table);
+    if (ct != NULL)
+        fileName = cloneString(trackDbSetting(ct->tdb, "dataUrl"));
+    }
+else
+    {
+    char query[256];
+    safef(query, sizeof(query), "select fileName from %s", table);
+    fileName = sqlQuickString(conn, query);
+    }
+return fileName;
+}
+
+struct bbiInterval *intersectedFilteredBbiIntervalsOnRegion(struct sqlConnection *conn, 
+	struct bbiFile *bwf, struct region *region, enum wigCompare filterCmp, double filterLl,
+	double filterUl, struct lm *lm)
+/* Get list of bbiIntervals (more-or-less bedGraph things from bigWig) out of bigWig file
+ * and if necessary apply filter and intersection.  Return list which is allocated in lm. */
+{
+char *chrom = region->chrom;
+int chromSize = hChromSize(database, chrom);
+struct bbiInterval *iv, *ivList = bigWigIntervalQuery(bwf, chrom, region->start, region->end, lm);
+
+/* Run filter if necessary */
+if (filterCmp != wigNoOp_e)
+    {
+    struct bbiInterval *next, *newList = NULL;
+    for (iv = ivList; iv != NULL; iv = next)
+        {
+	next = iv->next;
+	if (wigCompareValFilter(iv->val, filterCmp, filterLl, filterUl))
+	    {
+	    slAddHead(&newList, iv);
+	    }
+	}
+    slReverse(&newList);
+    ivList = newList;
+    }
+
+/* Run intersection if necessary */
+if (anyIntersection())
+    {
+    boolean isBpWise = intersectionIsBpWise();
+    Bits *bits2 = bitsForIntersectingTable(conn, region, chromSize, isBpWise);
+    struct bbiInterval *next, *newList = NULL;
+    double moreThresh = cartCgiUsualDouble(cart, hgtaMoreThreshold, 0)*0.01;
+    double lessThresh = cartCgiUsualDouble(cart, hgtaLessThreshold, 100)*0.01;
+    char *op = cartString(cart, hgtaIntersectOp);
+    for (iv = ivList; iv != NULL; iv = next)
+	{
+	next = iv->next;
+	int start = iv->start;
+	int size = iv->end - start;
+	int overlap = bitCountRange(bits2, start, size);
+	if (isBpWise)
+	    {
+	    if (overlap == size)
+	        {
+		slAddHead(&newList, iv);
+		}
+	    else if (overlap > 0)
+	        {
+		/* Here we have to break things up. */
+		double val = iv->val;
+		struct bbiInterval *partIv = iv;	// Reuse memory for first interval
+		int s = start, end = start+size;
+		for (;;)
+		    {
+		    int bitsClear = bitFindClear(bits2, s, end-s);
+		    s += bitsClear;
+		    if (s >= end)
+		        break;
+		    int bitsSet = bitFindSet(bits2, s, end-s);	// Always > 0
+		    if (partIv == NULL)
+			lmAllocVar(lm, partIv);
+		    partIv->start = s;
+		    partIv->end = s + bitsSet;
+		    partIv->val = val;
+		    slAddHead(&newList, partIv);
+		    partIv = NULL;
+		    s += bitsSet;
+		    if (s >= end)
+		        break;
+		    }
+		}
+	    }
+	else
+	    {
+	    double coverage = (double)overlap/size;
+	    if (intersectOverlapFilter(op, moreThresh, lessThresh, coverage))
+		{
+		slAddHead(&newList, iv);
+		}
+	    }
+	}
+    slReverse(&newList);
+    ivList = newList;
+    bitFree(&bits2);
+    }
+
+return ivList;
+}
+
+void getWigFilter(char *database, char *table, enum wigCompare *retCmp, double *retLl, double *retUl)
+/* Get wiggle filter variables from cart and convert them into numbers. */
+{
+char *dataConstraint;
+enum wigCompare cmp = wigNoOp_e;
+checkWigDataFilter(database, curTable, &dataConstraint, retLl, retUl);
+if (dataConstraint != NULL)
+    cmp = wigCompareFromString(dataConstraint);
+*retCmp = cmp;
+}
+
+int bigWigOutRegion(char *table, struct sqlConnection *conn,
+			     struct region *region, int maxOut,
+			     enum wigOutputType wigOutType)
+/* Write out bigWig for region, doing intersecting and filtering as need be. */
+{
+int resultCount = 0;
+char *wigFileName = bigWigFileName(table, conn);
+if (wigFileName)
+    {
+    struct bbiFile *bwf = bigWigFileOpen(wigFileName);
+    if (bwf)
+	{
+	if (!anyFilter() && !anyIntersection())
+	    resultCount = bigWigIntervalDump(bwf, region->chrom, region->start, region->end, 
+		    maxOut, stdout);
+	else
+	    {
+	    double ll, ul;
+	    enum wigCompare cmp;
+	    getWigFilter(database, curTable, &cmp, &ll, &ul);
+	    struct lm *lm = lmInit(0);
+	    struct bbiInterval *ivList, *iv;
+	    ivList = intersectedFilteredBbiIntervalsOnRegion(conn, bwf, region, cmp, ll, ul, lm);
+	    for (iv=ivList; iv != NULL && resultCount < maxOut; iv = iv->next, ++resultCount)
+	        {
+		fprintf(stdout, "%s\t%d\t%d\t%g\n", region->chrom, iv->start, iv->end, iv->val);
+		}
+	    lmCleanup(&lm);
+	    }
+	}
+    bbiFileClose(&bwf);
+    }
+freeMem(wigFileName);
+return resultCount;
+}
+
+void doSummaryStatsBigWig(struct sqlConnection *conn)
+/* Put up page showing summary stats for bigWig track. */
+{
+struct trackDb *track = curTrack;
+char *table = curTable;
+char *shortLabel = (track == NULL ? table : track->shortLabel);
+char *fileName = bigWigFileName(table, conn);
+long startTime = clock1000();
+
+htmlOpen("%s (%s) Big Wig Summary Statistics", shortLabel, table);
+
+struct bbiFile *bwf = bigWigFileOpen(fileName);
+struct region *region, *regionList = getRegions();
+double sumData = 0, sumSquares = 0, minVal = 0, maxVal = 0;
+bits64 validCount = 0;
+
+if (!anyFilter() && !anyIntersection())
+    {
+    for (region = regionList; region != NULL; region = region->next)
+	{
+	struct bbiSummaryElement sum;
+	if (bbiSummaryArrayExtended(bwf, region->chrom, region->start, region->end, 
+		bigWigIntervalQuery, 1, &sum))
+	    {
+	    if (validCount == 0)
+		{
+		minVal = sum.minVal;
+		maxVal = sum.maxVal;
+		}
+	    sumData += sum.sumData;
+	    sumSquares += sum.sumSquares;
+	    validCount += sum.validCount;
+	    }
+	}
+    }
+else
+    {
+    double ll, ul;
+    enum wigCompare cmp;
+    getWigFilter(database, curTable, &cmp, &ll, &ul);
+    for (region = regionList; region != NULL; region = region->next)
+        {
+	struct lm *lm = lmInit(0);
+	struct bbiInterval *iv, *ivList;
+	ivList = intersectedFilteredBbiIntervalsOnRegion(conn, bwf, region, cmp, ll, ul, lm);
+	for (iv = ivList; iv != NULL; iv = iv->next)
+	    {
+	    double val = iv->val;
+	    double size = iv->end - iv->start;
+	    if (validCount == 0)
+		minVal = maxVal = val;
+	    sumData += size*val;
+	    sumSquares += size*val*val;
+	    validCount += size;
+	    }
+	lmCleanup(&lm);
+	}
+    }
+
+hTableStart();
+floatStatRow("mean", sumData/validCount);
+floatStatRow("min", minVal);
+floatStatRow("max", maxVal);
+floatStatRow("standard deviation", calcStdFromSums(sumData, sumSquares, validCount));
+numberStatRow("bases with data", validCount);
+long long regionSize = basesInRegion(regionList,0);
+long long gapTotal = gapsInRegion(conn, regionList,0);
+numberStatRow("bases with sequence", regionSize - gapTotal);
+numberStatRow("bases in region", regionSize);
+wigFilterStatRow(conn);
+stringStatRow("intersection", cartUsualString(cart, hgtaIntersectTable, "off"));
+long wigFetchTime = clock1000() - startTime;
+floatStatRow("load and calc time", 0.001*wigFetchTime);
+hTableEnd();
+
+bbiFileClose(&bwf);
+htmlClose();
+}
+