src/hg/hgTables/bigWig.c 1.1
1.1 2009/03/12 16:45:14 kent
Making filter and non-base-pair intersections work with bigWig. (Base-by-base intersections work 80%, but going to have to change design to get last 20% working.) Fixed bug I introduced in correlation of regular wig that Kayla spotted. Moving bigWig stuff to it's own module.
Index: src/hg/hgTables/bigWig.c
===================================================================
RCS file: src/hg/hgTables/bigWig.c
diff -N src/hg/hgTables/bigWig.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/hg/hgTables/bigWig.c 12 Mar 2009 16:45:14 -0000 1.1
@@ -0,0 +1,266 @@
+/* bigWig - stuff to handle bigWig in the Table Browser. */
+
+#include "common.h"
+#include "hash.h"
+#include "linefile.h"
+#include "dystring.h"
+#include "localmem.h"
+#include "jksql.h"
+#include "cheapcgi.h"
+#include "cart.h"
+#include "web.h"
+#include "bed.h"
+#include "hdb.h"
+#include "trackDb.h"
+#include "customTrack.h"
+#include "wiggle.h"
+#include "hmmstats.h"
+#include "correlate.h"
+#include "bbiFile.h"
+#include "bigWig.h"
+#include "hgTables.h"
+
+static char const rcsid[] = "$Id$";
+
+boolean isBigWig(char *table)
+/* Return TRUE if table corresponds to a bigWig file. */
+{
+return trackIsType(table, "bigWig");
+}
+
+char *bigWigFileName(char *table, struct sqlConnection *conn)
+/* Return file name associated with bigWig. This handles differences whether it's
+ * a custom or built-in track. Do a freeMem on returned string when done. */
+{
+char *fileName = NULL;
+if (isCustomTrack(table))
+ {
+ struct customTrack *ct = lookupCt(table);
+ if (ct != NULL)
+ fileName = cloneString(trackDbSetting(ct->tdb, "dataUrl"));
+ }
+else
+ {
+ char query[256];
+ safef(query, sizeof(query), "select fileName from %s", table);
+ fileName = sqlQuickString(conn, query);
+ }
+return fileName;
+}
+
+struct bbiInterval *intersectedFilteredBbiIntervalsOnRegion(struct sqlConnection *conn,
+ struct bbiFile *bwf, struct region *region, enum wigCompare filterCmp, double filterLl,
+ double filterUl, struct lm *lm)
+/* Get list of bbiIntervals (more-or-less bedGraph things from bigWig) out of bigWig file
+ * and if necessary apply filter and intersection. Return list which is allocated in lm. */
+{
+char *chrom = region->chrom;
+int chromSize = hChromSize(database, chrom);
+struct bbiInterval *iv, *ivList = bigWigIntervalQuery(bwf, chrom, region->start, region->end, lm);
+
+/* Run filter if necessary */
+if (filterCmp != wigNoOp_e)
+ {
+ struct bbiInterval *next, *newList = NULL;
+ for (iv = ivList; iv != NULL; iv = next)
+ {
+ next = iv->next;
+ if (wigCompareValFilter(iv->val, filterCmp, filterLl, filterUl))
+ {
+ slAddHead(&newList, iv);
+ }
+ }
+ slReverse(&newList);
+ ivList = newList;
+ }
+
+/* Run intersection if necessary */
+if (anyIntersection())
+ {
+ boolean isBpWise = intersectionIsBpWise();
+ Bits *bits2 = bitsForIntersectingTable(conn, region, chromSize, isBpWise);
+ struct bbiInterval *next, *newList = NULL;
+ double moreThresh = cartCgiUsualDouble(cart, hgtaMoreThreshold, 0)*0.01;
+ double lessThresh = cartCgiUsualDouble(cart, hgtaLessThreshold, 100)*0.01;
+ char *op = cartString(cart, hgtaIntersectOp);
+ for (iv = ivList; iv != NULL; iv = next)
+ {
+ next = iv->next;
+ int start = iv->start;
+ int size = iv->end - start;
+ int overlap = bitCountRange(bits2, start, size);
+ if (isBpWise)
+ {
+ if (overlap == size)
+ {
+ slAddHead(&newList, iv);
+ }
+ else if (overlap > 0)
+ {
+ /* Here we have to break things up. */
+ double val = iv->val;
+ struct bbiInterval *partIv = iv; // Reuse memory for first interval
+ int s = start, end = start+size;
+ for (;;)
+ {
+ int bitsClear = bitFindClear(bits2, s, end-s);
+ s += bitsClear;
+ if (s >= end)
+ break;
+ int bitsSet = bitFindSet(bits2, s, end-s); // Always > 0
+ if (partIv == NULL)
+ lmAllocVar(lm, partIv);
+ partIv->start = s;
+ partIv->end = s + bitsSet;
+ partIv->val = val;
+ slAddHead(&newList, partIv);
+ partIv = NULL;
+ s += bitsSet;
+ if (s >= end)
+ break;
+ }
+ }
+ }
+ else
+ {
+ double coverage = (double)overlap/size;
+ if (intersectOverlapFilter(op, moreThresh, lessThresh, coverage))
+ {
+ slAddHead(&newList, iv);
+ }
+ }
+ }
+ slReverse(&newList);
+ ivList = newList;
+ bitFree(&bits2);
+ }
+
+return ivList;
+}
+
+void getWigFilter(char *database, char *table, enum wigCompare *retCmp, double *retLl, double *retUl)
+/* Get wiggle filter variables from cart and convert them into numbers. */
+{
+char *dataConstraint;
+enum wigCompare cmp = wigNoOp_e;
+checkWigDataFilter(database, curTable, &dataConstraint, retLl, retUl);
+if (dataConstraint != NULL)
+ cmp = wigCompareFromString(dataConstraint);
+*retCmp = cmp;
+}
+
+int bigWigOutRegion(char *table, struct sqlConnection *conn,
+ struct region *region, int maxOut,
+ enum wigOutputType wigOutType)
+/* Write out bigWig for region, doing intersecting and filtering as need be. */
+{
+int resultCount = 0;
+char *wigFileName = bigWigFileName(table, conn);
+if (wigFileName)
+ {
+ struct bbiFile *bwf = bigWigFileOpen(wigFileName);
+ if (bwf)
+ {
+ if (!anyFilter() && !anyIntersection())
+ resultCount = bigWigIntervalDump(bwf, region->chrom, region->start, region->end,
+ maxOut, stdout);
+ else
+ {
+ double ll, ul;
+ enum wigCompare cmp;
+ getWigFilter(database, curTable, &cmp, &ll, &ul);
+ struct lm *lm = lmInit(0);
+ struct bbiInterval *ivList, *iv;
+ ivList = intersectedFilteredBbiIntervalsOnRegion(conn, bwf, region, cmp, ll, ul, lm);
+ for (iv=ivList; iv != NULL && resultCount < maxOut; iv = iv->next, ++resultCount)
+ {
+ fprintf(stdout, "%s\t%d\t%d\t%g\n", region->chrom, iv->start, iv->end, iv->val);
+ }
+ lmCleanup(&lm);
+ }
+ }
+ bbiFileClose(&bwf);
+ }
+freeMem(wigFileName);
+return resultCount;
+}
+
+void doSummaryStatsBigWig(struct sqlConnection *conn)
+/* Put up page showing summary stats for bigWig track. */
+{
+struct trackDb *track = curTrack;
+char *table = curTable;
+char *shortLabel = (track == NULL ? table : track->shortLabel);
+char *fileName = bigWigFileName(table, conn);
+long startTime = clock1000();
+
+htmlOpen("%s (%s) Big Wig Summary Statistics", shortLabel, table);
+
+struct bbiFile *bwf = bigWigFileOpen(fileName);
+struct region *region, *regionList = getRegions();
+double sumData = 0, sumSquares = 0, minVal = 0, maxVal = 0;
+bits64 validCount = 0;
+
+if (!anyFilter() && !anyIntersection())
+ {
+ for (region = regionList; region != NULL; region = region->next)
+ {
+ struct bbiSummaryElement sum;
+ if (bbiSummaryArrayExtended(bwf, region->chrom, region->start, region->end,
+ bigWigIntervalQuery, 1, &sum))
+ {
+ if (validCount == 0)
+ {
+ minVal = sum.minVal;
+ maxVal = sum.maxVal;
+ }
+ sumData += sum.sumData;
+ sumSquares += sum.sumSquares;
+ validCount += sum.validCount;
+ }
+ }
+ }
+else
+ {
+ double ll, ul;
+ enum wigCompare cmp;
+ getWigFilter(database, curTable, &cmp, &ll, &ul);
+ for (region = regionList; region != NULL; region = region->next)
+ {
+ struct lm *lm = lmInit(0);
+ struct bbiInterval *iv, *ivList;
+ ivList = intersectedFilteredBbiIntervalsOnRegion(conn, bwf, region, cmp, ll, ul, lm);
+ for (iv = ivList; iv != NULL; iv = iv->next)
+ {
+ double val = iv->val;
+ double size = iv->end - iv->start;
+ if (validCount == 0)
+ minVal = maxVal = val;
+ sumData += size*val;
+ sumSquares += size*val*val;
+ validCount += size;
+ }
+ lmCleanup(&lm);
+ }
+ }
+
+hTableStart();
+floatStatRow("mean", sumData/validCount);
+floatStatRow("min", minVal);
+floatStatRow("max", maxVal);
+floatStatRow("standard deviation", calcStdFromSums(sumData, sumSquares, validCount));
+numberStatRow("bases with data", validCount);
+long long regionSize = basesInRegion(regionList,0);
+long long gapTotal = gapsInRegion(conn, regionList,0);
+numberStatRow("bases with sequence", regionSize - gapTotal);
+numberStatRow("bases in region", regionSize);
+wigFilterStatRow(conn);
+stringStatRow("intersection", cartUsualString(cart, hgtaIntersectTable, "off"));
+long wigFetchTime = clock1000() - startTime;
+floatStatRow("load and calc time", 0.001*wigFetchTime);
+hTableEnd();
+
+bbiFileClose(&bwf);
+htmlClose();
+}
+