src/hg/hgTables/wiggle.c 1.67
1.67 2009/03/10 01:25:24 kent
First cut of bigWig integration. Handles data point and stats output in cases where there are no filters or intersections going.
Index: src/hg/hgTables/wiggle.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgTables/wiggle.c,v
retrieving revision 1.66
retrieving revision 1.67
diff -b -B -U 4 -r1.66 -r1.67
--- src/hg/hgTables/wiggle.c 14 Jan 2009 18:54:54 -0000 1.66
+++ src/hg/hgTables/wiggle.c 10 Mar 2009 01:25:24 -0000 1.67
@@ -17,9 +17,12 @@
#include "hgColors.h"
#include "trackDb.h"
#include "customTrack.h"
#include "wiggle.h"
+#include "hmmstats.h"
#include "correlate.h"
+#include "bbiFile.h"
+#include "bigWig.h"
#include "hgTables.h"
static char const rcsid[] = "$Id$";
@@ -457,8 +460,29 @@
dataVectorFree(&dv);
return resultCount;
}
+static int bigWigOutRegion(char *table, struct sqlConnection *conn,
+ struct region *region, int maxOut,
+ enum wigOutputType wigOutType)
+/* Read in bigWig as dataVector (filtering is handled there),
+ * intersect if necessary, and print it out. */
+{
+int resultCount = 0;
+char *wigFileName = bigWigFileName(table, conn);
+if (wigFileName)
+ {
+ struct bbiFile *bwf = bigWigFileOpen(wigFileName);
+ if (bwf)
+ resultCount = bigWigIntervalDump(bwf, region->chrom, region->start, region->end,
+ maxOut, stdout);
+ bbiFileClose(&bwf);
+ }
+freeMem(wigFileName);
+return resultCount;
+}
+
+
static struct trackDb *trackDbWithWiggleSettings(char *table)
/* Get trackDb for a table in the database -- or if it has a parent/composite
* track, then return that because it contains the wiggle settings. */
{
@@ -477,15 +501,15 @@
}
}
/* OK, table is not curTrack nor any of its subtracks -- look it up (and its
* parent if there is one): */
-{
-struct trackDb *tdb = hTrackDbForTrack(database, table);
-struct trackDb *cTdb = hCompositeTrackDbForSubtrack(database, tdb);
-if (cTdb != NULL)
+ {
+ struct trackDb *tdb = hTrackDbForTrack(database, table);
+ struct trackDb *cTdb = hCompositeTrackDbForSubtrack(database, tdb);
+ if (cTdb != NULL)
return cTdb;
-return tdb;
-}
+ return tdb;
+ }
}
static int wigOutRegion(char *table, struct sqlConnection *conn,
struct region *region, int maxOut, enum wigOutputType wigOutType,
@@ -684,11 +708,13 @@
int curMaxOut = maxOut - curOut;
if (anySubtrackMerge(database, table))
outCount = mergedWigOutRegion(table, conn, region, curMaxOut,
wigOutType);
- else if (startsWith("bedGraph ", track->type))
+ else if (startsWithWord("bedGraph", track->type))
outCount = bedGraphOutRegion(table, conn, region, curMaxOut,
wigOutType);
+ else if (startsWithWord("bigWig", track->type))
+ outCount = bigWigOutRegion(table, conn, region, curMaxOut, wigOutType);
else
outCount = wigOutRegion(table, conn, region, curMaxOut,
wigOutType, NULL, 0);
curOut += outCount;
@@ -701,8 +727,28 @@
/*********** PUBLIC ROUTINES *********************************/
+char *bigWigFileName(char *table, struct sqlConnection *conn)
+/* Return file name associated with bigWig. This handles differences whether it's
+ * a custom or built-in track. Do a freeMem on returned string when done. */
+{
+char *fileName = NULL;
+if (isCustomTrack(table))
+ {
+ struct customTrack *ct = lookupCt(table);
+ if (ct != NULL)
+ fileName = cloneString(trackDbSetting(ct->tdb, "dataUrl"));
+ }
+else
+ {
+ char query[256];
+ safef(query, sizeof(query), "select fileName from %s", table);
+ fileName = sqlQuickString(conn, query);
+ }
+return fileName;
+}
+
struct dataVector *bedGraphDataVector(char *table,
struct sqlConnection *conn, struct region *region)
/* Read in bedGraph as dataVector and return it. Filtering, subtrack merge
* and intersection are handled. */
@@ -732,8 +778,31 @@
}
return dv;
}
+#ifdef SOON
+struct dataVector *bigWigDataVector(char *table,
+ struct sqlConnection *conn, struct region *region)
+/* Read in bigWig as dataVector and return it. Filtering, subtrack merge
+ * and intersection are handled. */
+{
+struct dataVector *dv = NULL;
+
+if (anySubtrackMerge(database, table))
+ dv = mergedWigDataVector(table, conn, region);
+else
+ {
+ char *fileName = bigWigFileName(table, conn);
+ struct dataVector *allocDataVector(char *chrom, int size)
+ struct trackTable *tt1 = trackTableNew(tdb, table, conn);
+ dv = dataVectorFetchOneRegion(tt1, region, conn);
+ intersectDataVector(table, dv, region, conn);
+ }
+return dv;
+}
+#endif /* SOON */
+
+
struct dataVector *wiggleDataVector(struct trackDb *tdb, char *table,
struct sqlConnection *conn, struct region *region)
/* Read in wiggle as dataVector and return it. Filtering, subtrack merge
* and intersection are handled. */
@@ -790,9 +859,9 @@
{
if (isCustomTrack(table))
{
struct customTrack *ct = lookupCt(table);
- if (ct->wiggle)
+ if (ct != NULL && ct->wiggle)
typeWiggle = TRUE;
}
else
{
@@ -802,20 +871,19 @@
}
return(typeWiggle);
}
+boolean isBigWig(char *table)
+/* Return TRUE if table corresponds to a bigWig file. */
+{
+return trackIsType(table, "bigWig");
+}
+
boolean isBedGraph(char *table)
/* Return TRUE if table is specified as a bedGraph in the current database's
* trackDb. */
{
-if (curTrack && sameString(curTrack->tableName, table))
- return startsWith("bedGraph", curTrack->type);
-else
- {
- struct trackDb *tdb = hTrackDbForTrack(database, table);
- return (tdb && startsWith("bedGraph", tdb->type));
- }
-return FALSE;
+return trackIsType(table, "bedGraph");
}
struct bed *getWiggleAsBed(
char *db, char *table, /* Database and table. */
@@ -1246,8 +1314,56 @@
hTableEnd();
htmlClose();
} /* void doSummaryStatsWiggle(struct sqlConnection *conn) */
+void doSummaryStatsBigWig(struct sqlConnection *conn)
+/* Put up page showing summary stats for bigWig track. */
+{
+struct trackDb *track = curTrack;
+char *table = curTable;
+char *shortLabel = (track == NULL ? table : track->shortLabel);
+char *fileName = bigWigFileName(table, conn);
+struct bbiFile *bwf = bigWigFileOpen(fileName);
+
+htmlOpen("%s (%s) Big Wig Summary Statistics", shortLabel, table);
+
+struct region *region, *regionList = getRegions();
+double sumData = 0, sumSquares = 0, minVal = 0, maxVal = 0;
+bits64 validCount = 0;
+for (region = regionList; region != NULL; region = region->next)
+ {
+ struct bbiSummaryElement sum;
+ if (bbiSummaryArrayExtended(bwf, region->chrom, region->start, region->end, bigWigIntervalQuery,
+ 1, &sum))
+ {
+ if (validCount == 0)
+ {
+ minVal = sum.minVal;
+ maxVal = sum.maxVal;
+ }
+ sumData += sum.sumData;
+ sumSquares += sum.sumSquares;
+ validCount += sum.validCount;
+ }
+ }
+
+
+hTableStart();
+floatStatRow("mean", sumData/validCount);
+floatStatRow("min", minVal);
+floatStatRow("max", maxVal);
+floatStatRow("standard deviation", calcStdFromSums(sumData, sumSquares, validCount));
+numberStatRow("bases with data", validCount);
+bits64 regionSize = basesInRegion(regionList, 0);
+bits64 gapTotal = gapsInRegion(conn, regionList, 0);
+numberStatRow("bases with sequence", regionSize - gapTotal);
+numberStatRow("bases in region", regionSize);
+hTableEnd();
+
+bbiFileClose(&bwf);
+htmlClose();
+}
+
void wigShowFilter(struct sqlConnection *conn)
/* print out wiggle data value filter */
{
double ll, ul;