src/hg/hgTables/wiggle.c 1.67

1.67 2009/03/10 01:25:24 kent
First cut of bigWig integration. Handles data point and stats output in cases where there are no filters or intersections going.
Index: src/hg/hgTables/wiggle.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgTables/wiggle.c,v
retrieving revision 1.66
retrieving revision 1.67
diff -b -B -U 4 -r1.66 -r1.67
--- src/hg/hgTables/wiggle.c	14 Jan 2009 18:54:54 -0000	1.66
+++ src/hg/hgTables/wiggle.c	10 Mar 2009 01:25:24 -0000	1.67
@@ -17,9 +17,12 @@
 #include "hgColors.h"
 #include "trackDb.h"
 #include "customTrack.h"
 #include "wiggle.h"
+#include "hmmstats.h"
 #include "correlate.h"
+#include "bbiFile.h"
+#include "bigWig.h"
 #include "hgTables.h"
 
 static char const rcsid[] = "$Id$";
 
@@ -457,8 +460,29 @@
 dataVectorFree(&dv);
 return resultCount;
 }
 
+static int bigWigOutRegion(char *table, struct sqlConnection *conn,
+			     struct region *region, int maxOut,
+			     enum wigOutputType wigOutType)
+/* Read in bigWig as dataVector (filtering is handled there), 
+ * intersect if necessary, and print it out. */
+{
+int resultCount = 0;
+char *wigFileName = bigWigFileName(table, conn);
+if (wigFileName)
+    {
+    struct bbiFile *bwf = bigWigFileOpen(wigFileName);
+    if (bwf)
+	resultCount = bigWigIntervalDump(bwf, region->chrom, region->start, region->end, 
+		maxOut, stdout);
+    bbiFileClose(&bwf);
+    }
+freeMem(wigFileName);
+return resultCount;
+}
+
+
 static struct trackDb *trackDbWithWiggleSettings(char *table)
 /* Get trackDb for a table in the database -- or if it has a parent/composite 
  * track, then return that because it contains the wiggle settings. */
 {
@@ -477,15 +501,15 @@
 	}
     }
 /* OK, table is not curTrack nor any of its subtracks -- look it up (and its 
  * parent if there is one): */
-{
-struct trackDb *tdb = hTrackDbForTrack(database, table);
-struct trackDb *cTdb = hCompositeTrackDbForSubtrack(database, tdb);
-if (cTdb != NULL)
+    {
+    struct trackDb *tdb = hTrackDbForTrack(database, table);
+    struct trackDb *cTdb = hCompositeTrackDbForSubtrack(database, tdb);
+    if (cTdb != NULL)
     return cTdb;
-return tdb;
-}
+    return tdb;
+    }
 }
 
 static int wigOutRegion(char *table, struct sqlConnection *conn,
 	struct region *region, int maxOut, enum wigOutputType wigOutType,
@@ -684,11 +708,13 @@
     int curMaxOut = maxOut - curOut;
     if (anySubtrackMerge(database, table))
 	outCount = mergedWigOutRegion(table, conn, region, curMaxOut,
 				      wigOutType);
-    else if (startsWith("bedGraph ", track->type))
+    else if (startsWithWord("bedGraph", track->type))
 	outCount = bedGraphOutRegion(table, conn, region, curMaxOut,
 				     wigOutType);
+    else if (startsWithWord("bigWig", track->type))
+        outCount = bigWigOutRegion(table, conn, region, curMaxOut, wigOutType);
     else
 	outCount = wigOutRegion(table, conn, region, curMaxOut,
 				wigOutType, NULL, 0);
     curOut += outCount;
@@ -701,8 +727,28 @@
 
 
 /***********   PUBLIC ROUTINES  *********************************/
 
+char *bigWigFileName(char *table, struct sqlConnection *conn)
+/* Return file name associated with bigWig.  This handles differences whether it's
+ * a custom or built-in track.  Do a freeMem on returned string when done. */
+{
+char *fileName = NULL;
+if (isCustomTrack(table))
+    {
+    struct customTrack *ct = lookupCt(table);
+    if (ct != NULL)
+        fileName = cloneString(trackDbSetting(ct->tdb, "dataUrl"));
+    }
+else
+    {
+    char query[256];
+    safef(query, sizeof(query), "select fileName from %s", table);
+    fileName = sqlQuickString(conn, query);
+    }
+return fileName;
+}
+
 struct dataVector *bedGraphDataVector(char *table,
 	struct sqlConnection *conn, struct region *region)
 /* Read in bedGraph as dataVector and return it.  Filtering, subtrack merge 
  * and intersection are handled. */
@@ -732,8 +778,31 @@
     }
 return dv;
 }
 
+#ifdef SOON
+struct dataVector *bigWigDataVector(char *table,
+	struct sqlConnection *conn, struct region *region)
+/* Read in bigWig as dataVector and return it.  Filtering, subtrack merge 
+ * and intersection are handled. */
+{
+struct dataVector *dv = NULL;
+
+if (anySubtrackMerge(database, table))
+    dv = mergedWigDataVector(table, conn, region);
+else
+    {
+    char *fileName = bigWigFileName(table, conn);
+    struct dataVector *allocDataVector(char *chrom, int size)
+    struct trackTable *tt1 = trackTableNew(tdb, table, conn);
+    dv = dataVectorFetchOneRegion(tt1, region, conn);
+    intersectDataVector(table, dv, region, conn);
+    }
+return dv;
+}
+#endif /* SOON */
+
+
 struct dataVector *wiggleDataVector(struct trackDb *tdb, char *table,
 	struct sqlConnection *conn, struct region *region)
 /* Read in wiggle as dataVector and return it.  Filtering, subtrack merge 
  * and intersection are handled. */
@@ -790,9 +859,9 @@
     {
     if (isCustomTrack(table))
 	{
 	struct customTrack *ct = lookupCt(table);
-	if (ct->wiggle)
+	if (ct != NULL && ct->wiggle)
 	    typeWiggle = TRUE;
 	}
     else
 	{
@@ -802,20 +871,19 @@
     }
 return(typeWiggle);
 }
 
+boolean isBigWig(char *table)
+/* Return TRUE if table corresponds to a bigWig file. */
+{
+return trackIsType(table, "bigWig");
+}
+
 boolean isBedGraph(char *table)
 /* Return TRUE if table is specified as a bedGraph in the current database's 
  * trackDb. */
 {
-if (curTrack && sameString(curTrack->tableName, table))
-    return startsWith("bedGraph", curTrack->type);
-else
-    {
-    struct trackDb *tdb = hTrackDbForTrack(database, table);
-    return (tdb && startsWith("bedGraph", tdb->type));
-    }
-return FALSE;
+return trackIsType(table, "bedGraph");
 }
 
 struct bed *getWiggleAsBed(
     char *db, char *table, 	/* Database and table. */
@@ -1246,8 +1314,56 @@
 hTableEnd();
 htmlClose();
 }	/*	void doSummaryStatsWiggle(struct sqlConnection *conn)	*/
 
+void doSummaryStatsBigWig(struct sqlConnection *conn)
+/* Put up page showing summary stats for bigWig track. */
+{
+struct trackDb *track = curTrack;
+char *table = curTable;
+char *shortLabel = (track == NULL ? table : track->shortLabel);
+char *fileName = bigWigFileName(table, conn);
+struct bbiFile *bwf = bigWigFileOpen(fileName);
+
+htmlOpen("%s (%s) Big Wig Summary Statistics", shortLabel, table);
+
+struct region *region, *regionList = getRegions();
+double sumData = 0, sumSquares = 0, minVal = 0, maxVal = 0;
+bits64 validCount = 0;
+for (region = regionList; region != NULL; region = region->next)
+    {
+    struct bbiSummaryElement sum;
+    if (bbiSummaryArrayExtended(bwf, region->chrom, region->start, region->end, bigWigIntervalQuery, 
+    	1, &sum))
+        {
+	if (validCount == 0)
+	    {
+	    minVal = sum.minVal;
+	    maxVal = sum.maxVal;
+	    }
+	sumData += sum.sumData;
+	sumSquares += sum.sumSquares;
+	validCount += sum.validCount;
+	}
+    }
+
+
+hTableStart();
+floatStatRow("mean", sumData/validCount);
+floatStatRow("min", minVal);
+floatStatRow("max", maxVal);
+floatStatRow("standard deviation", calcStdFromSums(sumData, sumSquares, validCount));
+numberStatRow("bases with data", validCount);
+bits64 regionSize = basesInRegion(regionList, 0);
+bits64 gapTotal = gapsInRegion(conn, regionList, 0);
+numberStatRow("bases with sequence", regionSize - gapTotal);
+numberStatRow("bases in region", regionSize);
+hTableEnd();
+
+bbiFileClose(&bwf);
+htmlClose();
+}
+
 void wigShowFilter(struct sqlConnection *conn)
 /* print out wiggle data value filter */
 {
 double ll, ul;