src/hg/hgc/peakClusters.c 1.2

1.2 2010/05/17 02:30:20 kent
Working on details pages for DNase Clusters and Txn Factor ChIP tracks.
Index: src/hg/hgc/peakClusters.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgc/peakClusters.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -b -B -U 1000000 -r1.1 -r1.2
--- src/hg/hgc/peakClusters.c	15 May 2010 22:08:41 -0000	1.1
+++ src/hg/hgc/peakClusters.c	17 May 2010 02:30:20 -0000	1.2
@@ -1,200 +1,259 @@
-/* Stuff to display details on tracks that are clusters of peaks (items) in other tracks. */
+/* Stuff to display details on tracks that are clusters of peaks (items) in other tracks. 
+ * In particular peaks in either ENCODE narrowPeak or broadPeak settings, and residing in
+ * composite tracks. 
+ *
+ * These come in two main forms currently:
+ *     DNase hypersensitive clusters - peaks clustered across cell lines stored in bed 5
+ *                                     with no special type
+ *     Transcription Factor Binding Sites (TFBS) - peaks from transcription factor ChIP-seq
+ *                   across a number of transcription factors and cell lines. Stored in bed 15
+ *                   plus sourceTable with type factorSource */
+ 
 #include "common.h"
 #include "hash.h"
 #include "jksql.h"
 #include "obscure.h"
 #include "hCommon.h"
 #include "hdb.h"
 #include "web.h"
 #include "cart.h"
 #include "trackDb.h"
+#include "hui.h"
 #include "hgc.h"
 #include "encode/encodePeak.h"
+#include "expRecord.h"
 
 
-static boolean pairInList(struct slPair *pair, struct slPair *list)
-/* Return TRUE if pair is in list. */
+char *findGroupTagVal(struct trackDb *tdb, char *tag)
+/* Find value of given tag inside of subGroups setting of tdb.  Returns NULL if not found. */
 {
-struct slPair *el;
+char *subGroups = trackDbSetting(tdb, "subGroups");
+struct slPair *el, *list = slPairFromString(subGroups);
+char *val = NULL;
 for (el = list; el != NULL; el = el->next)
-    if (sameString(pair->name, el->name) && sameString(pair->val, el->val))
-        return TRUE;
-return FALSE;
+    {
+    if (sameString(el->name, tag))
+	{
+        val = el->val;  /* Points into list, which is never freed in this function. */
+	break;
+	}
+    }
+return val;
 }
 
-static boolean selGroupListMatch(struct trackDb *tdb, struct slPair *selGroupList)
-/* Return TRUE if tdb has match to every item in selGroupList */
+char *mustFindGroupTagVal(struct trackDb *tdb, char *tag)
+/* Find value of given tag inside of subGroups setting of tdb, or errAbort if it is missing. */
 {
-char *subGroups = trackDbSetting(tdb, "subGroups");
-if (subGroups == NULL)
-    return FALSE;
-struct slPair *groupList = slPairFromString(subGroups);
-struct slPair *selGroup;
-for (selGroup = selGroupList; selGroup != NULL; selGroup = selGroup->next)
-    {
-    if (!pairInList(selGroup, groupList))
-        return FALSE;
-    }
-return TRUE;
+char *val = findGroupTagVal(tdb, tag);
+if (val == NULL)
+    errAbort("Couldn't find %s in subGroups tag of %s", tag, tdb->track);
+return val;
 }
 
-static void rAddMatching(struct trackDb *tdb, struct slPair *selGroupList, struct slName **pList)
-/* Add track and any descendents that match selGroupList to pList */
+char *findGroupLabel(struct trackDb *tdb, char *group)
+/* Find tdb's value for the given subgroup tag (aborting if tdb has none) and return
+ * the label that compositeGroupLabel reports for that group and value. */
 {
-if (selGroupListMatch(tdb, selGroupList))
-    slNameAddHead(pList, tdb->track);
-struct trackDb *sub;
-for (sub = tdb->subtracks; sub != NULL; sub = sub->next)
-    rAddMatching(sub, selGroupList, pList);
+char *groupId = mustFindGroupTagVal(tdb, group);
+return compositeGroupLabel(tdb, group, groupId);
 }
 
-static struct slName *findMatchingSubtracks(struct slName *inTrackList, struct slPair *selGroupList)
-/* Look in track and it's descendents for tracks with groups that match all values 
- * in selGroupList. */
+static void printClusterTableHeader(struct slName *displayGroupList)
+/* Print header row of cluster table: "#", "signal", each display group name, "description". */
 {
-struct slName *matchList = NULL;
-struct slName *inTrack;
-for (inTrack = inTrackList; inTrack != NULL; inTrack = inTrack->next)
+webPrintLabelCell("#");
+webPrintLabelCell("signal");
+struct slName *displayGroup;
+for (displayGroup = displayGroupList; displayGroup != NULL; displayGroup = displayGroup->next)
     {
-    struct trackDb *tdb = hashFindVal(trackHash, inTrack->name);
-    if (tdb == NULL)
-        errAbort("Can't find track %s which is in inputTracks", inTrack->name);
-    rAddMatching(tdb,  selGroupList, &matchList);
+    webPrintLabelCell(displayGroup->name);
     }
-return matchList;
+webPrintLabelCell("description");
+printf("</TR><TR>\n");
 }
 
 static void printTableInfo(struct trackDb *tdb, struct trackDb *clusterTdb,
     struct slName *displayGroupList)
-/* Print out info on table. */
+/* Print one cell per display group with tdb's label for that group, then tdb's long label. */
 {
-webPrintLinkCell(tdb->shortLabel);
+struct slName *displayGroup;
+for (displayGroup = displayGroupList; displayGroup != NULL; displayGroup = displayGroup->next)
+    {
+    char *label = findGroupLabel(tdb, displayGroup->name);
+    char *linkedLabel = compositeLabelWithVocabLink(database, tdb, tdb, displayGroup->name, label);
+    webPrintLinkCell(linkedLabel);
+    }
 webPrintLinkCell(tdb->longLabel);
 }
 
 static void showOnePeak(struct trackDb *tdb, struct bed *cluster, struct trackDb *clusterTdb,
-	struct encodePeak *peak, struct slName *displayGroupList, int *pIx)
-/* Show info on track and peak.  Peak may be NULL in which case n/a's will be printed
- * as appropriate. */
+	struct encodePeak *peakList, struct slName *displayGroupList, int *pIx)
+/* Print one table row for track: row number, comma-separated signalValues of all peaks
+ * in peakList, then the track's group labels.  peakList must not be NULL. */
 {
+struct encodePeak *peak;
 *pIx += 1;
 webPrintIntCell(*pIx);
-if (peak)
-    {
-    webPrintIntCell(peak->score);
-    int overlap = positiveRangeIntersection(peak->chromStart, peak->chromEnd, 
-    	cluster->chromStart, cluster->chromEnd);
-    double overlapRatio = (double)overlap/(cluster->chromEnd - cluster->chromStart);
-    webPrintLinkCellRightStart();
-    printf("%4.1f%%", overlapRatio*100);
-    webPrintLinkCellEnd();
-    }
+webPrintLinkCellRightStart();
+printf("%g", peakList->signalValue);
+for (peak = peakList->next; peak != NULL; peak = peak->next)
+    printf(",%g", peak->signalValue);
+webPrintLinkCellEnd();
 printTableInfo(tdb, clusterTdb, displayGroupList);
+printf("</TR><TR>\n");
 }
 
 static boolean showMatchingTrack(char *track, struct bed *cluster, struct sqlConnection *conn,
-	struct trackDb *clusterTdb, struct slName *displayGroupList,
-	boolean showIfTrue, boolean showIfFalse, int *pRowIx)
-/* put out a line in an html table that describes the given track. */ 
+	struct trackDb *clusterTdb, struct slName *displayGroupList, int *pRowIx)
+/* Print a table row for track if it has peaks in cluster's range.  Return TRUE if printed. */
 {
 struct trackDb *tdb = hashMustFindVal(trackHash, track);
 boolean result = FALSE;
 char **row;
 int rowOffset = 0;
 struct sqlResult *sr = hRangeQuery(conn, tdb->table, 
 	cluster->chrom, cluster->chromStart, cluster->chromEnd, NULL, &rowOffset);
-boolean gotData = FALSE;
+struct encodePeak *peakList = NULL;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     enum encodePeakType pt = encodePeakInferTypeFromTable(database, tdb->table, tdb->type);
     struct encodePeak *peak = encodePeakGeneralLoad(row + rowOffset, pt);
-    if (showIfTrue)
-	{
-	showOnePeak(tdb, cluster, clusterTdb, peak, displayGroupList, pRowIx);
-	result = TRUE;
-	}
-    gotData = TRUE;
-    }
-if (!gotData)
-    {
-    if (showIfFalse)
-	{
-	showOnePeak(tdb, cluster, clusterTdb, NULL, displayGroupList, pRowIx);
-	result = TRUE;
-	}
+    slAddTail(&peakList, peak);
     }
+result = (peakList != NULL);  /* Tell caller whether a row is printed, as revision 1.1 did. */
+if (result)
+    showOnePeak(tdb, cluster, clusterTdb, peakList, displayGroupList, pRowIx);
 sqlFreeResult(&sr);
 return result;
 }
 
 void doPeakClusters(struct trackDb *tdb, char *item)
 /* Display detailed info about a cluster of peaks from other tracks. */
 {
 int start = cartInt(cart, "o");
 char *table = tdb->table;
 int rowOffset = hOffsetPastBin(database, seqName, table);
 char query[256];
 struct sqlResult *sr;
 char **row;
 struct bed *cluster = NULL;
 struct sqlConnection *conn = hAllocConn(database);
 
 char title[256];
 safef(title, sizeof(title), "%s item details", tdb->shortLabel);
 cartWebStart(cart, database, title);
-sprintf(query,
+safef(query, sizeof(query),
 	"select * from %s where  name = '%s' and chrom = '%s' and chromStart = %d",
 	table, item, seqName, start);
 sr = sqlGetResult(conn, query);
 row = sqlNextRow(sr);
 if (row != NULL)
     cluster = bedLoadN(row + rowOffset, 5);
 sqlFreeResult(&sr);
 
 if (cluster != NULL)
     {
     /* Get list of tracks we'll look through for input. */
     char *inputTracks = trackDbRequiredSetting(tdb, "inputTracks");
     struct slName *inTrackList = stringToSlNames(inputTracks);
 
     /* Get list of subgroups to select on */
     char *inputTracksSubgroupSelect = trackDbRequiredSetting(tdb, "inputTracksSubgroupSelect");
     struct slPair *selGroupList = slPairFromString(inputTracksSubgroupSelect);
 
     /* Get list of subgroups to display */
     char *inputTracksSubgroupDisplay = trackDbRequiredSetting(tdb, "inputTracksSubgroupDisplay");
     struct slName *displayGroupList = stringToSlNames(inputTracksSubgroupDisplay);
 
-    /* Get list of tracks that match and make a table out of them. */
-    struct slName *matchTrackList = findMatchingSubtracks(inTrackList, selGroupList);
+    /* Get list of tracks that match criteria. */
+    struct slName *matchTrackList = encodeFindMatchingSubtracks(inTrackList, selGroupList);
     struct slName *matchTrack;
 
+    /* Print out some information about the cluster overall. */
     printf("<B>Items in Cluster:</B> %s of %d<BR>\n", cluster->name, slCount(matchTrackList));
-    printf("<B>Maximum Item Score (out of 1000):</B> %d<BR>\n", cluster->score);
+    printf("<B>Cluster Score (out of 1000):</B> %d<BR>\n", cluster->score);
     printPos(cluster->chrom, cluster->chromStart, cluster->chromEnd, NULL, TRUE, NULL);
 
-
+    /* In a new section put up a table with a row for each matching track that has peaks here. */
+    webNewSection("List of Items in Cluster");
     webPrintLinkTableStart();
+    printClusterTableHeader(displayGroupList);
     int rowIx = 0;
     for (matchTrack = matchTrackList; matchTrack != NULL; matchTrack = matchTrack->next)
         {
-	if (showMatchingTrack(matchTrack->name, cluster, conn, tdb, displayGroupList,
-		TRUE, FALSE, &rowIx))
-	    printf("</TR><TR>\n");
+	showMatchingTrack(matchTrack->name, cluster, conn, tdb, displayGroupList,
+		&rowIx);
 	}
     webPrintLinkTableEnd();
+    }
+webNewSection("Track Description");
+printTrackHtml(tdb);
+hFreeConn(&conn);
+}
+
+char *findFactorId(struct slName *trackList, char *label)
+/* Given factor label, find factor id by asking each track in trackList; errAbort if none has it. */
+{
+struct slName *track;
+for (track = trackList; track != NULL; track = track->next)
+    {
+    struct trackDb *tdb = hashMustFindVal(trackHash, track->name);
+    char *factorId = compositeGroupId(tdb, "factor", label);
+    if (factorId != NULL)
+        return factorId;
+    }
+errAbort("Couldn't find factor labeled %s", label);
+return NULL;
+}
 
+void doFactorSource(struct sqlConnection *conn, struct trackDb *tdb, char *item, int start)
+/* Display details on a bed 15 cluster item.  Work in progress: still prints uglyf output. */
+{
+int rowOffset = hOffsetPastBin(database, seqName, tdb->table);
+char **row;
+struct sqlResult *sr;
+char query[256];
+safef(query, sizeof(query),
+	"select * from %s where  name = '%s' and chrom = '%s' and chromStart = %d",
+	tdb->table, item, seqName, start);
+sr = sqlGetResult(conn, query);
+row = sqlNextRow(sr);
+struct bed *cluster = NULL;
+if (row != NULL)
+    cluster = bedLoadN(row + rowOffset, 15);
+sqlFreeResult(&sr);
 
-    uglyf("<B>inputTracks:</B> %s<BR>\n", inputTracks);
-    uglyf("<B>inputTracksSubgroupSelect:</B> %s<BR>\n", inputTracksSubgroupSelect);
-    uglyf("<B>inputTracksSubgroupDisplay:</B> %s<BR>\n", inputTracksSubgroupDisplay);
 
-    uglyf("<B>matchingTracks:</B> %d<BR>\n", slCount(matchTrackList));
-    uglyf("<B>displayGroupList:</B> %d<BR>\n", slCount(displayGroupList));
+if (cluster != NULL)
+    {
+    printf("<B>Factor:</B> %s<BR>\n", cluster->name);
+    printf("<B>Cluster Score (out of 1000):</B> %d<BR>\n", cluster->score);
+    printPos(cluster->chrom, cluster->chromStart, cluster->chromEnd, NULL, TRUE, NULL);
 
+    char *sourceTable = trackDbRequiredSetting(tdb, "sourceTable");
+    uglyf("<B>sourceTable:</B> %s<BR>\n", sourceTable);
+
+    /* Get list of tracks we'll look through for input. */
+    char *inputTracks = trackDbRequiredSetting(tdb, "inputTracks");
+    struct slName *inTrackList = stringToSlNames(inputTracks);
+    uglyf("<B>inputTracks:</B> %s (%d)<BR>\n", inputTracks, slCount(inTrackList));
+
+    /* Get list of subgroups to select on */
+    char *inputTracksSubgroupSelect = trackDbRequiredSetting(tdb, "inputTracksSubgroupSelect");
+    struct slPair *selGroupList = slPairFromString(inputTracksSubgroupSelect);
+    uglyf("<B>inputTracksSubgroupSelect:</B> %s (%d)<BR>\n", inputTracksSubgroupSelect, slCount(selGroupList));
+
+    /* Figure out factor ID and add it as selection criteria. */
+    char *factorId = findFactorId(inTrackList, cluster->name);
+    uglyf("<B>cluster->id:</B> %s  <B>factorId:</B> %s<BR>\n", cluster->name, factorId);
+    struct slPair *factorSel = slPairNew("factor", cloneString(factorId));
+    slAddHead(&selGroupList, factorSel);
+
+    /* Get list of tracks that match criteria. */
+    struct slName *matchTrackList = encodeFindMatchingSubtracks(inTrackList, selGroupList);
+    /* TODO: display the matching tracks; for now only their count is printed below. */
+    uglyf("<B>matchTrackList:</B> %d elements<BR>\n", slCount(matchTrackList));
 
     }
-printTrackHtml(tdb);
-hFreeConn(&conn);
 }