f64ab211b58975370b211059f34d198164154823
kate
  Sun Jul 21 22:25:16 2013 -0700
Create factorSource table, format BED5+, for more efficient representation of TFBS clusters.  #10097
diff --git src/hg/hgc/peakClusters.c src/hg/hgc/peakClusters.c
index fd82de1..b2aa75f 100644
--- src/hg/hgc/peakClusters.c
+++ src/hg/hgc/peakClusters.c
@@ -1,43 +1,43 @@
 /* Stuff to display details on tracks that are clusters of peaks (items) in other tracks. 
  * In particular peaks in either ENCODE narrowPeak or broadPeak settings, and residing in
  * composite tracks. 
  *
  * These come in two main forms currently:
  *     DNAse hypersensitive clusters - peaks clustered across cell lines stored in bed 5
  *                                     with no special type
  *     Transcription Factor Binding Sites (TFBS) - peaks from transcription factor ChIP-seq
  *                   across a number of transcription factors and cell lines. Stored in bed 15
+ *                   (or BED5+ factorSource format)
  *                   plus sourceTable with type factorSource */
  
 #include "common.h"
 #include "hash.h"
 #include "jksql.h"
 #include "obscure.h"
 #include "hCommon.h"
 #include "hdb.h"
 #include "web.h"
 #include "cart.h"
 #include "trackDb.h"
 #include "hui.h"
 #include "hgc.h"
 #include "encode/encodePeak.h"
 #include "expRecord.h"
 #include "bed6FloatScore.h"
 #include "ra.h"
-#include "txCluster.h"
-
+#include "factorSource.h"
 
 static void printClusterTableHeader(struct slName *otherCols, 
 	boolean withAbbreviation, boolean withDescription, boolean withSignal)
 /* Print out header fields table of tracks in cluster */
 {
 webPrintLabelCell("#");
 if (withSignal)
     webPrintLabelCell("signal");
 if (withAbbreviation)
     webPrintLabelCell("abr");
 struct slName *col;
 for (col = otherCols; col != NULL; col = col->next)
     webPrintLabelCell(col->name);
 if (withDescription)
     webPrintLabelCell("description");
@@ -194,62 +194,72 @@
 char **row;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     printf("</TR><TR>\n");
     webPrintIntCell(++displayNo);
     printControlledVocabFields(row+1, fieldCount, fieldList, vocabFile, vocabHash);
     printMetadataForTable(row[0]);
     }
 
 
 sqlFreeResult(&sr);
 freez(&vocabFile);
 dyStringFree(&query);
 }
 
-static void printFactorSourceTableHits(struct bed *cluster, struct sqlConnection *conn,
+static void printFactorSourceTableHits(struct factorSource *cluster, struct sqlConnection *conn,
 	char *sourceTable, char *inputTrackTable, 
 	struct slName *fieldList, boolean invert, char *vocab)
 /* Put out a lines in an html table that shows assayed sources that have hits in this
  * cluster, or if invert is set, that have misses. */
 {
 char *vocabFile = NULL;
 if (vocab)
     {
     vocabFile = cloneFirstWord(vocab);
     }
 
 /* Make the monster SQL query to get all assays*/
 struct dyString *query = dyStringNew(0);
 sqlDyStringPrintf(query, "select %s.id,%s.name,%s.tableName", sourceTable, sourceTable, 
 	inputTrackTable);
 struct slName *field;
 for (field = fieldList; field != NULL; field = field->next)
     sqlDyStringPrintf(query, ",%s.%s", inputTrackTable, field->name);
 sqlDyStringPrintf(query, " from %s,%s ", inputTrackTable, sourceTable);
 sqlDyStringPrintf(query, " where %s.source = %s.description", inputTrackTable, sourceTable);
 sqlDyStringPrintf(query, " and factor='%s' order by %s.source", cluster->name, inputTrackTable);
 
 int displayNo = 0;
 int fieldCount = slCount(fieldList);
 struct sqlResult *sr = sqlGetResult(conn, query->string);
 char **row;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     int sourceId = sqlUnsigned(row[0]);
-    double signal = cluster->expScores[sourceId];
-    boolean hit = (signal > 0);
+    boolean hit = FALSE;
+    int i;
+    double signal = 0.0;
+    for (i=0; i<cluster->expCount; i++)
+        {
+        if (cluster->expNums[i] == sourceId)
+            {
+            hit = TRUE;
+            signal = cluster->expScores[i];
+            break;
+            }
+        }
     if (hit ^ invert)
         {
 	printf("</TR><TR>\n");
 	webPrintIntCell(++displayNo);
 	if (!invert)
 	    webPrintDoubleCell(signal);
 	webPrintLinkCell(row[1]);
 	int i;
 	for (i=0; i<fieldCount; ++i)
 	    {
 	    char *fieldVal = row[i+3];
 	    if (vocab)
 	        {
 		char *link = controlledVocabLink(vocabFile, "term", 
 			fieldVal, fieldVal, fieldVal, "");
@@ -279,31 +289,31 @@
     {
     struct slName *fieldList = stringToSlNames(inputTableFieldDisplay);
     printClusterTableHeader(fieldList, FALSE, FALSE, FALSE);
     char *vocab = trackDbSetting(clusterTdb, "controlledVocabulary");
     char *inputTrackTable = trackDbRequiredSetting(clusterTdb, "inputTrackTable");
     printPeakClusterInputs(conn, inputTrackTable, fieldList, vocab);
     }
 else
     errAbort("Missing required trackDb setting %s for track %s", "inputTableFieldDisplay", 
                 clusterTdb->track);
 webPrintLinkTableEnd();
 hFreeConn(&conn);
 }
 
 void doPeakClusters(struct trackDb *tdb, char *item)
-/* Display detailed info about a cluster of peaks from other tracks. */
+/* Display detailed info about a cluster of DNase peaks from other tracks. */
 {
 int start = cartInt(cart, "o");
 char *table = tdb->table;
 int rowOffset = hOffsetPastBin(database, seqName, table);
 char query[256];
 struct sqlResult *sr;
 char **row;
 struct bed *cluster = NULL;
 struct sqlConnection *conn = hAllocConn(database);
 
 cartWebStart(cart, database, "%s item details", tdb->shortLabel);
 sqlSafef(query, sizeof(query),
 	"select * from %s where  name = '%s' and chrom = '%s' and chromStart = %d",
 	table, item, seqName, start);
 sr = sqlGetResult(conn, query);
@@ -337,49 +347,49 @@
     else
 	errAbort("Missing required trackDb setting %s for track %s",
 	    "inputTableFieldDisplay", tdb->track);
     webPrintLinkTableEnd();
     }
 printf("<A HREF=\"%s&g=htcListItemsAssayed&table=%s\" TARGET_blank>", hgcPathAndSettings(),
 	tdb->track);
 printf("List all items assayed");
 printf("</A><BR>\n");
 webNewSection("Track Description");
 printTrackHtml(tdb);
 hFreeConn(&conn);
 }
 
 void doFactorSource(struct sqlConnection *conn, struct trackDb *tdb, char *item, int start)
-/* Display detailed info about a cluster of peaks from other tracks. */
+/* Display detailed info about a cluster of TFBS peaks from other tracks. */
 {
 int rowOffset = hOffsetPastBin(database, seqName, tdb->table);
 char **row;
 struct sqlResult *sr;
 char query[256];
 char *motifTable = NULL;
 #ifdef TXCLUSTER_MOTIFS_TABLE
 motifTable = TXCLUSTER_MOTIFS_TABLE;
 #endif
 
 sqlSafef(query, sizeof(query),
 	"select * from %s where  name = '%s' and chrom = '%s' and chromStart = %d",
 	tdb->table, item, seqName, start);
 sr = sqlGetResult(conn, query);
 row = sqlNextRow(sr);
-struct bed *cluster = NULL;
+struct factorSource *cluster = NULL;
 if (row != NULL)
-    cluster = bedLoadN(row + rowOffset, 15);
+    cluster = factorSourceLoad(row + rowOffset);
 sqlFreeResult(&sr);
 
 if (cluster != NULL)
     {
     char *sourceTable = trackDbRequiredSetting(tdb, "sourceTable");
     struct dnaMotif *motif = NULL;
     struct dnaSeq **seqs = NULL;
     struct bed6FloatScore *hits = NULL;
 
     if(motifTable != NULL && sqlTableExists(conn, motifTable))
         {
         struct sqlResult *sr;
         int rowOffset;
         char where[256];