e70152e44cc66cc599ff6b699eb8adc07f3e656a
kent
  Sat May 24 21:09:34 2014 -0700
Adding Copyright NNNN Regents of the University of California to all files I believe with reasonable certainty were developed under UCSC employ or as part of Genome Browser copyright assignment.
diff --git src/hg/hgc/peakClusters.c src/hg/hgc/peakClusters.c
index 148e4ed..df02db2 100644
--- src/hg/hgc/peakClusters.c
+++ src/hg/hgc/peakClusters.c
@@ -1,542 +1,545 @@
+/* Copyright (C) 2014 The Regents of the University of California 
+ * See README in this or parent directory for licensing information. */
+
 /* Stuff to display details on tracks that are clusters of peaks (items) in other tracks. 
  * In particular peaks in either ENCODE narrowPeak or broadPeak settings, and residing in
  * composite tracks. 
  *
  * These come in two main forms currently:
  *     DNAse hypersensitive clusters - peaks clustered across cell lines stored in bed 5
  *                                     with no special type
  *     Transcription Factor Binding Sites (TFBS) - peaks from transcription factor ChIP-seq
  *                   across a number of transcription factors and cell lines. Stored in bed 15
  *                   (or BED5+ factorSource format)
  *                   plus sourceTable with type factorSource */
  
 #include "common.h"
 #include "hash.h"
 #include "jksql.h"
 #include "obscure.h"
 #include "hCommon.h"
 #include "hdb.h"
 #include "web.h"
 #include "cart.h"
 #include "trackDb.h"
 #include "hui.h"
 #include "hgc.h"
 #include "encode/encodePeak.h"
 #include "expRecord.h"
 #include "bed6FloatScore.h"
 #include "ra.h"
 #include "jsHelper.h"
 #include "factorSource.h"
 
 static void printClusterTableHeader(struct slName *otherCols, 
 	boolean withAbbreviation, boolean withDescription, boolean withSignal)
 /* Print out header fields table of tracks in cluster */
 {
 webPrintLabelCell("#");
 if (withSignal)
     webPrintLabelCell("signal");
 if (withAbbreviation)
     webPrintLabelCell("abr");
 struct slName *col;
 for (col = otherCols; col != NULL; col = col->next)
     webPrintLabelCell(col->name);
 if (withDescription)
     webPrintLabelCell("description");
 webPrintLabelCell("more info");
 }
 
 static double getSignalAt(char *table, struct bed *cluster)
 /* Get (average) signal from table entries that overlap cluster */
 {
 struct sqlConnection *conn = hAllocConn(database);
 int count = 0;
 double sum = 0;
 if (sqlTableExists(conn, table))  // Table might be withdrawn from data thrash
     {
     int rowOffset;
     struct sqlResult *sr = hRangeQuery(conn, table, cluster->chrom, cluster->chromStart, 
 	    cluster->chromEnd, NULL, &rowOffset);
     int signalCol = sqlFieldColumn(sr, "signalValue");
     if (signalCol < 0)
 	internalErr();
     char **row;
     while ((row = sqlNextRow(sr)) != NULL)
 	{
 	count += 1;
 	sum += sqlDouble(row[signalCol]);
 	}
     sqlFreeResult(&sr);
     hFreeConn(&conn);
     }
 
 if (count > 0)
     return sum/count;
 else
     return 0;
 }
 
 static void printControlledVocabFields(char **row, int fieldCount, 
 	struct slName *fieldList, char *vocabFile, struct hash *vocabHash)
 /* Print out fields from row, linking them to controlled vocab if need be. */
 {
 int i;
 struct slName *field;
 for (i=0, field = fieldList; i<fieldCount; ++i, field = field->next)
     {
     char *fieldVal = row[i];
     if (vocabFile && hashLookup(vocabHash, field->name))
 	{
 	char *link = controlledVocabLink(vocabFile, "term", 
 		fieldVal, fieldVal, fieldVal, "");
 	webPrintLinkCell(link);
 	}
     else
 	webPrintLinkCell(fieldVal);
     }
 }
 
 struct hash *getVocabHash(char *fileName)
 /* Get vocabulary term hash */
 {
 struct hash *hash = raTagVals(fileName, "type");
 hashAdd(hash, "cellType", NULL);	/* Will the kludge never end, no, never! */
 return hash;
 }
 
 static void printMetadataForTable(char *table)
 /* If table exists, _and_ tdb associated with it exists, print out
  * a metadata link that expands on click.  Otherwise print "unavailable" */
 {
 webPrintLinkCellStart();
 struct trackDb *tdb = hashFindVal(trackHash, table);
 if (tdb == NULL)
     printf("%s info n/a", table);
 else
     compositeMetadataToggle(database, tdb, "metadata", TRUE, FALSE);
 webPrintLinkCellEnd();
 }
 
 static void queryInputTrackTable(struct dyString *query, char *inputTrackTable,
                                 struct slName *fieldList)
 /* Construct query in dyString to return contents of inputTrackTable ordered appropriately */
 {
 struct dyString *fields = dyStringNew(0);
 struct slName *field;
 sqlDyStringPrintf(query, "select tableName ");
 for (field = fieldList; field != NULL; field = field->next)
     sqlDyStringPrintfFrag(fields, ",%s", field->name);
 sqlDyStringPrintf(query, "%-s from %s", fields->string, inputTrackTable);
 if (fieldList != NULL)
     // skip leading comma
     dyStringPrintf(query, " order by %s", fields->string+1);
 dyStringFree(&fields);
 }
 
 static void printPeakClusterTableHits(struct bed *cluster, struct sqlConnection *conn,
 	char *inputTrackTable, struct slName *fieldList, char *vocab)
 /* Put out a lines in an html table that shows assayed sources that have hits in this
  * cluster, or if invert is set, that have misses. */
 {
 char *vocabFile = NULL;
 struct hash *vocabHash = NULL;
 if (vocab)
     {
     vocabFile = cloneFirstWord(vocab);
     vocabHash = getVocabHash(vocabFile);
     }
 
 /* Make the SQL query to get the table and all other fields we want to show
  * from inputTrackTable. */
 struct dyString *query = dyStringNew(0);
 queryInputTrackTable(query, inputTrackTable, fieldList);
 
 int displayNo = 0;
 int fieldCount = slCount(fieldList);
 struct sqlResult *sr = sqlGetResult(conn, query->string);
 char **row;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     char *table = row[0];
     double signal = getSignalAt(table, cluster);
     if (signal != 0)
 	{
 	printf("</TR><TR>\n");
 	webPrintIntCell(++displayNo);
 	webPrintDoubleCell(signal);
 	printControlledVocabFields(row+1, fieldCount, fieldList, vocabFile, vocabHash);
 	printMetadataForTable(table);
 	}
     }
 sqlFreeResult(&sr);
 freez(&vocabFile);
 dyStringFree(&query);
 }
 
 static void printPeakClusterInputs(struct sqlConnection *conn,
 	char *inputTrackTable, struct slName *fieldList, char *vocab)
 /* Print out all input tables for clustering. */
 {
 char *vocabFile = NULL;
 struct hash *vocabHash = NULL;
 if (vocab)
     {
     vocabFile = cloneFirstWord(vocab);
     vocabHash = getVocabHash(vocabFile);
     }
 
 /* Make the SQL query to get the table and all other fields we want to show
  * from inputTrackTable. */
 struct dyString *query = dyStringNew(0);
 queryInputTrackTable(query, inputTrackTable, fieldList);
 
 int displayNo = 0;
 int fieldCount = slCount(fieldList);
 struct sqlResult *sr = sqlGetResult(conn, query->string);
 char **row;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     printf("</TR><TR>\n");
     webPrintIntCell(++displayNo);
     printControlledVocabFields(row+1, fieldCount, fieldList, vocabFile, vocabHash);
     printMetadataForTable(row[0]);
     }
 
 
 sqlFreeResult(&sr);
 freez(&vocabFile);
 dyStringFree(&query);
 }
 
 static char *factorSourceVocabLink(char *vocabFile, char *fieldName, char *fieldVal)
 /* Add link to show controlled vocabulary entry for term.
  * Handles 'target' (factor) which is a special case, derived from Antibody entries */
 {
 char *vocabType = (sameString(fieldName, "target") || sameString(fieldName, "factor")) ?
                     "target" : "term";
 return controlledVocabLink(vocabFile, vocabType, fieldVal, fieldVal, fieldVal, "");
 }
 
 static void printFactorSourceTableHits(struct factorSource *cluster, struct sqlConnection *conn,
 	char *sourceTable, char *inputTrackTable, 
 	struct slName *fieldList, boolean invert, char *vocab)
 /* Put out a lines in an html table that shows assayed sources that have hits in this
  * cluster, or if invert is set, that have misses. */
 {
 char *vocabFile = NULL;
 if (vocab)
     {
     vocabFile = cloneFirstWord(vocab);
     }
 
 /* Make the monster SQL query to get all assays*/
 struct dyString *query = dyStringNew(0);
 sqlDyStringPrintf(query, "select %s.id,%s.name,%s.tableName", sourceTable, sourceTable, 
 	inputTrackTable);
 struct slName *field;
 for (field = fieldList; field != NULL; field = field->next)
     sqlDyStringPrintf(query, ",%s.%s", inputTrackTable, field->name);
 sqlDyStringPrintf(query, " from %s,%s ", inputTrackTable, sourceTable);
 sqlDyStringPrintf(query, " where %s.source = %s.description", inputTrackTable, sourceTable);
 sqlDyStringPrintf(query, " and factor='%s' order by %s.source", cluster->name, inputTrackTable);
 
 int displayNo = 0;
 int fieldCount = slCount(fieldList);
 struct sqlResult *sr = sqlGetResult(conn, query->string);
 char **row;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     int sourceId = sqlUnsigned(row[0]);
     boolean hit = FALSE;
     int i;
     double signal = 0.0;
     for (i=0; i<cluster->expCount; i++)
         {
         if (cluster->expNums[i] == sourceId)
             {
             hit = TRUE;
             signal = cluster->expScores[i];
             break;
             }
         }
     if (hit ^ invert)
         {
 	printf("</TR><TR>\n");
 	webPrintIntCell(++displayNo);
 	if (!invert)
 	    webPrintDoubleCell(signal);
 	webPrintLinkCell(row[1]);
 	int i = 0;
         // find position of CV metadata in field list
         int offset = 3;
         struct slName *field = fieldList;
 	for (i=0; i<fieldCount && field != NULL; ++i, field = field->next)
 	    {
 	    char *fieldVal = row[i+offset];
 	    if (vocab)
 	        {
                 char *link = cloneString(factorSourceVocabLink(vocabFile, field->name, fieldVal));
 		webPrintLinkCell(link);
 		}
 	    else
 		webPrintLinkCell(fieldVal);
 	    }
 	printMetadataForTable(row[2]);
 	}
     }
 sqlFreeResult(&sr);
 freez(&vocabFile);
 dyStringFree(&query);
 }
 
 void doPeakClusterListItemsAssayed()
 /* Put up a page that shows all experiments associated with a cluster track. */
 {
 struct trackDb *clusterTdb = tdbForTableArg();
 cartWebStart(cart, database, "List of items assayed in %s", clusterTdb->shortLabel);
 struct sqlConnection *conn = hAllocConn(database);
 
 char *inputTableFieldDisplay = trackDbSetting(clusterTdb, "inputTableFieldDisplay");
 webPrintLinkTableStart();
 if (inputTableFieldDisplay)
     {
     struct slName *fieldList = stringToSlNames(inputTableFieldDisplay);
     printClusterTableHeader(fieldList, FALSE, FALSE, FALSE);
     char *vocab = trackDbSetting(clusterTdb, "controlledVocabulary");
     char *inputTrackTable = trackDbRequiredSetting(clusterTdb, "inputTrackTable");
     printPeakClusterInputs(conn, inputTrackTable, fieldList, vocab);
     }
 else
     errAbort("Missing required trackDb setting %s for track %s", "inputTableFieldDisplay", 
                 clusterTdb->track);
 webPrintLinkTableEnd();
 hFreeConn(&conn);
 }
 
 void doPeakClusters(struct trackDb *tdb, char *item)
 /* Display detailed info about a cluster of DNase peaks from other tracks. */
 {
 int start = cartInt(cart, "o");
 char *table = tdb->table;
 int rowOffset = hOffsetPastBin(database, seqName, table);
 char query[256];
 struct sqlResult *sr;
 char **row;
 struct bed *cluster = NULL;
 struct sqlConnection *conn = hAllocConn(database);
 
 cartWebStart(cart, database, "%s item details", tdb->shortLabel);
 sqlSafef(query, sizeof(query),
 	"select * from %s where  name = '%s' and chrom = '%s' and chromStart = %d",
 	table, item, seqName, start);
 sr = sqlGetResult(conn, query);
 row = sqlNextRow(sr);
 if (row != NULL)
     cluster = bedLoadN(row+rowOffset, 5);
 sqlFreeResult(&sr);
 
 if (cluster != NULL)
     {
     /* Get list of subgroups to display */
     char *inputTableFieldDisplay = trackDbSetting(tdb, "inputTableFieldDisplay");
     if (inputTableFieldDisplay != NULL)
         {
 	struct slName *fieldList = stringToSlNames(inputTableFieldDisplay);
 	char *vocab = trackDbSetting(tdb, "controlledVocabulary");
 	char *inputTrackTable = trackDbRequiredSetting(tdb, "inputTrackTable");
 
 	/* Print out some information about the cluster overall. */
 	printf("<B>Items in Cluster:</B> %s of %d<BR>\n", cluster->name, 
 	    sqlRowCount(conn, sqlCheckIdentifier(inputTrackTable)));
 	printf("<B>Cluster Score (out of 1000):</B> %d<BR>\n", cluster->score);
 	printPos(cluster->chrom, cluster->chromStart, cluster->chromEnd, NULL, TRUE, NULL);
 
 	/* In a new section put up list of hits. */
 	webNewSection("List of Items in Cluster");
 	webPrintLinkTableStart();
 	printClusterTableHeader(fieldList, FALSE, FALSE, TRUE);
 	printPeakClusterTableHits(cluster, conn, inputTrackTable, fieldList, vocab);
 	}
     else
 	errAbort("Missing required trackDb setting %s for track %s",
 	    "inputTableFieldDisplay", tdb->track);
     webPrintLinkTableEnd();
     }
 printf("<A HREF=\"%s&g=htcListItemsAssayed&table=%s\" TARGET_blank>", hgcPathAndSettings(),
 	tdb->track);
 printf("List all items assayed");
 printf("</A><BR>\n");
 webNewSection("Track Description");
 printTrackHtml(tdb);
 hFreeConn(&conn);
 }
 
 void doClusterMotifDetails(struct sqlConnection *conn, struct trackDb *tdb, 
                                 struct factorSource *cluster)
 /* Display details about TF binding motif(s) in cluster */
 {
 char *motifTable = trackDbSetting(tdb, "motifTable");         // localizations
 char *motifPwmTable = trackDbSetting(tdb, "motifPwmTable");   // PWM used to draw sequence logo
 char *motifMapTable = trackDbSetting(tdb, "motifMapTable");   // map target to motif
 struct slName *motifNames = NULL, *mn; // list of canonical motifs for the factor
 struct dnaMotif *motif = NULL;
 struct bed6FloatScore *hit = NULL, *maxHit = NULL;
 char **row;
 char query[256];
 
 if (motifTable != NULL && sqlTableExists(conn, motifTable))
     {
     struct sqlResult *sr;
     int rowOffset;
     char where[256];
 
     if (motifMapTable == NULL || !sqlTableExists(conn, motifMapTable))
         {
         // Assume cluster name is motif name if there is no map table
         motifNames = slNameNew(cluster->name);
         }
     else
         {
         sqlSafef(query, sizeof(query),
                 "select motif from %s where target = '%s'", motifMapTable, cluster->name);
         char *ret = sqlQuickString(conn, query);
         if (ret == NULL)
             {
             // missing target from table -- no canonical motif
             webNewEmptySection();
             return;
             }
         motifNames = slNameListFromString(ret, ',');
         }
     for (mn = motifNames; mn != NULL; mn = mn->next)
         {
         sqlSafefFrag(where, sizeof(where), "name='%s' order by score desc limit 1", mn->name);
         sr = hRangeQuery(conn, motifTable, cluster->chrom, cluster->chromStart,
                      cluster->chromEnd, where, &rowOffset);
         if ((row = sqlNextRow(sr)) != NULL)
             {
             hit = bed6FloatScoreLoad(row + rowOffset);
             if (maxHit == NULL || maxHit->score < hit->score)
                 maxHit = hit;
             }
         sqlFreeResult(&sr);
         }
     }
 if (maxHit == NULL)
     {
     // Maintain table layout
     webNewEmptySection();
     return;
     }
 hit = maxHit;
 
 webNewSection("Canonical Motif in Cluster");
 char posLink[1024];
 safef(posLink, sizeof(posLink),"<a href=\"%s&db=%s&position=%s%%3A%d-%d\">%s:%d-%d</a>",
         hgTracksPathAndSettings(), database, 
             cluster->chrom, hit->chromStart+1, hit->chromEnd,
             cluster->chrom, hit->chromStart+1, hit->chromEnd);
 printf("<b>Motif Name:</b>  %s<br>\n", hit->name);
 printf("<b>Motif Score");
 printf(":</b>  %.2f<br>\n", hit->score);
 printf("<b>Motif Position:</b> %s<br>\n", posLink);
 printf("<b>Motif Strand:</b> %c<br>\n", (int)hit->strand[0]);
 
 struct dnaSeq *seq = hDnaFromSeq(database, seqName, hit->chromStart, hit->chromEnd, dnaLower);
 if (seq == NULL)
     return;
 if (hit->strand[0] == '-')
     reverseComplement(seq->dna, seq->size);
 if (motifPwmTable != NULL && sqlTableExists(conn, motifPwmTable))
     {
     motif = loadDnaMotif(hit->name, motifPwmTable);
     if (motif == NULL)
         return;
     motifLogoAndMatrix(&seq, 1, motif);
     }
 }
 
 void doFactorSource(struct sqlConnection *conn, struct trackDb *tdb, char *item, int start)
 /* Display detailed info about a cluster of TFBS peaks from other tracks. */
 {
 int rowOffset = hOffsetPastBin(database, seqName, tdb->table);
 char **row;
 struct sqlResult *sr;
 char query[256];
 
 sqlSafef(query, sizeof(query),
 	"select * from %s where name = '%s' and chrom = '%s' and chromStart = %d",
 	tdb->table, item, seqName, start);
 sr = sqlGetResult(conn, query);
 row = sqlNextRow(sr);
 struct factorSource *cluster = NULL;
 if (row != NULL)
     cluster = factorSourceLoad(row + rowOffset);
 sqlFreeResult(&sr);
 
 if (cluster == NULL)
     errAbort("Error loading cluster from track %s", tdb->track);
 
 char *sourceTable = trackDbRequiredSetting(tdb, "sourceTable");
 
 char *factorLink = cluster->name;
 char *vocab = trackDbSetting(tdb, "controlledVocabulary");
 if (vocab != NULL)
     {
     char *file = cloneFirstWord(vocab);
     factorLink = controlledVocabLink(file, "term", factorLink, factorLink, factorLink, "");
     }
 printf("<B>Factor:</B> %s<BR>\n", factorLink);
 printf("<B>Cluster Score (out of 1000):</B> %d<BR>\n", cluster->score);
 printPos(cluster->chrom, cluster->chromStart, cluster->chromEnd, NULL, TRUE, NULL);
 
 
 /* Get list of tracks we'll look through for input. */
 char *inputTrackTable = trackDbRequiredSetting(tdb, "inputTrackTable");
 sqlSafef(query, sizeof(query), 
     "select tableName from %s where factor='%s' order by source", inputTrackTable, 
     cluster->name);
 
 /* Next do the lists of hits and misses.  We have the hits from the non-zero signals in
  * cluster->expScores.  We need to figure out the sources actually assayed though
  * some other way.  We'll do this by one of two techniques. */
 char *inputTableFieldDisplay = trackDbSetting(tdb, "inputTableFieldDisplay");
 if (inputTableFieldDisplay != NULL)
     {
     struct slName *fieldList = stringToSlNames(inputTableFieldDisplay);
     char *vocab = trackDbSetting(tdb, "controlledVocabulary");
 
     /* In a new section put up list of hits. */
     webNewSection("Assays for %s in Cluster", cluster->name);
     webPrintLinkTableStart();
     printClusterTableHeader(fieldList, TRUE, FALSE, TRUE);
     printFactorSourceTableHits(cluster, conn, sourceTable, 
             inputTrackTable, fieldList, FALSE, vocab);
     webPrintLinkTableEnd();
 
     webNewSectionHeaderStart(TRUE);
     char sectionTitle[128];
     safef(sectionTitle, 
             sizeof(sectionTitle),"Assays for %s Without Hits in Cluster", cluster->name);
     jsBeginCollapsibleSectionOldStyle(cart, tdb->track, "cellNoHits", sectionTitle, FALSE);
     webNewSectionHeaderEnd();
     webPrintLinkTableStart();
     printClusterTableHeader(fieldList, TRUE, FALSE, FALSE);
     printFactorSourceTableHits(cluster, conn, sourceTable, 
             inputTrackTable, fieldList, TRUE, vocab);
     webPrintLinkTableEnd();
     jsEndCollapsibleSection();
     }
 else
     {
     errAbort("Missing required trackDb setting %s for track %s",
         "inputTableFieldDisplay", tdb->track);
     }
 webNewSectionHeaderStart(TRUE);
 jsBeginCollapsibleSectionOldStyle(cart, tdb->track, "cellSources", "Cell Abbreviations", FALSE);
 webNewSectionHeaderEnd();
 hPrintFactorSourceAbbrevTable(conn, tdb);
 jsEndCollapsibleSection();
 
 doClusterMotifDetails(conn, tdb, cluster);
 }