09d617c6c75f512a0952d6fde7f39793815377f2
kate
  Thu Dec 13 13:18:00 2018 -0800
Add support for encode3/4 TFBS clusters. refs #21139

diff --git src/hg/hgc/peakClusters.c src/hg/hgc/peakClusters.c
index 28f85d3..39f4254 100644
--- src/hg/hgc/peakClusters.c
+++ src/hg/hgc/peakClusters.c
@@ -233,30 +233,34 @@
     {
     vocabFile = cloneFirstWord(vocab);
     }
 
 /* Make the monster SQL query to get all assays*/
 struct dyString *query = dyStringNew(0);
 sqlDyStringPrintf(query, "select %s.id,%s.name,%s.tableName", sourceTable, sourceTable, 
 	inputTrackTable);
 struct slName *field;
 for (field = fieldList; field != NULL; field = field->next)
     sqlDyStringPrintf(query, ",%s.%s", inputTrackTable, field->name);
 sqlDyStringPrintf(query, " from %s,%s ", inputTrackTable, sourceTable);
 sqlDyStringPrintf(query, " where %s.source = %s.description", inputTrackTable, sourceTable);
 sqlDyStringPrintf(query, " and factor='%s' order by %s.source", cluster->name, inputTrackTable);
 
+boolean encodeStanford = FALSE;
+if (startsWith("encode3", sourceTable) || startsWith("encode4", sourceTable))
+    encodeStanford = TRUE;
+
 int displayNo = 0;
 int fieldCount = slCount(fieldList);
 struct sqlResult *sr = sqlGetResult(conn, query->string);
 char **row;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     int sourceId = sqlUnsigned(row[0]);
     boolean hit = FALSE;
     int i;
     double signal = 0.0;
     for (i=0; i<cluster->expCount; i++)
         {
         if (cluster->expNums[i] == sourceId)
             {
             hit = TRUE;
@@ -274,31 +278,46 @@
 	int i = 0;
         // find position of CV metadata in field list
         int offset = 3;
         struct slName *field = fieldList;
 	for (i=0; i<fieldCount && field != NULL; ++i, field = field->next)
 	    {
 	    char *fieldVal = row[i+offset];
 	    if (vocab)
 	        {
                 char *link = cloneString(factorSourceVocabLink(vocabFile, field->name, fieldVal));
 		webPrintLinkCell(link);
 		}
 	    else
 		webPrintLinkCell(fieldVal);
 	    }
-	printMetadataForTable(row[2]);
+        char *table = row[2];
+        if (encodeStanford)
+            {
+            char *file = stringIn("ENCFF", table);
+            if (!file)
+                webPrintLinkCell(table);
+            else
+                {
+                webPrintLinkCellStart();
+                printf("<A target='_blank' "
+                        "href='https://www.encodeproject.org/files/%s'>%s</A>", file, file);
+                webPrintLinkCellEnd();
+               } 
+            }
+        else
+            printMetadataForTable(table);
 	}
     }
 sqlFreeResult(&sr);
 freez(&vocabFile);
 dyStringFree(&query);
 }
 
 void doPeakClusterListItemsAssayed()
 /* Put up a page that shows all experiments associated with a cluster track. */
 {
 struct trackDb *clusterTdb = tdbForTableArg();
 cartWebStart(cart, database, "List of items assayed in %s", clusterTdb->shortLabel);
 struct sqlConnection *conn = hAllocConn(database);
 
 char *inputTableFieldDisplay = trackDbSetting(clusterTdb, "inputTableFieldDisplay");
@@ -447,69 +466,64 @@
 
 struct dnaSeq *seq = hDnaFromSeq(database, seqName, hit->chromStart, hit->chromEnd, dnaLower);
 if (seq == NULL)
     return;
 if (hit->strand[0] == '-')
     reverseComplement(seq->dna, seq->size);
 if (motifPwmTable != NULL && sqlTableExists(conn, motifPwmTable))
     {
     motif = loadDnaMotif(hit->name, motifPwmTable);
     if (motif == NULL)
         return;
     motifLogoAndMatrix(&seq, 1, motif);
     }
 }
 
-void doFactorSource(struct sqlConnection *conn, struct trackDb *tdb, char *item, int start)
+void doFactorSource(struct sqlConnection *conn, struct trackDb *tdb, char *item, int start, int end)
 /* Display detailed info about a cluster of TFBS peaks from other tracks. */
 {
-int rowOffset = hOffsetPastBin(database, seqName, tdb->table);
-char **row;
-struct sqlResult *sr;
-char query[256];
-
-sqlSafef(query, sizeof(query),
-	"select * from %s where name = '%s' and chrom = '%s' and chromStart = %d",
-	tdb->table, item, seqName, start);
-sr = sqlGetResult(conn, query);
-row = sqlNextRow(sr);
+char extraWhere[256];
+safef(extraWhere, sizeof extraWhere, "name='%s'", item);
+int rowOffset;
+struct sqlResult *sr = hRangeQuery(conn, tdb->table, seqName, start, end, extraWhere, &rowOffset);
+char **row = sqlNextRow(sr);
 struct factorSource *cluster = NULL;
 if (row != NULL)
     cluster = factorSourceLoad(row + rowOffset);
 sqlFreeResult(&sr);
 
 if (cluster == NULL)
     errAbort("Error loading cluster from track %s", tdb->track);
 
 char *sourceTable = trackDbRequiredSetting(tdb, "sourceTable");
 
 char *factorLink = cluster->name;
 char *vocab = trackDbSetting(tdb, "controlledVocabulary");
 if (vocab != NULL)
     {
     char *file = cloneFirstWord(vocab);
     factorLink = wgEncodeVocabLink(file, "term", factorLink, factorLink, factorLink, "");
     }
 printf("<B>Factor:</B> %s<BR>\n", factorLink);
 printf("<B>Cluster Score (out of 1000):</B> %d<BR>\n", cluster->score);
 printPos(cluster->chrom, cluster->chromStart, cluster->chromEnd, NULL, TRUE, item);
 
-
 /* Get list of tracks we'll look through for input. */
 char *inputTrackTable = trackDbRequiredSetting(tdb, "inputTrackTable");
-sqlSafef(query, sizeof(query), 
-    "select tableName from %s where factor='%s' order by source", inputTrackTable, 
+char query[256];
+sqlSafef(query, sizeof(query), "select tableName from %s where factor='%s' order by source", 
+                inputTrackTable, 
     cluster->name);
 
 /* Next do the lists of hits and misses.  We have the hits from the non-zero signals in
  * cluster->expScores.  We need to figure out the sources actually assayed though
  * some other way.  We'll do this by one of two techniques. */
 char *inputTableFieldDisplay = trackDbSetting(tdb, "inputTableFieldDisplay");
 if (inputTableFieldDisplay != NULL)
     {
     struct slName *fieldList = stringToSlNames(inputTableFieldDisplay);
     char *vocab = trackDbSetting(tdb, "controlledVocabulary");
 
     /* In a new section put up list of hits. */
     webNewSection("Assays for %s in Cluster", cluster->name);
     webPrintLinkTableStart();
     printClusterTableHeader(fieldList, TRUE, FALSE, TRUE);