09d617c6c75f512a0952d6fde7f39793815377f2 kate Thu Dec 13 13:18:00 2018 -0800 Add support for encode3/4 TFBS clusters. refs #21139 diff --git src/hg/hgc/peakClusters.c src/hg/hgc/peakClusters.c index 28f85d3..39f4254 100644 --- src/hg/hgc/peakClusters.c +++ src/hg/hgc/peakClusters.c @@ -233,30 +233,34 @@ { vocabFile = cloneFirstWord(vocab); } /* Make the monster SQL query to get all assays*/ struct dyString *query = dyStringNew(0); sqlDyStringPrintf(query, "select %s.id,%s.name,%s.tableName", sourceTable, sourceTable, inputTrackTable); struct slName *field; for (field = fieldList; field != NULL; field = field->next) sqlDyStringPrintf(query, ",%s.%s", inputTrackTable, field->name); sqlDyStringPrintf(query, " from %s,%s ", inputTrackTable, sourceTable); sqlDyStringPrintf(query, " where %s.source = %s.description", inputTrackTable, sourceTable); sqlDyStringPrintf(query, " and factor='%s' order by %s.source", cluster->name, inputTrackTable); +boolean encodeStanford = FALSE; +if (startsWith("encode3", sourceTable) || startsWith("encode4", sourceTable)) + encodeStanford = TRUE; + int displayNo = 0; int fieldCount = slCount(fieldList); struct sqlResult *sr = sqlGetResult(conn, query->string); char **row; while ((row = sqlNextRow(sr)) != NULL) { int sourceId = sqlUnsigned(row[0]); boolean hit = FALSE; int i; double signal = 0.0; for (i=0; i<cluster->expCount; i++) { if (cluster->expNums[i] == sourceId) { hit = TRUE; @@ -274,31 +278,46 @@ int i = 0; // find position of CV metadata in field list int offset = 3; struct slName *field = fieldList; for (i=0; i<fieldCount && field != NULL; ++i, field = field->next) { char *fieldVal = row[i+offset]; if (vocab) { char *link = cloneString(factorSourceVocabLink(vocabFile, field->name, fieldVal)); webPrintLinkCell(link); } else webPrintLinkCell(fieldVal); } - printMetadataForTable(row[2]); + char *table = row[2]; + if (encodeStanford) + { + char *file = stringIn("ENCFF", table); + if (!file) + webPrintLinkCell(table); + else + { + webPrintLinkCellStart(); + printf("<A target='_blank'" + "href='https://www.encodeproject.org/files/%s'>%s</A>", file, file); + webPrintLinkCellEnd(); + } + } + else + printMetadataForTable(table); } } sqlFreeResult(&sr); freez(&vocabFile); dyStringFree(&query); } void doPeakClusterListItemsAssayed() /* Put up a page that shows all experiments associated with a cluster track. */ { struct trackDb *clusterTdb = tdbForTableArg(); cartWebStart(cart, database, "List of items assayed in %s", clusterTdb->shortLabel); struct sqlConnection *conn = hAllocConn(database); char *inputTableFieldDisplay = trackDbSetting(clusterTdb, "inputTableFieldDisplay"); @@ -447,69 +466,64 @@ struct dnaSeq *seq = hDnaFromSeq(database, seqName, hit->chromStart, hit->chromEnd, dnaLower); if (seq == NULL) return; if (hit->strand[0] == '-') reverseComplement(seq->dna, seq->size); if (motifPwmTable != NULL && sqlTableExists(conn, motifPwmTable)) { motif = loadDnaMotif(hit->name, motifPwmTable); if (motif == NULL) return; motifLogoAndMatrix(&seq, 1, motif); } } -void doFactorSource(struct sqlConnection *conn, struct trackDb *tdb, char *item, int start) +void doFactorSource(struct sqlConnection *conn, struct trackDb *tdb, char *item, int start, int end) /* Display detailed info about a cluster of TFBS peaks from other tracks. */ { -int rowOffset = hOffsetPastBin(database, seqName, tdb->table); -char **row; -struct sqlResult *sr; -char query[256]; - -sqlSafef(query, sizeof(query), - "select * from %s where name = '%s' and chrom = '%s' and chromStart = %d", - tdb->table, item, seqName, start); -sr = sqlGetResult(conn, query); -row = sqlNextRow(sr); +char extraWhere[256]; +safef(extraWhere, sizeof extraWhere, "name='%s'", item); +int rowOffset; +struct sqlResult *sr = hRangeQuery(conn, tdb->table, seqName, start, end, extraWhere, &rowOffset); +char **row = sqlNextRow(sr); struct factorSource *cluster = NULL; if (row != NULL) cluster = factorSourceLoad(row + rowOffset); sqlFreeResult(&sr); if (cluster == NULL) errAbort("Error loading cluster from track %s", tdb->track); char *sourceTable = trackDbRequiredSetting(tdb, "sourceTable"); char *factorLink = cluster->name; char *vocab = trackDbSetting(tdb, "controlledVocabulary"); if (vocab != NULL) { char *file = cloneFirstWord(vocab); factorLink = wgEncodeVocabLink(file, "term", factorLink, factorLink, factorLink, ""); } printf("<B>Factor:</B> %s<BR>\n", factorLink); printf("<B>Cluster Score (out of 1000):</B> %d<BR>\n", cluster->score); printPos(cluster->chrom, cluster->chromStart, cluster->chromEnd, NULL, TRUE, item); - /* Get list of tracks we'll look through for input. */ char *inputTrackTable = trackDbRequiredSetting(tdb, "inputTrackTable"); -sqlSafef(query, sizeof(query), - "select tableName from %s where factor='%s' order by source", inputTrackTable, +char query[256]; +sqlSafef(query, sizeof(query), "select tableName from %s where factor='%s' order by source", + inputTrackTable, cluster->name); /* Next do the lists of hits and misses. We have the hits from the non-zero signals in * cluster->expScores. We need to figure out the sources actually assayed though * some other way. We'll do this by one of two techniques. */ char *inputTableFieldDisplay = trackDbSetting(tdb, "inputTableFieldDisplay"); if (inputTableFieldDisplay != NULL) { struct slName *fieldList = stringToSlNames(inputTableFieldDisplay); char *vocab = trackDbSetting(tdb, "controlledVocabulary"); /* In a new section put up list of hits. */ webNewSection("Assays for %s in Cluster", cluster->name); webPrintLinkTableStart(); printClusterTableHeader(fieldList, TRUE, FALSE, TRUE);