abbdd533a45a6a3c5bad8b903da484d1e12e7a9a kent Wed Mar 16 21:38:53 2011 -0700 Making it so factorSource tracks can list hits and misses based on the table of inputs rather than relying on the trackDb system. Needed now that integrated regulatory TFBS track is based on data not in other tracks yet. diff --git src/hg/hgc/peakClusters.c src/hg/hgc/peakClusters.c index e8285b5..3f8eaae 100644 --- src/hg/hgc/peakClusters.c +++ src/hg/hgc/peakClusters.c @@ -47,52 +47,41 @@ { char *val = findGroupTagVal(tdb, tag); if (val == NULL) errAbort("Couldn't find %s in subGroups tag of %s", tag, tdb->track); return val; } char *findGroupLabel(struct trackDb *tdb, char *group) /* Given name of group, ruffle through all subGroupN tags, looking for one that * matches group */ { char *groupId = mustFindGroupTagVal(tdb, group); return compositeGroupLabel(tdb, group, groupId); } -static void printClusterTableHeader(struct slName *displayGroupList) +static void printClusterTableHeader(struct slName *otherCols, + boolean withDescription, boolean withSignal) /* Print out header fields table of tracks in cluster */ { webPrintLabelCell("#"); +if (withSignal) webPrintLabelCell("signal"); -struct slName *displayGroup; -for (displayGroup = displayGroupList; displayGroup != NULL; displayGroup = displayGroup->next) - { - webPrintLabelCell(displayGroup->name); - } -webPrintLabelCell("description"); -} - -static void printOutOfClusterTableHeader(struct slName *displayGroupList) -/* Print out header fields for table of tracks not in cluster */ -{ -webPrintLabelCell("#"); -struct slName *displayGroup; -for (displayGroup = displayGroupList; displayGroup != NULL; displayGroup = displayGroup->next) - { - webPrintLabelCell(displayGroup->name); - } +struct slName *col; +for (col = otherCols; col != NULL; col = col->next) + webPrintLabelCell(col->name); +if (withDescription) webPrintLabelCell("description"); } static void printTableInfo(struct trackDb *tdb, struct trackDb *clusterTdb, struct slName 
*displayGroupList) /* Print out info on table. */ { struct slName *displayGroup; for (displayGroup = displayGroupList; displayGroup != NULL; displayGroup = displayGroup->next) { char *label = findGroupLabel(tdb, displayGroup->name); char *linkedLabel = compositeLabelWithVocabLink(database, tdb, tdb, displayGroup->name, label); webPrintLinkCell(linkedLabel); } webPrintLinkCellStart(); @@ -140,57 +129,108 @@ } if (invert) { if (!peakList) showOnePeakOrMiss(tdb, clusterTdb, NULL, displayGroupList, pRowIx); } else { if (peakList) showOnePeakOrMiss(tdb, clusterTdb, peakList, displayGroupList, pRowIx); } sqlFreeResult(&sr); return result; } +static void printClusterTableHits(struct bed *cluster, struct sqlConnection *conn, + char *sourceTable, char *inputTrackTable, + struct slName *fieldList, boolean invert, char *vocab) +/* Put out lines in an html table that show assayed sources that have hits in this + * cluster, or if invert is set, that have misses. */ +{ +/* Make the monster SQL query to get all assays*/ +struct dyString *query = dyStringNew(0); +dyStringPrintf(query, "select %s.id", sourceTable); +struct slName *field; +for (field = fieldList; field != NULL; field = field->next) + dyStringPrintf(query, ",%s.%s", inputTrackTable, field->name); +dyStringPrintf(query, " from %s,%s ", inputTrackTable, sourceTable); +dyStringPrintf(query, " where %s.source = %s.description", inputTrackTable, sourceTable); +dyStringPrintf(query, " and factor='%s' order by %s.source", cluster->name, inputTrackTable); + +int displayNo = 0; +int fieldCount = slCount(fieldList); +struct sqlResult *sr = sqlGetResult(conn, query->string); +char **row; +while ((row = sqlNextRow(sr)) != NULL) + { + int sourceId = sqlUnsigned(row[0]); + double signal = cluster->expScores[sourceId]; + boolean hit = (signal > 0); + if (hit ^ invert) + { + printf("</TR><TR>\n"); + webPrintIntCell(++displayNo); + if (!invert) + webPrintDoubleCell(signal); + int i; + for (i=0; i<fieldCount; ++i) + { + char 
*fieldVal = row[i+1]; + if (vocab) + { + char *file = cloneFirstWord(vocab); + char *link = controlledVocabLink(file, "term", fieldVal, fieldVal, fieldVal, ""); + webPrintLinkCell(link); + } + else + webPrintLinkCell(fieldVal); + } + } + } +sqlFreeResult(&sr); +dyStringFree(&query); +} + + static struct slName *findMatchingSubtracks(struct trackDb *tdb) /* Find subtracks that match inputTracks tags. */ { /* Just list look up tableName in inputTrackTable and return the list. */ char *inputTrackTable = trackDbRequiredSetting(tdb, "inputTrackTable"); struct sqlConnection *conn = hAllocConn(database); char query[256]; safef(query, sizeof(query), "select tableName from %s order by source", inputTrackTable); struct slName *matchTrackList = sqlQuickList(conn, query); hFreeConn(&conn); return matchTrackList; } void doPeakClusterListItemsAssayed() /* Put up a page that shows all experiments associated with a cluster track. */ { struct trackDb *clusterTdb = tdbForTableArg(); struct slName *matchTrackList = findMatchingSubtracks(clusterTdb); struct slName *matchTrack; cartWebStart(cart, database, "List of items assayed in %s", clusterTdb->shortLabel); char *inputTracksSubgroupDisplay = trackDbRequiredSetting(clusterTdb, "inputTracksSubgroupDisplay"); struct slName *displayGroupList = stringToSlNames(inputTracksSubgroupDisplay); webPrintLinkTableStart(); -printOutOfClusterTableHeader(displayGroupList); +printClusterTableHeader(displayGroupList, TRUE, FALSE); int rowIx = 0; for (matchTrack = matchTrackList; matchTrack != NULL; matchTrack = matchTrack->next) { struct trackDb *tdb = hashFindVal(trackHash, matchTrack->name); showOnePeakOrMiss(tdb, clusterTdb, NULL, displayGroupList, &rowIx); } webPrintLinkTableEnd(); cartWebEnd(); } void doPeakClusters(struct trackDb *tdb, char *item) /* Display detailed info about a cluster of peaks from other tracks. 
*/ { int start = cartInt(cart, "o"); char *table = tdb->table; @@ -217,31 +257,31 @@ char *inputTracksSubgroupDisplay = trackDbRequiredSetting(tdb, "inputTracksSubgroupDisplay"); struct slName *displayGroupList = stringToSlNames(inputTracksSubgroupDisplay); /* Get list of tracks that match criteria. */ struct slName *matchTrackList = findMatchingSubtracks(tdb); struct slName *matchTrack; /* Print out some information about the cluster overall. */ printf("<B>Items in Cluster:</B> %s of %d<BR>\n", cluster->name, slCount(matchTrackList)); printf("<B>Cluster Score (out of 1000):</B> %d<BR>\n", cluster->score); printPos(cluster->chrom, cluster->chromStart, cluster->chromEnd, NULL, TRUE, NULL); /* In a new section put up list of hits. */ webNewSection("List of Items in Cluster"); webPrintLinkTableStart(); - printClusterTableHeader(displayGroupList); + printClusterTableHeader(displayGroupList, TRUE, TRUE); int rowIx = 0; for (matchTrack = matchTrackList; matchTrack != NULL; matchTrack = matchTrack->next) { showMatchingTrack(matchTrack->name, cluster, conn, tdb, displayGroupList, FALSE, &rowIx); } webPrintLinkTableEnd(); } printf("<A HREF=\"%s&g=htcListItemsAssayed&table=%s\" TARGET_blank>", hgcPathAndSettings(), tdb->track); printf("List all items assayed"); printf("</A><BR>\n"); webNewSection("Track Description"); printTrackHtml(tdb); cartWebEnd(); @@ -275,30 +315,31 @@ motifTable = TXCLUSTER_MOTIFS_TABLE; #endif safef(query, sizeof(query), "select * from %s where name = '%s' and chrom = '%s' and chromStart = %d", tdb->table, item, seqName, start); sr = sqlGetResult(conn, query); row = sqlNextRow(sr); struct bed *cluster = NULL; if (row != NULL) cluster = bedLoadN(row + rowOffset, 15); sqlFreeResult(&sr); if (cluster != NULL) { + char *sourceTable = trackDbRequiredSetting(tdb, "sourceTable"); struct dnaMotif *motif = NULL; struct dnaSeq **seqs = NULL; struct bed6FloatScore *hits = NULL; if(motifTable != NULL && sqlTableExists(conn, motifTable)) { struct sqlResult *sr; 
int rowOffset; char where[256]; motif = loadDnaMotif(item, "transRegCodeMotif"); safef(where, sizeof(where), "name = '%s'", item); sr = hRangeQuery(conn, "wgEncodeRegTfbsClusteredMotifs", cluster->chrom, cluster->chromStart, cluster->chromEnd, where, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) @@ -345,51 +386,86 @@ } } printPos(cluster->chrom, cluster->chromStart, cluster->chromEnd, NULL, TRUE, NULL); if(seqs != NULL) { motifMultipleHitsSection(seqs, slCount(hits), motif); } /* Get list of tracks we'll look through for input. */ char *inputTrackTable = trackDbRequiredSetting(tdb, "inputTrackTable"); safef(query, sizeof(query), "select tableName from %s where factor='%s' order by source", inputTrackTable, cluster->name); struct slName *matchTrackList = sqlQuickList(conn, query); struct slName *matchTrack; + /* Next do the lists of hits and misses. We have the hits from the non-zero signals in + * cluster->expScores. We need to figure out the sources actually assayed though + * some other way. We'll do this by one of two techniques. + * If the inputTracksSubgroupDisplay is set, we'll try and figure out what was + * assayed by looking at the subgroup stuff in trackDb, which works if everything's + * part of a composite. If not, we'll use the inputTrackTable. */ /* Get list of subgroups to display */ - char *inputTracksSubgroupDisplay = trackDbRequiredSetting(tdb, "inputTracksSubgroupDisplay"); + char *inputTracksSubgroupDisplay = trackDbSetting(tdb, "inputTracksSubgroupDisplay"); + char *inputTableFieldDisplay = trackDbSetting(tdb, "inputTableFieldDisplay"); + if (inputTracksSubgroupDisplay != NULL) + { struct slName *displayGroupList = stringToSlNames(inputTracksSubgroupDisplay); /* In a new section put up list of hits. 
*/ webNewSection("List of %s Items in Cluster", cluster->name); webPrintLinkTableStart(); - printClusterTableHeader(displayGroupList); + printClusterTableHeader(displayGroupList, TRUE, TRUE); int rowIx = 0; for (matchTrack = matchTrackList; matchTrack != NULL; matchTrack = matchTrack->next) { showMatchingTrack(matchTrack->name, cluster, conn, tdb, displayGroupList, FALSE, &rowIx); } webPrintLinkTableEnd(); webNewSection("List of cells assayed with %s but without hits in cluster", cluster->name); webPrintLinkTableStart(); - printOutOfClusterTableHeader(displayGroupList); + printClusterTableHeader(displayGroupList, TRUE, FALSE); rowIx = 0; for (matchTrack = matchTrackList; matchTrack != NULL; matchTrack = matchTrack->next) { showMatchingTrack(matchTrack->name, cluster, conn, tdb, displayGroupList, TRUE, &rowIx); } webPrintLinkTableEnd(); + } + else if (inputTableFieldDisplay != NULL) + { + struct slName *fieldList = stringToSlNames(inputTableFieldDisplay); + char *vocab = trackDbSetting(tdb, "controlledVocabulary"); + + /* In a new section put up list of hits. 
*/ + webNewSection("List of %s Items in Cluster", cluster->name); + webPrintLinkTableStart(); + printClusterTableHeader(fieldList, FALSE, TRUE); + printClusterTableHits(cluster, conn, sourceTable, + inputTrackTable, fieldList, FALSE, vocab); + webPrintLinkTableEnd(); + + webNewSection("List of cells assayed with %s but without hits in cluster", cluster->name); + webPrintLinkTableStart(); + printClusterTableHeader(fieldList, FALSE, FALSE); + printClusterTableHits(cluster, conn, sourceTable, + inputTrackTable, fieldList, TRUE, vocab); + webPrintLinkTableEnd(); + } + else + { + errAbort("Missing required trackDb setting %s or %s for track %s", + "inputTracksSubgroupDisplay", "inputTableFieldDisplay", tdb->track); + + } webNewSection("Table of abbreviations for cells"); - char *sourceTable = trackDbRequiredSetting(tdb, "sourceTable"); hPrintAbbreviationTable(conn, sourceTable, "Cell Type"); webNewSection("Track Description"); } }