610f3effe22886348ae5fb6180cee7871303683d kate Fri Nov 1 14:25:05 2013 -0700 1. Limit motif hit info to highest scoring 2. Move motif score info near Motif panel. 3. Add strand info to motif hit description. refs #9092 diff --git src/hg/hgc/peakClusters.c src/hg/hgc/peakClusters.c index a8bb4a5..3d1a190 100644 --- src/hg/hgc/peakClusters.c +++ src/hg/hgc/peakClusters.c @@ -366,129 +366,146 @@ tdb->track); printf("List all items assayed"); printf("
\n"); webNewSection("Track Description"); printTrackHtml(tdb); hFreeConn(&conn); } void doFactorSource(struct sqlConnection *conn, struct trackDb *tdb, char *item, int start) /* Display detailed info about a cluster of TFBS peaks from other tracks. */ { int rowOffset = hOffsetPastBin(database, seqName, tdb->table); char **row; struct sqlResult *sr; char query[256]; -char *motifTable = NULL; -// TODO: deal with hard-coded table names -//#ifdef TXCLUSTER_MOTIFS_TABLE -motifTable = TXCLUSTER_MOTIFS_TABLE; -//#endif +char *motifTable = trackDbSetting(tdb, "motifTable"); // localizations +char *motifPwmTable = trackDbSetting(tdb, "motifPwmTable"); // PWM used to draw sequence logo sqlSafef(query, sizeof(query), "select * from %s where name = '%s' and chrom = '%s' and chromStart = %d", tdb->table, item, seqName, start); sr = sqlGetResult(conn, query); row = sqlNextRow(sr); struct factorSource *cluster = NULL; if (row != NULL) cluster = factorSourceLoad(row + rowOffset); sqlFreeResult(&sr); -if (cluster != NULL) - { +if (cluster == NULL) + errAbort("Error loading cluster from track %s", tdb->track); + char *sourceTable = trackDbRequiredSetting(tdb, "sourceTable"); struct dnaMotif *motif = NULL; struct dnaSeq **seqs = NULL; struct bed6FloatScore *hits = NULL; if (motifTable != NULL && sqlTableExists(conn, motifTable)) { struct sqlResult *sr; int rowOffset; char where[256]; - motif = loadDnaMotif(item, "transRegCodeMotif"); + if (motifPwmTable != NULL && sqlTableExists(conn, motifPwmTable)) + motif = loadDnaMotif(item, motifPwmTable); + + #define HIGHEST_SCORING + #ifdef HIGHEST_SCORING + sqlSafefFrag(where, sizeof(where), "name = '%s' order by score desc", item); + #else sqlSafefFrag(where, sizeof(where), "name = '%s'", item); - sr = hRangeQuery(conn, "wgEncodeRegTfbsClusteredMotifs", cluster->chrom, cluster->chromStart, + #endif + sr = hRangeQuery(conn, motifTable, cluster->chrom, cluster->chromStart, cluster->chromEnd, where, &rowOffset); + #ifdef HIGHEST_SCORING + if ((row = sqlNextRow(sr)) != NULL) + #else while ((row = sqlNextRow(sr)) != NULL) + #endif { struct bed6FloatScore *hit = NULL; AllocVar(hit); hit->chromStart = sqlUnsigned(row[rowOffset + 1]); hit->chromEnd = sqlUnsigned(row[rowOffset + 2]); hit->score = sqlFloat(row[rowOffset + 4]); hit->strand[0] = row[rowOffset + 5][0]; slAddHead(&hits, hit); } sqlFreeResult(&sr); } char *factorLink = cluster->name; char *vocab = trackDbSetting(tdb, "controlledVocabulary"); if (vocab != NULL) { char *file = cloneFirstWord(vocab); factorLink = controlledVocabLink(file, "term", factorLink, factorLink, factorLink, ""); } printf("Factor: %s
\n", factorLink); printf("Cluster Score (out of 1000): %d
\n", cluster->score); +printPos(cluster->chrom, cluster->chromStart, cluster->chromEnd, NULL, TRUE, NULL); + +int hitCount = 0; +if (hits != NULL) + hitCount = slCount(hits); + if (motif != NULL && hits != NULL) { struct bed6FloatScore *hit = NULL; int i; seqs = needMem(sizeof(struct dnaSeq *) * slCount(hits)); char posLink[1024]; - for (hit = hits, i = 0; hit != NULL; hit = hit->next, i++) - { char query[256]; float maxScore = -1; - sqlSafef(query, sizeof(query), - "select max(score) from %s where name = '%s'", - "wgEncodeRegTfbsClusteredMotifs", item); + "select max(score) from %s where name = '%s'", motifTable, item); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) { if(!isEmpty(row[0])) { maxScore = sqlFloat(row[0]); } } sqlFreeResult(&sr); + puts("

"); + for (hit = hits, i = 0; hit != NULL; hit = hit->next, i++) + { + struct dnaSeq *seq = hDnaFromSeq(database, seqName, hit->chromStart, hit->chromEnd, dnaLower); if(hit->strand[0] == '-') reverseComplement(seq->dna, seq->size); seqs[i] = seq; // TODO: move to hgc.c (with other pos printers) safef(posLink, sizeof(posLink),"%s:%d-%d", hgTracksPathAndSettings(), database, cluster->chrom, hit->chromStart+1, hit->chromEnd, cluster->chrom, hit->chromStart+1, hit->chromEnd); - printf("Motif Score #%d: %.2f (max: %.2f) at %s
", i + 1, - hit->score, maxScore, posLink); + printf("Motif Score"); + if (hitCount > 1) + printf("#%d", i + 1); + printf(": %.2f (%s max: %.2f) at %s %c
", + hit->score, cluster->name, maxScore, posLink, (int)hit->strand[0]); } } - printPos(cluster->chrom, cluster->chromStart, cluster->chromEnd, NULL, TRUE, NULL); if (seqs != NULL) { - motifMultipleHitsSection(seqs, slCount(hits), motif); + motifMultipleHitsSection(seqs, hitCount, motif); } /* Get list of tracks we'll look through for input. */ char *inputTrackTable = trackDbRequiredSetting(tdb, "inputTrackTable"); sqlSafef(query, sizeof(query), "select tableName from %s where factor='%s' order by source", inputTrackTable, cluster->name); /* Next do the lists of hits and misses. We have the hits from the non-zero signals in * cluster->expScores. We need to figure out the sources actually assayed though * some other way. We'll do this by one of two techniques. */ char *inputTableFieldDisplay = trackDbSetting(tdb, "inputTableFieldDisplay"); if (inputTableFieldDisplay != NULL) { struct slName *fieldList = stringToSlNames(inputTableFieldDisplay); @@ -507,17 +524,16 @@ printClusterTableHeader(fieldList, TRUE, FALSE, FALSE); printFactorSourceTableHits(cluster, conn, sourceTable, inputTrackTable, fieldList, TRUE, vocab); webPrintLinkTableEnd(); } else { errAbort("Missing required trackDb setting %s for track %s", "inputTableFieldDisplay", tdb->track); } webNewSection("Table of abbreviations for cells"); hPrintFactorSourceAbbrevTable(conn, tdb); webNewSection("Track Description"); } -}