f64ab211b58975370b211059f34d198164154823 kate Sun Jul 21 22:25:16 2013 -0700 Create factorSource table, format BED5+, for more efficient representation of TFBS clusters. #10097 diff --git src/hg/hgc/peakClusters.c src/hg/hgc/peakClusters.c index fd82de1..b2aa75f 100644 --- src/hg/hgc/peakClusters.c +++ src/hg/hgc/peakClusters.c @@ -1,43 +1,43 @@ /* Stuff to display details on tracks that are clusters of peaks (items) in other tracks. * In particular peaks in either ENCODE narrowPeak or broadPeak settings, and residing in * composite tracks. * * These come in two main forms currently: * DNAse hypersensitive clusters - peaks clustered across cell lines stored in bed 5 * with no special type * Transcription Factor Binding Sites (TFBS) - peaks from transcription factor ChIP-seq * across a number of transcription factors and cell lines. Stored in bed 15 + * (or BED5+ factorSource format) * plus sourceTable with type factorSource */ #include "common.h" #include "hash.h" #include "jksql.h" #include "obscure.h" #include "hCommon.h" #include "hdb.h" #include "web.h" #include "cart.h" #include "trackDb.h" #include "hui.h" #include "hgc.h" #include "encode/encodePeak.h" #include "expRecord.h" #include "bed6FloatScore.h" #include "ra.h" -#include "txCluster.h" - +#include "factorSource.h" static void printClusterTableHeader(struct slName *otherCols, boolean withAbbreviation, boolean withDescription, boolean withSignal) /* Print out header fields table of tracks in cluster */ { webPrintLabelCell("#"); if (withSignal) webPrintLabelCell("signal"); if (withAbbreviation) webPrintLabelCell("abr"); struct slName *col; for (col = otherCols; col != NULL; col = col->next) webPrintLabelCell(col->name); if (withDescription) webPrintLabelCell("description"); @@ -194,62 +194,72 @@ char **row; while ((row = sqlNextRow(sr)) != NULL) { printf("</TR><TR>\n"); webPrintIntCell(++displayNo); printControlledVocabFields(row+1, fieldCount, fieldList, vocabFile, vocabHash); printMetadataForTable(row[0]); } sqlFreeResult(&sr); freez(&vocabFile); dyStringFree(&query); } -static void printFactorSourceTableHits(struct bed *cluster, struct sqlConnection *conn, +static void printFactorSourceTableHits(struct factorSource *cluster, struct sqlConnection *conn, char *sourceTable, char *inputTrackTable, struct slName *fieldList, boolean invert, char *vocab) /* Put out a lines in an html table that shows assayed sources that have hits in this * cluster, or if invert is set, that have misses. */ { char *vocabFile = NULL; if (vocab) { vocabFile = cloneFirstWord(vocab); } /* Make the monster SQL query to get all assays*/ struct dyString *query = dyStringNew(0); sqlDyStringPrintf(query, "select %s.id,%s.name,%s.tableName", sourceTable, sourceTable, inputTrackTable); struct slName *field; for (field = fieldList; field != NULL; field = field->next) sqlDyStringPrintf(query, ",%s.%s", inputTrackTable, field->name); sqlDyStringPrintf(query, " from %s,%s ", inputTrackTable, sourceTable); sqlDyStringPrintf(query, " where %s.source = %s.description", inputTrackTable, sourceTable); sqlDyStringPrintf(query, " and factor='%s' order by %s.source", cluster->name, inputTrackTable); int displayNo = 0; int fieldCount = slCount(fieldList); struct sqlResult *sr = sqlGetResult(conn, query->string); char **row; while ((row = sqlNextRow(sr)) != NULL) { int sourceId = sqlUnsigned(row[0]); - double signal = cluster->expScores[sourceId]; - boolean hit = (signal > 0); + boolean hit = FALSE; + int i; + double signal = 0.0; + for (i=0; i<cluster->expCount; i++) + { + if (cluster->expNums[i] == sourceId) + { + hit = TRUE; + signal = cluster->expScores[i]; + break; + } + } if (hit ^ invert) { printf("</TR><TR>\n"); webPrintIntCell(++displayNo); if (!invert) webPrintDoubleCell(signal); webPrintLinkCell(row[1]); int i; for (i=0; i<fieldCount; ++i) { char *fieldVal = row[i+3]; if (vocab) { char *link = controlledVocabLink(vocabFile, "term", fieldVal, fieldVal, fieldVal, ""); @@ -279,31 +289,31 @@ { struct slName *fieldList = stringToSlNames(inputTableFieldDisplay); printClusterTableHeader(fieldList, FALSE, FALSE, FALSE); char *vocab = trackDbSetting(clusterTdb, "controlledVocabulary"); char *inputTrackTable = trackDbRequiredSetting(clusterTdb, "inputTrackTable"); printPeakClusterInputs(conn, inputTrackTable, fieldList, vocab); } else errAbort("Missing required trackDb setting %s for track %s", "inputTableFieldDisplay", clusterTdb->track); webPrintLinkTableEnd(); hFreeConn(&conn); } void doPeakClusters(struct trackDb *tdb, char *item) -/* Display detailed info about a cluster of peaks from other tracks. */ +/* Display detailed info about a cluster of DNase peaks from other tracks. */ { int start = cartInt(cart, "o"); char *table = tdb->table; int rowOffset = hOffsetPastBin(database, seqName, table); char query[256]; struct sqlResult *sr; char **row; struct bed *cluster = NULL; struct sqlConnection *conn = hAllocConn(database); cartWebStart(cart, database, "%s item details", tdb->shortLabel); sqlSafef(query, sizeof(query), "select * from %s where name = '%s' and chrom = '%s' and chromStart = %d", table, item, seqName, start); sr = sqlGetResult(conn, query); @@ -337,49 +347,49 @@ else errAbort("Missing required trackDb setting %s for track %s", "inputTableFieldDisplay", tdb->track); webPrintLinkTableEnd(); } printf("<A HREF=\"%s&g=htcListItemsAssayed&table=%s\" TARGET_blank>", hgcPathAndSettings(), tdb->track); printf("List all items assayed"); printf("</A><BR>\n"); webNewSection("Track Description"); printTrackHtml(tdb); hFreeConn(&conn); } void doFactorSource(struct sqlConnection *conn, struct trackDb *tdb, char *item, int start) -/* Display detailed info about a cluster of peaks from other tracks. */ +/* Display detailed info about a cluster of TFBS peaks from other tracks. */ { int rowOffset = hOffsetPastBin(database, seqName, tdb->table); char **row; struct sqlResult *sr; char query[256]; char *motifTable = NULL; #ifdef TXCLUSTER_MOTIFS_TABLE motifTable = TXCLUSTER_MOTIFS_TABLE; #endif sqlSafef(query, sizeof(query), "select * from %s where name = '%s' and chrom = '%s' and chromStart = %d", tdb->table, item, seqName, start); sr = sqlGetResult(conn, query); row = sqlNextRow(sr); -struct bed *cluster = NULL; +struct factorSource *cluster = NULL; if (row != NULL) - cluster = bedLoadN(row + rowOffset, 15); + cluster = factorSourceLoad(row + rowOffset); sqlFreeResult(&sr); if (cluster != NULL) { char *sourceTable = trackDbRequiredSetting(tdb, "sourceTable"); struct dnaMotif *motif = NULL; struct dnaSeq **seqs = NULL; struct bed6FloatScore *hits = NULL; if(motifTable != NULL && sqlTableExists(conn, motifTable)) { struct sqlResult *sr; int rowOffset; char where[256];