6175649026c35358cc0871c3f2cdf39328736885 markd Sun Oct 30 20:44:31 2022 -0700 initial implementation of all gencode transcript rank filters, not GUI yet diff --git src/hg/hgTracks/gencodeTracks.c src/hg/hgTracks/gencodeTracks.c index 6fd9684..f3d71db 100644 --- src/hg/hgTracks/gencodeTracks.c +++ src/hg/hgTracks/gencodeTracks.c @@ -1,21 +1,22 @@ /* gencodeTracks - ENCODE GENCODE Genes tracks for both pilot and production ENCODE. * although these are used fundamentally different approaches to display */ /* Copyright (C) 2014 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" +#include "gencodeTracksCommon.h" #include "hgTracks.h" #include "hdb.h" #include "gencodeIntron.h" #include "genePredReader.h" #include "genePred.h" #include "encode/wgEncodeGencodeAttrs.h" /* item label symbolic names and constants. This must be * in sync with lib/hui.c:gencodeLabelControls() */ enum /* bit set of item labels */ { ITEM_LABEL_GENE_NAME = 0x01, ITEM_LABEL_GENE_ID = 0x02, ITEM_LABEL_TRANSCRIPT_ID = 0x04 @@ -268,64 +269,85 @@ } static void gencodeFilterBySetQuery(struct track *tg, filterBySetGetFuncType filterBySetGetFunc, struct gencodeQuery *gencodeQuery) /* build where sql clauses for filters or highlights. */ { filterBy_t *filterBySet = filterBySetGetFunc(tg->tdb, cart, NULL); filterBy_t *filterBy; for (filterBy = filterBySet; filterBy != NULL; filterBy = filterBy->next) { if (!filterByAllChosen(filterBy)) gencodeFilterByQuery(tg, filterBy, gencodeQuery); } filterBySetFree(&filterBySet); } +static void addFilterMaxTranscripsByRange(struct sqlConnection *conn, struct track *tg, struct gencodeQuery *gencodeQuery) +/* Add query for the maximum number of transcripts to display if requested and + * if transcriptRank is available in attrs */ +{ +// FIXME: tmp until we can get it in the UI +int maxTrans = cartCgiUsualInt(cart, "maxTrans", 0); +if (maxTrans == 0) + return; // zero disables +// do we have transcriptRank column? +if (!sqlColumnExists(conn, gencodeGetTableName(tg->tdb, "wgEncodeGencodeAttrs"), "transcriptRank")) + return ; + +// rank starts at 1, so anything less than or equal to max will be included +gencodeQueryBeginSubWhere(gencodeQuery); +sqlDyStringPrintf(gencodeQuery->where, "attrs.transcriptRank <= %d", maxTrans); +gencodeQuery->joinAttrs = TRUE; +gencodeQueryEndSubWhere(gencodeQuery); +gencodeQuery->isFiltered = TRUE; +} + static void addQueryTables(struct track *tg, struct gencodeQuery *gencodeQuery) /* add required from tables and joins */ { sqlDyStringPrintf(gencodeQuery->from, "%s g", tg->table); if (gencodeQuery->joinAttrs) { - sqlDyStringPrintf(gencodeQuery->from, ", %s attrs", trackDbRequiredSetting(tg->tdb, "wgEncodeGencodeAttrs")); + sqlDyStringPrintf(gencodeQuery->from, ", %s attrs", gencodeGetTableName(tg->tdb, "wgEncodeGencodeAttrs")); sqlDyStringPrintf(gencodeQuery->where, " and (attrs.transcriptId = g.name)"); } if (gencodeQuery->joinTranscriptSource) { - sqlDyStringPrintf(gencodeQuery->from, ", %s transSrc", trackDbRequiredSetting(tg->tdb, "wgEncodeGencodeTranscriptSource")); + sqlDyStringPrintf(gencodeQuery->from, ", %s transSrc", gencodeGetTableName(tg->tdb, "wgEncodeGencodeTranscriptSource")); sqlDyStringPrintf(gencodeQuery->where, " and (transSrc.transcriptId = g.name)"); } if (gencodeQuery->joinSupportLevel) { - sqlDyStringPrintf(gencodeQuery->from, ", %s supLevel", trackDbRequiredSetting(tg->tdb, "wgEncodeGencodeTranscriptionSupportLevel")); + sqlDyStringPrintf(gencodeQuery->from, ", %s supLevel", gencodeGetTableName(tg->tdb, "wgEncodeGencodeTranscriptionSupportLevel")); sqlDyStringPrintf(gencodeQuery->where, " and (supLevel.transcriptId = g.name)"); } if (gencodeQuery->joinTag) { - sqlDyStringPrintf(gencodeQuery->from, ", %s tag", trackDbRequiredSetting(tg->tdb, "wgEncodeGencodeTag")); + sqlDyStringPrintf(gencodeQuery->from, ", %s tag", gencodeGetTableName(tg->tdb, "wgEncodeGencodeTag")); sqlDyStringPrintf(gencodeQuery->where, " and (tag.transcriptId = g.name)"); } } -static void addQueryCommon(struct track *tg, filterBySetGetFuncType filterBySetGetFunc, struct gencodeQuery *gencodeQuery) +static void addQueryCommon(struct sqlConnection *conn, struct track *tg, filterBySetGetFuncType filterBySetGetFunc, struct gencodeQuery *gencodeQuery) /* Add tables and joins for both gene and highlight queries */ { // bin range overlap part hAddBinToQuery(winStart, winEnd, gencodeQuery->where); sqlDyStringPrintf(gencodeQuery->where, "(g.chrom = \"%s\") and (g.txStart < %u) and (g.txEnd > %u)", chromName, winEnd, winStart); gencodeFilterBySetQuery(tg, filterBySetGetFunc, gencodeQuery); +addFilterMaxTranscripsByRange(conn, tg, gencodeQuery); addQueryTables(tg, gencodeQuery); } static struct sqlResult *executeQuery(struct sqlConnection *conn, struct gencodeQuery *gencodeQuery) /* execute the actual SQL query */ { struct dyString *query = dyStringNew(0); sqlDyStringPrintf(query, "select %-s from %-s where %-s", dyStringContents(gencodeQuery->fields), dyStringContents(gencodeQuery->from), dyStringContents(gencodeQuery->where)); struct sqlResult *sr = sqlGetResult(conn, dyStringContents(query)); dyStringFree(&query); return sr; } static boolean annotIsGenePredExt(struct track *tg) @@ -347,100 +369,100 @@ gencodeQuery->isGenePredX = annotIsGenePredExt(tg); sqlDyStringPrintf(gencodeQuery->fields, "g.name, g.chrom, g.strand, g.txStart, g.txEnd, g.cdsStart, g.cdsEnd, g.exonCount, g.exonStarts, g.exonEnds"); gencodeQuery->genePredNumColumns = GENEPRED_NUM_COLS; if (gencodeQuery->isGenePredX) { sqlDyStringPrintf(gencodeQuery->fields, ", g.score, g.name2, g.cdsStartStat, g.cdsEndStat, g.exonFrames"); gencodeQuery->genePredNumColumns = GENEPREDX_NUM_COLS; } } static void geneQueryAddAttrsCols(struct track *tg, struct sqlConnection *conn, struct gencodeQuery *gencodeQuery) /* add attributes columns to query */ { -struct slName *fields = sqlFieldNames(conn, trackDbRequiredSetting(tg->tdb, "wgEncodeGencodeAttrs")); +struct slName *fields = sqlFieldNames(conn, gencodeGetTableName(tg->tdb, "wgEncodeGencodeAttrs")); sqlDyStringPrintf(gencodeQuery->fields, ", "); sqlDyStringPrintf(gencodeQuery->fields, "attrs.geneId, attrs.geneName, attrs.geneType, attrs.geneStatus, attrs.transcriptId, attrs.transcriptName, attrs.transcriptType, attrs.transcriptStatus, attrs.havanaGeneId, attrs.havanaTranscriptId, attrs.ccdsId, attrs.level, attrs.transcriptClass"); gencodeQuery->attrsNumColumns = WGENCODEGENCODEATTRS_MIM_NUM_COLS; if (slNameInList(fields, "proteinId")) { sqlDyStringPrintf(gencodeQuery->fields, ", attrs.proteinId"); gencodeQuery->attrsNumColumns++; } if (slNameInList(fields, "transcriptRank")) { sqlDyStringPrintf(gencodeQuery->fields, ", attrs.transcriptRank"); gencodeQuery->attrsNumColumns = WGENCODEGENCODEATTRS_NUM_COLS; } gencodeQuery->joinAttrs = TRUE; } -static struct gencodeQuery *geneQueryConstruct(struct track *tg, - struct sqlConnection *conn, +static struct gencodeQuery *geneQueryConstruct(struct sqlConnection *conn, + struct track *tg, boolean includeAttrs) /* construct the query for a GENCODE records, which includes filters. */ { struct gencodeQuery *gencodeQuery = gencodeQueryNew(); geneQueryAddGenePredCols(tg, gencodeQuery); if (includeAttrs) geneQueryAddAttrsCols(tg, conn, gencodeQuery); -addQueryCommon(tg, filterBySetGet, gencodeQuery); +addQueryCommon(conn, tg, filterBySetGet, gencodeQuery); return gencodeQuery; } -static struct gencodeQuery *highlightQueryConstruct(struct track *tg) +static struct gencodeQuery *highlightQueryConstruct(struct track *tg, struct sqlConnection *conn) /* construct the query for GENCODE ids which should be highlighted. * this essentially redoes the genePred query, only using the filter functions * and only getting ids */ { struct gencodeQuery *gencodeQuery = gencodeQueryNew(); sqlDyStringPrintf(gencodeQuery->fields, "g.name"); -addQueryCommon(tg, highlightBySetGet, gencodeQuery); +addQueryCommon(conn, tg, highlightBySetGet, gencodeQuery); return gencodeQuery; } static unsigned getHighlightColor(struct track *tg) /* get the highlightColor from trackDb, or a default if not found */ { unsigned char red = 255, green = 165, blue = 0; // Orange default char *colorStr = trackDbSetting(tg->tdb, "highlightColor"); if (colorStr != NULL) parseColor(colorStr, &red, &green, &blue); return MAKECOLOR_32(red, green, blue); } static void highlightByGetColor(struct genePred *gp, struct hash *highlightIds, unsigned highlightColor, struct linkedFeatures *lf) /* compute the highlight color based on a extra fields returned in a row, setting * the linkedFeatures field */ { if (hashLookup(highlightIds, gp->name) != NULL) { lf->highlightColor = highlightColor; lf->highlightMode = highlightBackground; } } static struct hash* loadHighlightIds(struct sqlConnection *conn, struct track *tg) /* Load ids (genePred names) in window for annotations to be highlighted. */ { struct hash *highlightIds = hashNew(0); -struct gencodeQuery *gencodeQuery = highlightQueryConstruct(tg); +struct gencodeQuery *gencodeQuery = highlightQueryConstruct(tg, conn); struct sqlResult *sr = executeQuery(conn, gencodeQuery); char **row; while ((row = sqlNextRow(sr)) != NULL) hashAddInt(highlightIds, row[0], 1); sqlFreeResult(&sr); return highlightIds; } static boolean getLabelCartVar(struct track *tg, char *labelName, boolean *anyExistsP) /* get the cart label value for a label type. Sort TRUE in anyExistsP if the variable exists. */ { char varSuffix[64]; safef(varSuffix, sizeof(varSuffix), "label.%s", labelName); char *value = cartUsualStringClosestToHome(cart, tg->tdb, FALSE, varSuffix, NULL); if (value != NULL) @@ -534,31 +556,31 @@ else lf->extra = cloneString(gp->name); wgEncodeGencodeAttrsFree(&attrs); return lf; } static void loadGencodeTrack(struct track *tg) /* Load genePreds in window info linked feature, with filtering, etc. */ { struct sqlConnection *conn = hAllocConn(database); unsigned enabledLabels = getEnabledLabels(tg); boolean needAttrs = (enabledLabels & ITEM_LABEL_GENE_ID) != 0; // only for certain labels struct hash *highlightIds = NULL; if (anyFilterBy(tg, highlightBySetGet)) highlightIds = loadHighlightIds(conn, tg); -struct gencodeQuery *gencodeQuery = geneQueryConstruct(tg, conn, needAttrs); +struct gencodeQuery *gencodeQuery = geneQueryConstruct(conn, tg, needAttrs); struct sqlResult *sr = executeQuery(conn, gencodeQuery); struct linkedFeatures *lfList = NULL; unsigned highlightColor = getHighlightColor(tg); char **row; while ((row = sqlNextRow(sr)) != NULL) slAddHead(&lfList, loadGencodeTranscript(tg, gencodeQuery, row, enabledLabels, highlightIds, highlightColor)); sqlFreeResult(&sr); hFreeConn(&conn); if (tg->visibility != tvDense) slSort(&lfList, linkedFeaturesCmpStart); else slReverse(&lfList); tg->items = lfList; if (gencodeQuery->isFiltered)