06845edb9038bb632170891cebcdc8f477f2ea8d angie Fri Dec 4 08:59:38 2015 -0800 Add dbNSFP v3.1a (including VEST scores) to hg38 for hgVai. Add hgVai options for adding in transcript status info: GENCODE tags when applicable, knownCanonical for knownGene, refSeqStatus for refGene. refs #16502, #16503 diff --git src/hg/hgVai/hgVai.c src/hg/hgVai/hgVai.c index a1cf465..c4c19a4 100644 --- src/hg/hgVai/hgVai.c +++ src/hg/hgVai/hgVai.c @@ -22,33 +22,37 @@ #include "textOut.h" #include "trackHub.h" #include "hubConnect.h" #include "twoBit.h" #include "gpFx.h" #include "bigGenePred.h" #include "udc.h" #include "knetUdc.h" #include "md5.h" #include "regexHelper.h" #include "hAnno.h" #include "annoGratorQuery.h" #include "annoGratorGpVar.h" #include "annoFormatVep.h" #include "annoStreamBigBed.h" +#include "annoStreamDb.h" #include "libifyMe.h" +#define GENCODE_TAG_DOC_URL "\"http://www.gencodegenes.org/gencode_tags.html\"" +#define REFSEQ_STATUS_DOC_URL "\"http://www.ncbi.nlm.nih.gov/books/NBK21091/table/ch18.T.refseq_status_codes\"" + /* Global Variables */ struct cart *cart; /* CGI and other variables */ struct hash *oldVars = NULL; /* The cart before new cgi stuff added. */ char *genome = NULL; /* Name of genome - mouse, human, etc. */ char *database = NULL; /* Current genome database - hg17, mm5, etc. */ char *regionType = NULL; /* genome, ENCODE pilot regions, or specific position range. */ struct grp *fullGroupList = NULL; /* List of all groups. */ struct trackDb *fullTrackList = NULL; /* List of all tracks in database. */ static struct pipeline *compressPipeline = (struct pipeline *)NULL; // Null terminated list of CGI Variables we don't want to save permanently: char *excludeVars[] = {"Submit", "submit", "hgva_startQuery", NULL,}; #define hgvaRange "position" @@ -473,67 +477,71 @@ char *curLimit = cartUsualString(cart, "hgva_variantLimit", "10000"); char *limitLabels[] = { "10", "100", "1,000", "10,000", "100,000" }; char *limitValues[] = { "10", "100", "1000", "10000", "100000" }; cgiMakeDropListWithVals("hgva_variantLimit", limitLabels, limitValues, ArraySize(limitLabels), curLimit); printCtAndHubButtons(); puts("
"); } boolean isGeneTrack(struct trackDb *tdb, void *filterData) /* This is a TdbFilterFunction to get genePred tracks. */ { return (startsWith("genePred", tdb->type) || sameString("bigGenePred", tdb->type)); } -boolean selectGenes() -/* Let user select a gene predictions track; return FALSE if there are no genePred tracks. */ +char *selectGenes() +/* Let user select a gene predictions track; return NULL if there are no genePred tracks. */ { struct slRef *trackRefList = NULL; tdbFilterGroupTrack(fullTrackList, fullGroupList, isGeneTrack, NULL, NULL, &trackRefList); boolean gotGP = (trackRefList != NULL); if (!gotGP) warn("This assembly (%s) has no gene prediction tracks, " "so the VAI will not be able to annotate it.", database); printf("
Select Genes
\n"); if (gotGP) printf("The gene predictions selected here will be used "); else printf("Gene predictions are required in order "); printf("to determine the effect of " "each variant on genes, for example intronic, missense, splice site, intergenic etc."); if (!gotGP) printf(" Since this assembly has no gene prediction tracks, " "the VAI can't provide functional annotations. " "Please select a different genome.
"); printf("
\n"); -char *selected = cartUsualString(cart, "hgva_geneTrack", ""); //#*** per-db cart vars?? +if (! gotGP) + return NULL; +char *firstTrack = ((struct trackDb *)(trackRefList->val))->track; +char *selected = cartUsualString(cart, "hgva_geneTrack", firstTrack); //#*** should show more info about each track... button to pop up track desc? if (gotGP) { - printf("\n"); struct slRef *ref; for (ref = trackRefList; ref != NULL; ref = ref->next) { struct trackDb *tdb = ref->val; if (tdb->subtracks == NULL) printOption(tdb->track, selected, tdb->longLabel); } puts("
"); } -return gotGP; +return selected; } //#*** We really need a dbNsfp.[ch]: enum PolyPhen2Subset { noSubset, HDIV, HVAR }; char *formatDesc(char *url, char *name, char *details, boolean doHtml) /* Return a description with URL for name plus extra details. If doHtml, * wrap URL in .... */ { char desc[1024]; if (doHtml) safef(desc, sizeof(desc), "%s %s", url, name, details); else safef(desc, sizeof(desc), "(%s) %s %s", @@ -560,30 +568,34 @@ else errAbort("dbNsfpDescFromTableName: invalid PolyPhen2 subset type (%d)", subset); } else if (sameString(tableName, "dbNsfpMutationTaster")) return formatDesc("http://www.mutationtaster.org/", "MutationTaster", "(A = disease causing automatic, D = disease causing, " "N = polymorphism, P = polymorphism automatic)", doHtml); else if (sameString(tableName, "dbNsfpMutationAssessor")) return formatDesc("http://mutationassessor.org/", "MutationAssessor", "(high or medium: predicted functional; " "low or neutral: predicted non-functional)", doHtml); else if (sameString(tableName, "dbNsfpLrt")) return formatDesc("http://www.genetics.wustl.edu/jflab/lrt_query.html", "Likelihood ratio test (LRT)", "(D = deleterious, N = Neutral, U = unknown)", doHtml); +else if (sameString(tableName, "dbNsfpVest")) + return formatDesc("http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3665549/", + "Variant Effect Scoring Tool (VEST)", + "(scores [0-1] predict confidence that a change is deleterious", doHtml); else if (sameString(tableName, "dbNsfpGerpNr")) return formatDesc("http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html", "GERP++", "Neutral Rate (NR)", doHtml); else if (sameString(tableName, "dbNsfpGerpRs")) return formatDesc("http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html", "GERP++", "Rejected Substitutions (RS)", doHtml); else if (sameString(tableName, "dbNsfpInterPro")) return formatDesc("http://www.ebi.ac.uk/interpro/", "InterPro", "protein domains", doHtml); return NULL; } struct slName *findDbNsfpTables() /* See if this database contains dbNSFP tables. */ { if (startsWith(hubTrackPrefix, database)) @@ -609,42 +621,45 @@ safef(cartVar, sizeof(cartVar), "hgva_track_%s_%s", database, table); boolean defaultChecked = (sameString("dbNsfpSift", table) || sameString("dbNsfpPolyPhen2", table)); cartMakeCheckBox(cart, cartVar, defaultChecked); printf("%s
\n", description); } } void selectDbNsfp(struct slName *dbNsfpTables) /* Let user select scores/predicitions from various tools collected by dbNSFP. */ { if (dbNsfpTables == NULL) return; startCollapsibleSection("dbNsfp", "Database of Non-synonymous Functional Predictions (dbNSFP)", TRUE); +//#*** hardcoded version info... we need metadata (#11462) +char *dbNsfpVersion = sameString(database, "hg19") ? "2.0" : "3.1a"; +char *txVersion = sameString(dbNsfpVersion, "2.0") ? "Gencode release 9 (Ensembl 64, Dec. 2011)" : + "Gencode release 22 (Ensembl 79, Mar. 2015)"; printf("dbNSFP " "(Liu et al. 2013) " - "release 2.0 " + "release %s " "provides pre-computed scores and predictions of functional significance " "from a variety of tools. Every possible coding change to transcripts in " - //#*** hardcoded version info... sigh, we need trackDb... or metaDb?? - "Gencode release 9 (Ensembl 64, Dec. 2011) gene predictions " + "%s gene predictions " "has been evaluated. " "Note: This may not encompass all transcripts in your " - "selected gene set.
\n"); -//#*** Another cheap hack: reverse alph order happens to be what we want, + "selected gene set.
\n", dbNsfpVersion, txVersion); +//#*** Another cheap hack: reverse alph order happens to be what we want (until VEST??), //#*** but priorities would be cleaner: slReverse(&dbNsfpTables); jsMakeSetClearContainer(); struct slName *table; for (table = dbNsfpTables; table != NULL; table = table->next) { if (sameString(table->name, "dbNsfpPolyPhen2")) { printDbNsfpSource(table->name, HDIV); printDbNsfpSource(table->name, HVAR); } else printDbNsfpSource(table->name, 0); } jsEndContainer(); @@ -722,30 +737,198 @@ } void selectDbSnp(boolean gotSnp) /* Offer to include rsID (and other fields, or leave that for advanced output??) if available */ { if (!gotSnp) return; startCollapsibleSection("dbSnp", "Known variation", TRUE); cartMakeCheckBox(cart, "hgva_rsId", TRUE); printf("Include dbSNP " "rs# ID if one exists
\n"); puts("
"); endCollapsibleSection(); } +#define GENCODE_PREFIX "wgEncodeGencode" + +struct slName *getGencodeTagVersions() +/* Return a list of version strings from the ends of wgEncodeGencodeTag% tables. */ +{ +static struct slName *tagVersions = NULL; +if (tagVersions == NULL) + { + struct sqlConnection *conn = hAllocConn(database); + struct slName *tagTables = sqlQuickList(conn, + NOSQLINJ "show tables like '"GENCODE_PREFIX"Tag%'"); + int offset = strlen(GENCODE_PREFIX"Tag"); + struct slName *tt; + for (tt = tagTables; tt != NULL; tt = tt->next) + slAddHead(&tagVersions, slNameNew(tt->name + offset)); + hFreeConn(&conn); + } +return slNameCloneList(tagVersions); +} + +boolean knownGeneHasGencodeTags() +/* Return TRUE if this database has knownToTag for knownGene. */ +{ +return hTableExists(database, "knownGene") && hTableExists(database, "knownToTag"); +} + +boolean hasGencodeTags() +/* Return TRUE if GENCODE tags can be associated with some gene track in database. */ +{ +return knownGeneHasGencodeTags() || (getGencodeTagVersions() != NULL); +} + +boolean hasTxStatus() +/* Return TRUE if any gene track in database has some kind of transcript status info + * like knownCanonical, GENCODE tags and/or RefSeq status. */ +{ +if (hasGencodeTags()) + return TRUE; +if (hTableExists(database, "knownGene") && hTableExists(database, "knownCanonical")) + return TRUE; +if (hTableExists(database, "refGene") && hTableExists(database, "refSeqStatus")) + return TRUE; +return FALSE; +} + +char *getLatestGencodeVersion(struct slName *versionList) +/* Return the numerically largest version found in versionList. */ +{ +int maxVersionNum = -1; +char *maxVersion = NULL; +struct slName *version; +for (version = versionList; version != NULL; version = version->next) + { + int versionNum = atoi(skipToNumeric(version->name)); + if (versionNum > maxVersionNum) + { + maxVersionNum = versionNum; + maxVersion = version->name; + } + } +return cloneString(maxVersion); +} + +INLINE char *gencodeTableName(char *suffix, char *version, char *buf, size_t bufSize) +/* Write wgEncodeGencode into buf. Return buf for convenience. */ +{ +safef(buf, bufSize, GENCODE_PREFIX"%s%s", suffix, version); +return buf; +} + +boolean refGeneHasGencodeTags(struct slName *gencodeVersionList) +/* Return TRUE if this database has a wgEncodeGencodeRefSeq table as well as a ...Tag table. */ +{ +char *version = getLatestGencodeVersion(gencodeVersionList); +char table[PATH_LEN]; +return hTableExists(database, gencodeTableName("RefSeq", version, table, sizeof(table))); +} + +boolean startsWithGencodeGene(char *geneTrack) +/* Return TRUE if geneTrack starts with wgEncodeGencode{Basic,Comp,PseudoGene}. + * (There are other GENCODE genepred tracks that don't have tags associated with them.) */ +{ +return (startsWith(GENCODE_PREFIX"Basic", geneTrack) || + startsWith(GENCODE_PREFIX"Comp", geneTrack) || + startsWith(GENCODE_PREFIX"PseudoGene", geneTrack)); +} + +boolean isGencodeWithVersion(char *geneTrack, struct slName *versionList) +/* Return TRUE if geneTrack looks like a Gencode gene track for a supported version. */ +{ +if (! startsWithGencodeGene(geneTrack)) + return FALSE; +struct slName *v; +for (v = versionList; v != NULL; v = v->next) + { + if (endsWith(geneTrack, v->name)) + return TRUE; + } +return FALSE; +} + +void selectTxStatus(boolean hasTxStatus, char *geneTrack) +/* Offer to include transcript status, e.g. whether it is in knownCanonical or has GENCODE tags. + * This makes one div per category of txStatus info; each div is visible only if its info is + * applicable to the selected gene track. If no divs are visible, display a message that + * there's nothing for the currently selected gene track. */ +{ +if (! hasTxStatus) + return; +startCollapsibleSection("txStatus", "Transcript status", FALSE); +boolean somethingIsVisible = FALSE; +if (hasGencodeTags()) + { + struct slName *versionList = getGencodeTagVersions(); + char *maybeKnownGene = knownGeneHasGencodeTags() ? "knownGene" : ""; + char *maybeRefGene = refGeneHasGencodeTags(versionList) ? "refGene" : ""; + char *maybeEnsGene = ""; + char *versions = ""; + if (versionList != NULL) + { + if (hTableExists(database, "ensGene")) + maybeEnsGene = "ensGene"; + versions = slNameListToString(versionList, ' '); + } + boolean isVisible = (sameString(geneTrack, maybeKnownGene) || + sameString(geneTrack, maybeEnsGene) || + sameString(geneTrack, maybeRefGene) || + isGencodeWithVersion(geneTrack, versionList)); + somethingIsVisible |= isVisible; + printf("
", + maybeKnownGene, maybeRefGene, maybeEnsGene, versions, + isVisible ? "block" : "none"); + cartMakeCheckBox(cart, "hgva_txStatus_gencode", FALSE); + puts("Include the GENCODE tags for each transcript (if available).
"); + puts("
"); + } +if (hTableExists(database, "knownGene") && hTableExists(database, "knownCanonical")) + { + boolean isVisible = sameString(geneTrack, "knownGene"); + somethingIsVisible |= isVisible; + printf("
", + isVisible ? "block" : "none"); + cartMakeCheckBox(cart, "hgva_txStatus_knownCanonical", FALSE); + puts("Indicate whether each UCSC Genes transcript is 'canonical' (generally the longest " + "isoform of a gene).
"); + puts("
"); + } +if (hTableExists(database, "refGene") && hTableExists(database, "refSeqStatus")) + { + boolean isVisible = sameString(geneTrack, "refGene"); + somethingIsVisible |= isVisible; + printf("
", + isVisible ? "block" : "none"); + cartMakeCheckBox(cart, "hgva_txStatus_refSeqStatus", FALSE); + puts("Include the " + "RefSeq status of each transcript.
"); + puts("
"); + } +printf("
", + somethingIsVisible ? "none" : "block"); +puts("No transcript status data are available for the selected gene track."); +puts("
"); +puts("
"); +endCollapsibleSection(); +} + boolean isHg19RegulatoryTrack(struct trackDb *tdb, void *filterData) /* For now, just look for a couple specific tracks by tableName. */ { //#*** NEED METADATA return (sameString("wgEncodeRegDnaseClusteredV3", tdb->table) || sameString("wgEncodeRegTfbsClusteredV3", tdb->table)); } boolean isHg38RegulatoryTrack(struct trackDb *tdb, void *filterData) /* For now, just look for a couple specific tracks by tableName. */ { //#*** NEED METADATA return (sameString("wgEncodeRegDnaseClustered", tdb->table) || sameString("wgEncodeRegTfbsClusteredV3", tdb->table)); } @@ -853,46 +1036,48 @@ for (ref = trackRefList; ref != NULL; ref = ref->next) { struct trackDb *tdb = ref->val; char cartVar[512]; safef(cartVar, sizeof(cartVar), "hgva_track_%s_%s", database, tdb->track); cartMakeCheckBox(cart, cartVar, FALSE); struct trackDb *topTdb = trackDbTopLevelSelfOrParent(tdb); printf("%s
\n", hgTrackUiName(), cartSidUrlString(cart), topTdb->track, tdb->longLabel); } puts("
"); endCollapsibleSection(); } } -void selectAnnotations() +void selectAnnotations(char *geneTrack) /* Beyond predictions of protein-coding effect, what other basic data can we integrate? */ { struct slName *dbNsfpTables = findDbNsfpTables(); boolean gotSnp = findSnpBed4("", NULL, NULL); struct slRef *elTrackRefList = NULL, *scoreTrackRefList = NULL; findCons(&elTrackRefList, &scoreTrackRefList); struct slRef *cosmicTrackRefList = findTrackRefByName("cosmic"); +boolean hasTxStat = hasTxStatus(); if (dbNsfpTables == NULL && !gotSnp && elTrackRefList == NULL && scoreTrackRefList == NULL && - cosmicTrackRefList == NULL) + cosmicTrackRefList == NULL && !hasTxStat) return; puts("
"); printf("
Select More Annotations (optional)
\n"); // Make wrapper table for collapsible sections: puts(""); selectDbNsfp(dbNsfpTables); +selectTxStatus(hasTxStat, geneTrack); selectDbSnp(gotSnp); trackCheckBoxSection("Cosmic", "COSMIC", cosmicTrackRefList); trackCheckBoxSection("ConsEl", "Conserved elements", elTrackRefList); trackCheckBoxSection("ConsScore", "Conservation scores", scoreTrackRefList); puts("
"); } void selectFiltersFunc() /* Options to restrict variants based on gene region/soTerm from gpFx */ { startCollapsibleSection("filtersFunc", "Functional role", FALSE); printf("Include variants annotated as
\n"); jsMakeSetClearContainer(); cartMakeCheckBox(cart, "hgva_include_intergenic", TRUE); printf("intergenic
\n"); @@ -1071,35 +1256,35 @@ boolean alreadyAgreed = cartUsualBoolean(cart, "hgva_agreedToDisclaimer", FALSE); printf("\n", alreadyAgreed ? "true" : "false"); addSomeCss(); printAssemblySection(); /* Check for variant custom tracks. If there are none, tell user they need to * upload at least one. */ struct slRef *varTrackList = NULL, *varGroupList = NULL; tdbFilterGroupTrack(fullTrackList, fullGroupList, isVariantCustomTrack, NULL, &varGroupList, &varTrackList); puts("
"); // Make wrapper table for collapsible sections: selectVariants(varGroupList, varTrackList); -boolean gotGP = selectGenes(); -if (gotGP) +char *geneTrack = selectGenes(); +if (geneTrack != NULL) { selectRegulatory(); - selectAnnotations(); + selectAnnotations(geneTrack); selectFilters(); selectOutput(); submitAndDisclaimer(); } printf(""); jsReloadOnBackButton(cart); webNewSection("Using the Variant Annotation Integrator"); webIncludeHelpFile("hgVaiHelpText", FALSE); jsIncludeFile("jquery-ui.js", NULL); jsIncludeFile("hgVarAnnogrator.js", NULL); jsIncludeFile("ui.dropdownchecklist.js", NULL); jsIncludeFile("ddcl.js", NULL); } @@ -1278,31 +1463,32 @@ slAddHead(pGratorList, grator); if (vepOut != NULL) { char *tableName = tableNameFromSourceName(grator->streamer.name); char *suffix = NULL; if (subset == HDIV) suffix = "HDIV"; else if (subset == HVAR) suffix = "HVAR"; char *tag = tagFromTableName(tableName, suffix); if (isEmpty(description)) description = grator->streamer.name; if (isReg) annoFormatVepAddRegulatory(vepOut, (struct annoStreamer *)grator, tag, description, column); else - annoFormatVepAddExtraItem(vepOut, (struct annoStreamer *)grator, tag, description, column); + annoFormatVepAddExtraItem(vepOut, (struct annoStreamer *)grator, tag, description, column, + FALSE); } } INLINE void updateGratorList(struct annoGrator *grator, struct annoGrator **pGratorList) /* If grator is non-NULL, add it to gratorList. */ { updateGratorListAndVepExtra(grator, pGratorList, NULL, 0, NULL, NULL, FALSE); } void addDbNsfpSeqChange(char *trackName, struct annoAssembly *assembly, struct hash *gratorsByName, struct annoGrator **pGratorList) // If the user has selected dbNsfp* data, we also need the underlying dbNsfpSeqChange // data, so annoFormatVep can tell whether the variant and gpFx are consistent with the // variant and transcript that dbNsfp used to calculate scores. { @@ -2140,63 +2326,204 @@ * (main page will be displayed) */ { struct trackDb *varTdb = tdbForTrack(database, variantTrack, &fullTrackList); if (varTdb == NULL) { if (isHubTrack(variantTrack)) warn("Can't find hub track '%s'", variantTrack); else warn("Can't find tdb for variant track '%s'", variantTrack); } else checkVariantTrack(varTdb); return varTdb; } -static struct jsonElement *configForStreamer(char *db, struct trackDb *tdb) +static char *gencodeVersionFromTrack(char *track) +/* If track is a GENCODE table, find and return a pointer to the version at the end; + * otherwise return NULL. */ +{ +if (startsWithGencodeGene(track)) + { + char *v = strrchr(track, 'V'); + return v; + } +return NULL; +} + +static char *gencodeTagTableForTrack(char *db, char *track) +/* If there is a wgEncodeGencodeTag table that can be associated with track, + * return it; otherwise return NULL. */ +{ +struct slName *versionList = getGencodeTagVersions(); +if (startsWithGencodeGene(track)) + { + char *version = gencodeVersionFromTrack(track); + if (version != NULL) + { + char table[PATH_LEN]; + return cloneString(gencodeTableName("Tag", version, table, sizeof(table))); + } + } +else if (sameString(track, "refGene") && refGeneHasGencodeTags(versionList)) + { + char *version = getLatestGencodeVersion(versionList); + char table[PATH_LEN]; + if (hTableExists(db, gencodeTableName("RefSeq", version, table, sizeof(table)))) + return cloneString(gencodeTableName("Tag", version, table, sizeof(table))); + } +else if (sameString(track, "knownGene") && knownGeneHasGencodeTags(versionList)) + { + if (hTableExists(db, "knownToTag")) + return cloneString("knownToTag"); + } +return NULL; +} + +static struct joinerDtf *getTxStatusExtras(char *db, char *track) +// Several kinds of transcript status may be enabled in the cart; if any are enabled, +// and if they apply to track, return the tables & fields to be joined with the track. +{ +struct joinerDtf *txStatusExtras = NULL; +if (cartUsualBoolean(cart, "hgva_txStatus_gencode", FALSE)) + { + char *gencodeTagTable = gencodeTagTableForTrack(db, track); + if (gencodeTagTable != NULL) + { + char *field = "tag"; + if (sameString("knownToTag", gencodeTagTable)) + field = "value"; + slAddHead(&txStatusExtras, joinerDtfNew(db, gencodeTagTable, field)); + } + } +if (cartUsualBoolean(cart, "hgva_txStatus_knownCanonical", FALSE) && + sameString(track, "knownGene") && + hTableExists(db, "knownCanonical")) + { + slAddHead(&txStatusExtras, joinerDtfNew(db, "knownCanonical", "transcript")); + } +if (cartUsualBoolean(cart, "hgva_txStatus_refSeqStatus", FALSE) && + sameString(track, "refGene") && + hTableExists(db, "refSeqStatus")) + { + slAddHead(&txStatusExtras, joinerDtfNew(db, "refSeqStatus", "status")); + } +return txStatusExtras; +} + +static void configAddTableField(struct dyString *dy, char *table, char *field, boolean *pIsFirst) +/* Add a JSON object with table and (list of one) field. */ +// (with "." prepended to table name +// because that's the convention for related tables in same db as track): +{ +if (! *pIsFirst) + dyStringAppend(dy, ", "); +dyStringPrintf(dy, "{ \"table\": \".%s\", \"fields\": [\"%s\"] }", table, field); +*pIsFirst = FALSE; +} + + +static struct jsonElement *configForStreamer(char *db, struct trackDb *tdb, + struct joinerDtf *txStatusExtras) /* Add VAI-specific config options, if applicable. */ { struct jsonElement *config = NULL; char *track = tdb->track; +struct dyString *dyConfig = dyStringCreate("{ \"naForMissing\": false," + " \"relatedTables\": [ "); +boolean isFirst = TRUE; // If track is sql-based knownGene and we have kgXref, then add kgXref.geneSymbol after // the columns of knownGene. -if (sameString(track, "knownGene") && !isCustomTrack(track) && !isHubTrack(track) && - !trackDbSetting(tdb, "bigDataUrl")) - { - struct sqlConnection *conn = hAllocConn(db); - if (sqlTableExists(conn, "kgXref")) +if (sameString(track, "knownGene") && + !isCustomTrack(track) && !isHubTrack(track) && + !trackDbSetting(tdb, "bigDataUrl") && + hTableExists(db, "kgXref")) { - char jsonStr[PATH_LEN]; - safef(jsonStr, sizeof(jsonStr), - "{ \"relatedTables\":" - " [ { \"table\": \"%s.kgXref\", \"fields\": [\"geneSymbol\"] } ] }", - db); - config = jsonParse(jsonStr); + configAddTableField(dyConfig, ".kgXref", "geneSymbol", &isFirst); } - hFreeConn(&conn); +struct joinerDtf *txStatDtf; +for (txStatDtf = txStatusExtras; txStatDtf != NULL; txStatDtf = txStatDtf->next) + configAddTableField(dyConfig, txStatDtf->table, txStatDtf->field, &isFirst); + +// If any of the above apply, close the relatedTables list and config object +// and parse into jsonElements. +if (! isFirst) + { + dyStringAppend(dyConfig, " ] }"); + config = jsonParse(dyConfig->string); + dyStringFree(&dyConfig); } return config; } static void adjustGpVarOverlapRule(struct annoGrator *gpVarGrator, boolean haveRegulatory) /* If we're able to detect regulatory elements, and want to keep those annotations, loosen up * gpVarGrator's overlap rule from the default (must overlap). */ { if (haveRegulatory && cartUsualBoolean(cart, "hgva_include_regulatory", TRUE)) gpVarGrator->setOverlapRule(gpVarGrator, agoNoConstraint); } +static void addTxStatusExtras(struct annoFormatter *vepOut, char *geneTrack, + struct annoGrator *gpVarGrator, + struct joinerDtf *txStatusExtras) +/* Given a list of tables and fields that will be joined with geneTrack to provide transcript + * status info, configure vepOut to put them in the EXTRAs column. */ +{ +struct joinerDtf *txStatDtf; +for (txStatDtf = txStatusExtras; txStatDtf != NULL; txStatDtf = txStatDtf->next) + { + char *tag = NULL, *description = NULL; + boolean isBoolean = FALSE; + if (differentString(txStatDtf->database, database)) + errAbort("addTxStatusExtras: Expected db=%s in txStatDtf but got %s", + database, txStatDtf->database); + if ((startsWith(GENCODE_PREFIX"Tag", txStatDtf->table) && + sameString(txStatDtf->field, "tag")) || + (sameString(txStatDtf->table, "knownToTag") && + sameString(txStatDtf->field, "value"))) + { + tag = "GENCODE_TAG"; + description = "GENCODE tags for the transcript"; + } + else if (sameString(txStatDtf->table, "knownCanonical") && + sameString(txStatDtf->field, "transcript")) + { + tag = "CANONICAL"; + description = "If present, the transcript is the 'canonical' transcript of the gene " + "(generally the longest isoform of the gene)"; + isBoolean = TRUE; + } + else if (sameString(txStatDtf->table, "refSeqStatus") && + sameString(txStatDtf->field, "status")) + { + tag = "REFSEQ_STATUS"; + description = "RefSeq status of the transcript"; + } + else + { + errAbort("addTxStatusExtras: Unrecognized {table,field}: {%s,%s}", + txStatDtf->table, txStatDtf->field); + } + char *column = annoStreamDbColumnNameFromDtf(database, geneTrack, txStatDtf); + annoFormatVepAddExtraItem(vepOut, (struct annoStreamer *)gpVarGrator, + tag, description, column, isBoolean); + } +} + void doQuery() /* Translate simple form inputs into anno* components and execute query. */ { dyInfo = dyStringNew(0); char *chrom = NULL; uint start = 0, end = 0; if (sameString(regionType, hgvaRegionTypeRange)) getCartPosOrDie(&chrom, &start, &end); struct annoAssembly *assembly = hAnnoGetAssembly(database); char *geneTrack = cartString(cart, "hgva_geneTrack"); struct trackDb *geneTdb = tdbForTrack(database, geneTrack, &fullTrackList); if (geneTdb == NULL) { warn("Can't find tdb for gene track %s", geneTrack); @@ -2226,31 +2553,32 @@ primaryLongLabel = hgvaVariantIdsLabel; } else { struct trackDb *varTdb = getVariantTrackDb(variantTrack); if (varTdb == NULL) { doUi(); return; } primary = hAnnoStreamerFromTrackDb(assembly, varTdb->table, varTdb, chrom, maxVarRows, NULL); primaryLongLabel = varTdb->longLabel; } enum annoGratorOverlap geneOverlapRule = agoMustOverlap; -struct jsonElement *gpConfig = configForStreamer(database, geneTdb); +struct joinerDtf *txStatusExtras = getTxStatusExtras(database, geneTrack); +struct jsonElement *gpConfig = configForStreamer(database, geneTdb, txStatusExtras); struct annoGrator *gpVarGrator = hAnnoGratorFromTrackDb(assembly, geneTdb->table, geneTdb, chrom, ANNO_NO_LIMIT, primary->asObj, geneOverlapRule, gpConfig); setGpVarFuncFilter(gpVarGrator); // Some grators may be used as both filters and output values. To avoid making // multiple grators for the same source, hash them by trackName: struct hash *gratorsByName = hashNew(8); struct annoGrator *snpGrator = NULL; char *snpDesc = NULL; if (cartUsualBoolean(cart, "hgva_rsId", FALSE)) snpGrator = gratorForSnpBed4(gratorsByName, "", assembly, chrom, agoNoConstraint, &snpDesc); // Now construct gratorList in the order in which annoFormatVep wants to see them, @@ -2259,30 +2587,31 @@ slAddHead(&gratorList, gpVarGrator); if (snpGrator != NULL) slAddHead(&gratorList, snpGrator); // Text or HTML output? char *outFormat = cartUsualString(cart, "hgva_outFormat", "vepTab"); boolean doHtml = sameString(outFormat, "vepHtml"); // Initialize VEP formatter: struct annoFormatter *vepOut = annoFormatVepNew("stdout", doHtml, primary, primaryLongLabel, (struct annoStreamer *)gpVarGrator, geneTdb->longLabel, (struct annoStreamer *)snpGrator, snpDesc, assembly); +addTxStatusExtras(vepOut, geneTrack, gpVarGrator, txStatusExtras); boolean haveRegulatory = FALSE; addOutputTracks(&gratorList, gratorsByName, vepOut, assembly, chrom, doHtml, &haveRegulatory); adjustGpVarOverlapRule(gpVarGrator, haveRegulatory); addFilterTracks(&gratorList, gratorsByName, assembly, chrom); slReverse(&gratorList); if (doHtml) { webStart(cart, database, "Annotated Variants in VEP/HTML format"); } else { // Undo the htmlPushEarlyHandlers() because after this point they make ugly text: