06845edb9038bb632170891cebcdc8f477f2ea8d
angie
Fri Dec 4 08:59:38 2015 -0800
Add dbNSFP v3.1a (including VEST scores) to hg38 for hgVai.
Add hgVai options for adding in transcript status info: GENCODE tags
when applicable, knownCanonical for knownGene, refSeqStatus for refGene.
refs #16502, #16503
diff --git src/hg/hgVai/hgVai.c src/hg/hgVai/hgVai.c
index a1cf465..c4c19a4 100644
--- src/hg/hgVai/hgVai.c
+++ src/hg/hgVai/hgVai.c
@@ -22,33 +22,37 @@
#include "textOut.h"
#include "trackHub.h"
#include "hubConnect.h"
#include "twoBit.h"
#include "gpFx.h"
#include "bigGenePred.h"
#include "udc.h"
#include "knetUdc.h"
#include "md5.h"
#include "regexHelper.h"
#include "hAnno.h"
#include "annoGratorQuery.h"
#include "annoGratorGpVar.h"
#include "annoFormatVep.h"
#include "annoStreamBigBed.h"
+#include "annoStreamDb.h"
#include "libifyMe.h"
+#define GENCODE_TAG_DOC_URL "\"http://www.gencodegenes.org/gencode_tags.html\""
+#define REFSEQ_STATUS_DOC_URL "\"http://www.ncbi.nlm.nih.gov/books/NBK21091/table/ch18.T.refseq_status_codes\""
+
/* Global Variables */
struct cart *cart; /* CGI and other variables */
struct hash *oldVars = NULL; /* The cart before new cgi stuff added. */
char *genome = NULL; /* Name of genome - mouse, human, etc. */
char *database = NULL; /* Current genome database - hg17, mm5, etc. */
char *regionType = NULL; /* genome, ENCODE pilot regions, or specific position range. */
struct grp *fullGroupList = NULL; /* List of all groups. */
struct trackDb *fullTrackList = NULL; /* List of all tracks in database. */
static struct pipeline *compressPipeline = (struct pipeline *)NULL;
// Null terminated list of CGI Variables we don't want to save permanently:
char *excludeVars[] = {"Submit", "submit", "hgva_startQuery", NULL,};
#define hgvaRange "position"
@@ -473,67 +477,71 @@
char *curLimit = cartUsualString(cart, "hgva_variantLimit", "10000");
char *limitLabels[] = { "10", "100", "1,000", "10,000", "100,000" };
char *limitValues[] = { "10", "100", "1000", "10000", "100000" };
cgiMakeDropListWithVals("hgva_variantLimit", limitLabels, limitValues, ArraySize(limitLabels),
curLimit);
printCtAndHubButtons();
puts(" ");
}
boolean isGeneTrack(struct trackDb *tdb, void *filterData)
/* This is a TdbFilterFunction to get genePred tracks. */
{
return (startsWith("genePred", tdb->type) || sameString("bigGenePred", tdb->type));
}
-boolean selectGenes()
-/* Let user select a gene predictions track; return FALSE if there are no genePred tracks. */
+char *selectGenes()
+/* Let user select a gene predictions track; return NULL if there are no genePred tracks.
+ * Otherwise return the selected track name: the cart's hgva_geneTrack value, or the first
+ * track accepted by isGeneTrack when the cart has no value.  The explanatory text is
+ * printed in both cases; the track options are printed only when tracks exist. */
{
struct slRef *trackRefList = NULL;
tdbFilterGroupTrack(fullTrackList, fullGroupList, isGeneTrack, NULL, NULL, &trackRefList);
boolean gotGP = (trackRefList != NULL);
if (!gotGP)
warn("This assembly (%s) has no gene prediction tracks, "
"so the VAI will not be able to annotate it.", database);
printf("
\n");
if (gotGP)
printf("The gene predictions selected here will be used ");
else
printf("Gene predictions are required in order ");
printf("to determine the effect of "
"each variant on genes, for example intronic, missense, splice site, intergenic etc.");
if (!gotGP)
printf(" Since this assembly has no gene prediction tracks, "
"the VAI can't provide functional annotations. "
"Please select a different genome. ");
printf(" \n");
-char *selected = cartUsualString(cart, "hgva_geneTrack", ""); //#*** per-db cart vars??
+if (! gotGP)  // the warning and explanation were printed above; there is nothing to select
+ return NULL;
+char *firstTrack = ((struct trackDb *)(trackRefList->val))->track;  // default when cart has no choice
+char *selected = cartUsualString(cart, "hgva_geneTrack", firstTrack);  // NOTE(review): options below skip tracks with subtracks, so firstTrack may not be listed -- confirm
//#*** should show more info about each track... button to pop up track desc?
if (gotGP)
{
- printf("\n");
+ printf("\n");
struct slRef *ref;
for (ref = trackRefList; ref != NULL; ref = ref->next)
{
struct trackDb *tdb = ref->val;
if (tdb->subtracks == NULL)
printOption(tdb->track, selected, tdb->longLabel);
}
puts(" ");
}
-return gotGP;
+return selected;
}
//#*** We really need a dbNsfp.[ch]:
/* Selects which PolyPhen2 subset a dbNSFP source refers to (HDIV or HVAR); callers in this
 * file pass 0, i.e. noSubset, for sources other than dbNsfpPolyPhen2. */
enum PolyPhen2Subset { noSubset, HDIV, HVAR };
char *formatDesc(char *url, char *name, char *details, boolean doHtml)
/* Return a description with URL for name plus extra details. If doHtml,
* wrap URL in ... . */
{
char desc[1024];
if (doHtml)
safef(desc, sizeof(desc), "%s %s",
url, name, details);
else
safef(desc, sizeof(desc), "(%s) %s %s",
@@ -560,30 +568,34 @@
else
errAbort("dbNsfpDescFromTableName: invalid PolyPhen2 subset type (%d)", subset);
}
else if (sameString(tableName, "dbNsfpMutationTaster"))
return formatDesc("http://www.mutationtaster.org/", "MutationTaster",
"(A = disease causing automatic, D = disease causing, "
"N = polymorphism, P = polymorphism automatic)", doHtml);
else if (sameString(tableName, "dbNsfpMutationAssessor"))
return formatDesc("http://mutationassessor.org/", "MutationAssessor",
"(high or medium: predicted functional; "
"low or neutral: predicted non-functional)", doHtml);
else if (sameString(tableName, "dbNsfpLrt"))
return formatDesc("http://www.genetics.wustl.edu/jflab/lrt_query.html",
"Likelihood ratio test (LRT)",
"(D = deleterious, N = Neutral, U = unknown)", doHtml);
+else if (sameString(tableName, "dbNsfpVest"))
+    // The details parenthetical is closed here, like the other tools' detail strings.
+    return formatDesc("http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3665549/",
+                      "Variant Effect Scoring Tool (VEST)",
+                      "(scores [0-1] predict confidence that a change is deleterious)", doHtml);
else if (sameString(tableName, "dbNsfpGerpNr"))
return formatDesc("http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html",
"GERP++", "Neutral Rate (NR)", doHtml);
else if (sameString(tableName, "dbNsfpGerpRs"))
return formatDesc("http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html",
"GERP++", "Rejected Substitutions (RS)", doHtml);
else if (sameString(tableName, "dbNsfpInterPro"))
return formatDesc("http://www.ebi.ac.uk/interpro/", "InterPro", "protein domains", doHtml);
return NULL;
}
struct slName *findDbNsfpTables()
/* See if this database contains dbNSFP tables. */
{
if (startsWith(hubTrackPrefix, database))
@@ -609,42 +621,45 @@
safef(cartVar, sizeof(cartVar), "hgva_track_%s_%s", database, table);
boolean defaultChecked = (sameString("dbNsfpSift", table) ||
sameString("dbNsfpPolyPhen2", table));
cartMakeCheckBox(cart, cartVar, defaultChecked);
printf("%s \n", description);
}
}
void selectDbNsfp(struct slName *dbNsfpTables)
/* Let user select scores/predictions from various tools collected by dbNSFP. */
{
if (dbNsfpTables == NULL)
return;
startCollapsibleSection("dbNsfp", "Database of Non-synonymous Functional Predictions (dbNSFP)",
TRUE);
+//#*** hardcoded version info... we need metadata (#11462)
+char *dbNsfpVersion = sameString(database, "hg19") ? "2.0" : "3.1a";
+char *txVersion = sameString(dbNsfpVersion, "2.0") ? "Gencode release 9 (Ensembl 64, Dec. 2011)" :
+ "Gencode release 22 (Ensembl 79, Mar. 2015)";
printf("dbNSFP "
"(Liu et al. 2013 ) "
- "release 2.0 "
+ "release %s "
"provides pre-computed scores and predictions of functional significance "
"from a variety of tools. Every possible coding change to transcripts in "
- //#*** hardcoded version info... sigh, we need trackDb... or metaDb??
- "Gencode release 9 (Ensembl 64, Dec. 2011) gene predictions "
+ "%s gene predictions "
"has been evaluated. "
"Note: This may not encompass all transcripts in your "
- "selected gene set. \n");
-//#*** Another cheap hack: reverse alph order happens to be what we want,
+ "selected gene set. \n", dbNsfpVersion, txVersion);
+//#*** Another cheap hack: reverse alph order happens to be what we want (until VEST??),
//#*** but priorities would be cleaner:
slReverse(&dbNsfpTables);
jsMakeSetClearContainer();
struct slName *table;
for (table = dbNsfpTables; table != NULL; table = table->next)
{
if (sameString(table->name, "dbNsfpPolyPhen2"))
{
printDbNsfpSource(table->name, HDIV);
printDbNsfpSource(table->name, HVAR);
}
else
printDbNsfpSource(table->name, 0);
}
jsEndContainer();
@@ -722,30 +737,198 @@
}
void selectDbSnp(boolean gotSnp)
/* When gotSnp is TRUE, print a "Known variation" collapsible section containing the
 * hgva_rsId checkbox (checked by default) and its label; when FALSE, print nothing. */
{
if (gotSnp)
    {
    startCollapsibleSection("dbSnp", "Known variation", TRUE);
    cartMakeCheckBox(cart, "hgva_rsId", TRUE);
    printf("Include dbSNP "
           "rs# ID if one exists \n");
    puts(" ");
    endCollapsibleSection();
    }
}
+#define GENCODE_PREFIX "wgEncodeGencode"
+
+struct slName *getGencodeTagVersions()
+/* Return a list of version strings from the ends of wgEncodeGencodeTag% tables, i.e. the
+ * part of each table name that follows "wgEncodeGencodeTag"; NULL if there are no such tables.
+ * The database is queried on the first call only.  Every call returns a fresh clone of the
+ * cached list, so the caller may free the result. */
+{
+static struct slName *tagVersions = NULL;
+static boolean alreadyQueried = FALSE;
+if (! alreadyQueried)
+    {
+    struct sqlConnection *conn = hAllocConn(database);
+    struct slName *tagTables = sqlQuickList(conn,
+                                   NOSQLINJ "show tables like '"GENCODE_PREFIX"Tag%'");
+    int offset = strlen(GENCODE_PREFIX"Tag");
+    struct slName *tt;
+    for (tt = tagTables;  tt != NULL;  tt = tt->next)
+        slAddHead(&tagVersions, slNameNew(tt->name + offset));
+    hFreeConn(&conn);
+    // The version suffixes were copied above, so the table-name list can be released.
+    slFreeList(&tagTables);
+    // Record that the query ran, so an empty result is cached too instead of re-querying.
+    alreadyQueried = TRUE;
+    }
+return slNameCloneList(tagVersions);
+}
+
+boolean knownGeneHasGencodeTags()
+/* TRUE when database has both the knownGene table and the knownToTag table that
+ * supplies tags for it; knownToTag is looked up only if knownGene exists. */
+{
+boolean gotKnownGene = hTableExists(database, "knownGene");
+return gotKnownGene && hTableExists(database, "knownToTag");
+}
+
+boolean hasGencodeTags()
+/* Return TRUE if GENCODE tags can be associated with some gene track in database:
+ * either knownGene has knownToTag, or at least one wgEncodeGencodeTag% table exists. */
+{
+if (knownGeneHasGencodeTags())
+    return TRUE;
+// getGencodeTagVersions returns a cloned list; only its emptiness matters here, so
+// release the clone instead of dropping it.
+struct slName *versions = getGencodeTagVersions();
+boolean gotVersions = (versions != NULL);
+slFreeList(&versions);
+return gotVersions;
+}
+
+boolean hasTxStatus()
+/* Report whether database offers any transcript status info for some gene track.
+ * Checked in order, stopping at the first hit: GENCODE tags, knownGene with
+ * knownCanonical, refGene with refSeqStatus. */
+{
+boolean gotStatus = hasGencodeTags();
+if (! gotStatus)
+    gotStatus = hTableExists(database, "knownGene") && hTableExists(database, "knownCanonical");
+if (! gotStatus)
+    gotStatus = hTableExists(database, "refGene") && hTableExists(database, "refSeqStatus");
+return gotStatus;
+}
+
+char *getLatestGencodeVersion(struct slName *versionList)
+/* Return a clone of the name in versionList whose number, read with
+ * atoi(skipToNumeric(name)), is largest; the first such name wins ties.
+ * For an empty list the result is cloneString(NULL). */
+{
+struct slName *best = NULL;
+int bestNum = -1;
+struct slName *cur = versionList;
+while (cur != NULL)
+    {
+    int curNum = atoi(skipToNumeric(cur->name));
+    if (curNum > bestNum)
+        {
+        bestNum = curNum;
+        best = cur;
+        }
+    cur = cur->next;
+    }
+return cloneString(best != NULL ? best->name : NULL);
+}
+
+INLINE char *gencodeTableName(char *suffix, char *version, char *buf, size_t bufSize)
+/* Write "wgEncodeGencode" followed by suffix and then version into buf, whose size is
+ * bufSize.  Return buf for convenience. */
+{
+safef(buf, bufSize, "%s%s%s", GENCODE_PREFIX, suffix, version);
+return buf;
+}
+
+boolean refGeneHasGencodeTags(struct slName *gencodeVersionList)
+/* Return TRUE if this database has a wgEncodeGencodeRefSeq table for the latest version in
+ * gencodeVersionList (the versions for which ...Tag tables exist).  Return FALSE when the
+ * list yields no version, e.g. when it is empty. */
+{
+char *version = getLatestGencodeVersion(gencodeVersionList);
+// Callers may pass an empty list (tags can come from knownToTag alone); without a version
+// there is no table name to build, and NULL must not reach the %s in gencodeTableName.
+if (version == NULL)
+    return FALSE;
+char table[PATH_LEN];
+boolean gotRefSeq = hTableExists(database,
+                                 gencodeTableName("RefSeq", version, table, sizeof(table)));
+// version is a clone made by getLatestGencodeVersion; release it now that table is built.
+freeMem(version);
+return gotRefSeq;
+}
+
+boolean startsWithGencodeGene(char *geneTrack)
+/* Return TRUE if geneTrack starts with wgEncodeGencode{Basic,Comp,PseudoGene}.
+ * (There are other GENCODE genepred tracks that don't have tags associated with them.) */
+{
+static char *taggedPrefixes[] = { GENCODE_PREFIX"Basic",
+                                  GENCODE_PREFIX"Comp",
+                                  GENCODE_PREFIX"PseudoGene" };
+int i;
+for (i = 0;  i < ArraySize(taggedPrefixes);  i++)
+    {
+    if (startsWith(taggedPrefixes[i], geneTrack))
+        return TRUE;
+    }
+return FALSE;
+}
+
+boolean isGencodeWithVersion(char *geneTrack, struct slName *versionList)
+/* TRUE when geneTrack passes startsWithGencodeGene and also ends with one of the
+ * version strings in versionList; FALSE otherwise, including for an empty list. */
+{
+boolean gotVersion = FALSE;
+if (startsWithGencodeGene(geneTrack))
+    {
+    struct slName *ver = versionList;
+    while (ver != NULL && !gotVersion)
+        {
+        if (endsWith(geneTrack, ver->name))
+            gotVersion = TRUE;
+        ver = ver->next;
+        }
+    }
+return gotVersion;
+}
+
+void selectTxStatus(boolean hasTxStatus, char *geneTrack)
+/* Offer to include transcript status, e.g. whether it is in knownCanonical or has GENCODE tags.
+ * This makes one div per category of txStatus info; each div is visible only if its info is
+ * applicable to the selected gene track. If no divs are visible, display a message that
+ * there's nothing for the currently selected gene track.
+ * hasTxStatus: flag computed by the caller; when FALSE nothing at all is printed.  Note that
+ *   this parameter shadows the function hasTxStatus() inside this body.
+ * geneTrack: name of the currently selected gene track; it is compared with knownGene,
+ *   refGene, ensGene and the GENCODE track names to decide each section's initial display
+ *   value ("block" or "none").
+ * Checkboxes made here: hgva_txStatus_gencode, hgva_txStatus_knownCanonical and
+ * hgva_txStatus_refSeqStatus, all unchecked by default. */
+{
+if (! hasTxStatus)
+ return;
+startCollapsibleSection("txStatus", "Transcript status", FALSE);
+boolean somethingIsVisible = FALSE;  // set when any category applies to geneTrack
+if (hasGencodeTags())
+ {
+ struct slName *versionList = getGencodeTagVersions();
+ char *maybeKnownGene = knownGeneHasGencodeTags() ? "knownGene" : "";
+ char *maybeRefGene = refGeneHasGencodeTags(versionList) ? "refGene" : "";  // NOTE(review): versionList can be NULL here when only knownToTag exists -- confirm the callee tolerates that
+ char *maybeEnsGene = "";
+ char *versions = "";
+ if (versionList != NULL)
+ {
+ if (hTableExists(database, "ensGene"))
+ maybeEnsGene = "ensGene";
+ versions = slNameListToString(versionList, ' ');
+ }
+ boolean isVisible = (sameString(geneTrack, maybeKnownGene) ||
+ sameString(geneTrack, maybeEnsGene) ||
+ sameString(geneTrack, maybeRefGene) ||
+ isGencodeWithVersion(geneTrack, versionList));  // "" never equals a real track name, so absent tables cannot match
+ somethingIsVisible |= isVisible;
+ printf("",
+ maybeKnownGene, maybeRefGene, maybeEnsGene, versions,
+ isVisible ? "block" : "none");  // NOTE(review): format string is empty in this listing although arguments follow -- confirm against the repository copy
+ cartMakeCheckBox(cart, "hgva_txStatus_gencode", FALSE);
+ puts("Include the
GENCODE tags for each transcript (if available).
");
+ puts("
");
+ }
+if (hTableExists(database, "knownGene") && hTableExists(database, "knownCanonical"))
+ {
+ boolean isVisible = sameString(geneTrack, "knownGene");  // knownCanonical applies to knownGene only
+ somethingIsVisible |= isVisible;
+ printf("",
+ isVisible ? "block" : "none");
+ cartMakeCheckBox(cart, "hgva_txStatus_knownCanonical", FALSE);
+ puts("Indicate whether each UCSC Genes transcript is 'canonical' (generally the longest "
+ "isoform of a gene). ");
+ puts("
");
+ }
+if (hTableExists(database, "refGene") && hTableExists(database, "refSeqStatus"))
+ {
+ boolean isVisible = sameString(geneTrack, "refGene");  // refSeqStatus applies to refGene only
+ somethingIsVisible |= isVisible;
+ printf("",
+ isVisible ? "block" : "none");
+ cartMakeCheckBox(cart, "hgva_txStatus_refSeqStatus", FALSE);
+ puts("Include the "
+ "
RefSeq status of each transcript.
");
+ puts("
");
+ }
+printf("",
+ somethingIsVisible ? "none" : "block");  // the fallback message shows only when no category above is visible
+puts("No transcript status data are available for the selected gene track.");
+puts("
");
+puts(" ");
+endCollapsibleSection();
+}
+
boolean isHg19RegulatoryTrack(struct trackDb *tdb, void *filterData)
/* For now, just look for a couple specific tracks by tableName. */
{
//#*** NEED METADATA
return (sameString("wgEncodeRegDnaseClusteredV3", tdb->table) ||
sameString("wgEncodeRegTfbsClusteredV3", tdb->table));
}
boolean isHg38RegulatoryTrack(struct trackDb *tdb, void *filterData)
/* For now, just look for a couple specific tracks by tableName. */
{
//#*** NEED METADATA
return (sameString("wgEncodeRegDnaseClustered", tdb->table) ||
sameString("wgEncodeRegTfbsClusteredV3", tdb->table));
}
@@ -853,46 +1036,48 @@
for (ref = trackRefList; ref != NULL; ref = ref->next)
{
struct trackDb *tdb = ref->val;
char cartVar[512];
safef(cartVar, sizeof(cartVar), "hgva_track_%s_%s", database, tdb->track);
cartMakeCheckBox(cart, cartVar, FALSE);
struct trackDb *topTdb = trackDbTopLevelSelfOrParent(tdb);
printf("%s \n", hgTrackUiName(), cartSidUrlString(cart),
topTdb->track, tdb->longLabel);
}
puts(" ");
endCollapsibleSection();
}
}
-void selectAnnotations()
+void selectAnnotations(char *geneTrack)  // geneTrack: selected gene track name, used only by selectTxStatus
/* Beyond predictions of protein-coding effect, what other basic data can we integrate? */
{
struct slName *dbNsfpTables = findDbNsfpTables();
boolean gotSnp = findSnpBed4("", NULL, NULL);
struct slRef *elTrackRefList = NULL, *scoreTrackRefList = NULL;
findCons(&elTrackRefList, &scoreTrackRefList);
struct slRef *cosmicTrackRefList = findTrackRefByName("cosmic");
+boolean hasTxStat = hasTxStatus();  // computed once; reused for the early return and for selectTxStatus
if (dbNsfpTables == NULL && !gotSnp && elTrackRefList == NULL && scoreTrackRefList == NULL &&
- cosmicTrackRefList == NULL)
+ cosmicTrackRefList == NULL && !hasTxStat)  // with no annotation source of any kind, print no section at all
return;
puts(" ");
printf("\n");
// Make wrapper table for collapsible sections:
puts("");
selectDbNsfp(dbNsfpTables);
+selectTxStatus(hasTxStat, geneTrack);  // returns without printing when hasTxStat is FALSE
selectDbSnp(gotSnp);
trackCheckBoxSection("Cosmic", "COSMIC", cosmicTrackRefList);
trackCheckBoxSection("ConsEl", "Conserved elements", elTrackRefList);
trackCheckBoxSection("ConsScore", "Conservation scores", scoreTrackRefList);
puts("
");
}
void selectFiltersFunc()
/* Options to restrict variants based on gene region/soTerm from gpFx */
{
startCollapsibleSection("filtersFunc", "Functional role", FALSE);
printf("Include variants annotated as \n");
jsMakeSetClearContainer();
cartMakeCheckBox(cart, "hgva_include_intergenic", TRUE);
printf("intergenic \n");
@@ -1071,35 +1256,35 @@
boolean alreadyAgreed = cartUsualBoolean(cart, "hgva_agreedToDisclaimer", FALSE);
printf("\n", alreadyAgreed ? "true" : "false");
addSomeCss();
printAssemblySection();
/* Check for variant custom tracks. If there are none, tell user they need to
* upload at least one. */
struct slRef *varTrackList = NULL, *varGroupList = NULL;
tdbFilterGroupTrack(fullTrackList, fullGroupList, isVariantCustomTrack, NULL,
&varGroupList, &varTrackList);
puts(" ");
// Make wrapper table for collapsible sections:
selectVariants(varGroupList, varTrackList);
-boolean gotGP = selectGenes();
-if (gotGP)
+char *geneTrack = selectGenes();
+if (geneTrack != NULL)
{
selectRegulatory();
- selectAnnotations();
+ selectAnnotations(geneTrack);
selectFilters();
selectOutput();
submitAndDisclaimer();
}
printf("");
jsReloadOnBackButton(cart);
webNewSection("Using the Variant Annotation Integrator");
webIncludeHelpFile("hgVaiHelpText", FALSE);
jsIncludeFile("jquery-ui.js", NULL);
jsIncludeFile("hgVarAnnogrator.js", NULL);
jsIncludeFile("ui.dropdownchecklist.js", NULL);
jsIncludeFile("ddcl.js", NULL);
}
@@ -1278,31 +1463,32 @@
slAddHead(pGratorList, grator);
if (vepOut != NULL)
{
char *tableName = tableNameFromSourceName(grator->streamer.name);
char *suffix = NULL;
if (subset == HDIV)
suffix = "HDIV";
else if (subset == HVAR)
suffix = "HVAR";
char *tag = tagFromTableName(tableName, suffix);
if (isEmpty(description))
description = grator->streamer.name;
if (isReg)
annoFormatVepAddRegulatory(vepOut, (struct annoStreamer *)grator, tag, description, column);
else
- annoFormatVepAddExtraItem(vepOut, (struct annoStreamer *)grator, tag, description, column);
+ annoFormatVepAddExtraItem(vepOut, (struct annoStreamer *)grator, tag, description, column,
+ FALSE);
}
}
INLINE void updateGratorList(struct annoGrator *grator, struct annoGrator **pGratorList)
/* If grator is non-NULL, add it to gratorList.
 * Thin wrapper: forwards grator and pGratorList to updateGratorListAndVepExtra and passes
 * NULL/0/NULL/NULL/FALSE for its remaining arguments.  Presumably those are the VEP output
 * settings, so no formatter item is registered -- confirm against that function's signature. */
{
updateGratorListAndVepExtra(grator, pGratorList, NULL, 0, NULL, NULL, FALSE);
}
void addDbNsfpSeqChange(char *trackName, struct annoAssembly *assembly, struct hash *gratorsByName,
struct annoGrator **pGratorList)
// If the user has selected dbNsfp* data, we also need the underlying dbNsfpSeqChange
// data, so annoFormatVep can tell whether the variant and gpFx are consistent with the
// variant and transcript that dbNsfp used to calculate scores.
{
@@ -2140,63 +2326,204 @@
* (main page will be displayed) */
{
struct trackDb *varTdb = tdbForTrack(database, variantTrack, &fullTrackList);
if (varTdb == NULL)
{
if (isHubTrack(variantTrack))
warn("Can't find hub track '%s'", variantTrack);
else
warn("Can't find tdb for variant track '%s'", variantTrack);
}
else
checkVariantTrack(varTdb);
return varTdb;
}
-static struct jsonElement *configForStreamer(char *db, struct trackDb *tdb)
+static char *gencodeVersionFromTrack(char *track)
+/* For a track accepted by startsWithGencodeGene, return a pointer (into track itself) to
+ * the last 'V' in the name, where the version suffix begins; this is NULL if the name has
+ * no 'V'.  Return NULL for any other track. */
+{
+if (! startsWithGencodeGene(track))
+    return NULL;
+return strrchr(track, 'V');
+}
+
+static char *gencodeTagTableForTrack(char *db, char *track)
+/* If there is a wgEncodeGencodeTag table (or knownToTag) that can be associated with track,
+ * return its name as a newly allocated string; otherwise return NULL.
+ *   GENCODE Basic/Comp/PseudoGene track: the Tag table with the track's own version suffix,
+ *     provided that table exists in db.
+ *   refGene: the Tag table of the latest GENCODE version, provided the RefSeq table
+ *     of that version exists in db.
+ *   knownGene: knownToTag, provided it exists. */
+{
+char *tagTable = NULL;
+char table[PATH_LEN];
+if (startsWithGencodeGene(track))
+    {
+    char *version = gencodeVersionFromTrack(track);
+    // Only offer the Tag table if it is really there; not every GENCODE version has one.
+    if (version != NULL &&
+        hTableExists(db, gencodeTableName("Tag", version, table, sizeof(table))))
+        tagTable = cloneString(table);
+    }
+else if (sameString(track, "refGene"))
+    {
+    struct slName *versionList = getGencodeTagVersions();
+    char *version = getLatestGencodeVersion(versionList);
+    // version is NULL when there are no Tag tables at all; never format NULL into a name.
+    if (version != NULL &&
+        hTableExists(db, gencodeTableName("RefSeq", version, table, sizeof(table))))
+        tagTable = cloneString(gencodeTableName("Tag", version, table, sizeof(table)));
+    // Both the version string and the version list are clones owned by this function.
+    freeMem(version);
+    slFreeList(&versionList);
+    }
+else if (sameString(track, "knownGene") && knownGeneHasGencodeTags())
+    {
+    if (hTableExists(db, "knownToTag"))
+        tagTable = cloneString("knownToTag");
+    }
+return tagTable;
+}
+
+static struct joinerDtf *getTxStatusExtras(char *db, char *track)
+// Build the list of {db, table, field} items to join with track, one for each kind of
+// transcript status that is enabled in the cart and also applies to track.  Return NULL
+// when none qualifies.  Items are pushed with slAddHead, so later checks come first.
+{
+struct joinerDtf *extras = NULL;
+boolean wantGencode = cartUsualBoolean(cart, "hgva_txStatus_gencode", FALSE);
+if (wantGencode)
+    {
+    char *tagTable = gencodeTagTableForTrack(db, track);
+    if (tagTable != NULL)
+        {
+        // Field "value" is used for knownToTag, "tag" for every other tag table.
+        char *tagField = sameString("knownToTag", tagTable) ? "value" : "tag";
+        slAddHead(&extras, joinerDtfNew(db, tagTable, tagField));
+        }
+    }
+if (cartUsualBoolean(cart, "hgva_txStatus_knownCanonical", FALSE))
+    {
+    if (sameString(track, "knownGene") && hTableExists(db, "knownCanonical"))
+        slAddHead(&extras, joinerDtfNew(db, "knownCanonical", "transcript"));
+    }
+if (cartUsualBoolean(cart, "hgva_txStatus_refSeqStatus", FALSE))
+    {
+    if (sameString(track, "refGene") && hTableExists(db, "refSeqStatus"))
+        slAddHead(&extras, joinerDtfNew(db, "refSeqStatus", "status"));
+    }
+return extras;
+}
+
+static void configAddTableField(struct dyString *dy, char *table, char *field, boolean *pIsFirst)
+/* Append to dy a JSON object holding table and a list of exactly one field.  A "." is
+ * written in front of table because that's the convention for related tables in the same
+ * db as the track.  Unless *pIsFirst is set, ", " is written first to separate this object
+ * from the previous one; *pIsFirst is FALSE on return. */
+{
+char *separator = *pIsFirst ? "" : ", ";
+dyStringPrintf(dy, "%s{ \"table\": \".%s\", \"fields\": [\"%s\"] }", separator, table, field);
+*pIsFirst = FALSE;
+}
+
+
+static struct jsonElement *configForStreamer(char *db, struct trackDb *tdb,
+                                             struct joinerDtf *txStatusExtras)
/* Add VAI-specific config options, if applicable. */
+/* The result lists the related tables to join with tdb's track: kgXref.geneSymbol for
+ * sql-based knownGene, plus every table/field in txStatusExtras.  Returns NULL when no
+ * related table applies. */
{
struct jsonElement *config = NULL;
char *track = tdb->track;
+struct dyString *dyConfig = dyStringCreate("{ \"naForMissing\": false,"
+                                           " \"relatedTables\": [ ");
+boolean isFirst = TRUE;
// If track is sql-based knownGene and we have kgXref, then add kgXref.geneSymbol after
// the columns of knownGene.
-if (sameString(track, "knownGene") && !isCustomTrack(track) && !isHubTrack(track) &&
- !trackDbSetting(tdb, "bigDataUrl"))
- {
- struct sqlConnection *conn = hAllocConn(db);
- if (sqlTableExists(conn, "kgXref"))
+if (sameString(track, "knownGene") &&
+    !isCustomTrack(track) && !isHubTrack(track) &&
+    !trackDbSetting(tdb, "bigDataUrl") &&
+    hTableExists(db, "kgXref"))
 {
- char jsonStr[PATH_LEN];
- safef(jsonStr, sizeof(jsonStr),
- "{ \"relatedTables\":"
- " [ { \"table\": \"%s.kgXref\", \"fields\": [\"geneSymbol\"] } ] }",
- db);
- config = jsonParse(jsonStr);
+    // configAddTableField writes the leading "." itself, so pass the bare table name;
+    // passing ".kgXref" would produce "..kgXref" in the config.
+    configAddTableField(dyConfig, "kgXref", "geneSymbol", &isFirst);
 }
- hFreeConn(&conn);
+struct joinerDtf *txStatDtf;
+for (txStatDtf = txStatusExtras;  txStatDtf != NULL;  txStatDtf = txStatDtf->next)
+    configAddTableField(dyConfig, txStatDtf->table, txStatDtf->field, &isFirst);
+
+// If any of the above apply, close the relatedTables list and config object
+// and parse into jsonElements.
+if (! isFirst)
+ {
+    dyStringAppend(dyConfig, " ] }");
+    config = jsonParse(dyConfig->string);
 }
+// Release the buffer on every path, including when no related table applied.
+dyStringFree(&dyConfig);
return config;
}
static void adjustGpVarOverlapRule(struct annoGrator *gpVarGrator, boolean haveRegulatory)
/* Switch gpVarGrator's overlap rule to agoNoConstraint when regulatory elements can be
 * detected (haveRegulatory) and the cart's hgva_include_regulatory setting, TRUE by
 * default, asks to keep those annotations.  Otherwise leave the rule as it is. */
{
if (! haveRegulatory)
    return;
if (! cartUsualBoolean(cart, "hgva_include_regulatory", TRUE))
    return;
gpVarGrator->setOverlapRule(gpVarGrator, agoNoConstraint);
}
+static void addTxStatusExtras(struct annoFormatter *vepOut, char *geneTrack,
+                              struct annoGrator *gpVarGrator,
+                              struct joinerDtf *txStatusExtras)
+/* For each table/field in txStatusExtras (joined with geneTrack to provide transcript status
+ * info), register an extra item with vepOut so it appears in the EXTRAs column.  Each
+ * recognized table/field pair gets a fixed tag and description; knownCanonical.transcript
+ * is registered as a boolean item.  errAbort if an item names a db other than database or
+ * an unrecognized table/field pair. */
+{
+struct joinerDtf *dtf;
+for (dtf = txStatusExtras;  dtf != NULL;  dtf = dtf->next)
+    {
+    if (differentString(dtf->database, database))
+        errAbort("addTxStatusExtras: Expected db=%s in txStatDtf but got %s",
+                 database, dtf->database);
+    char *table = dtf->table;
+    char *field = dtf->field;
+    boolean isGencodeTag = ((startsWith(GENCODE_PREFIX"Tag", table) && sameString(field, "tag")) ||
+                            (sameString(table, "knownToTag") && sameString(field, "value")));
+    boolean isCanonical = (sameString(table, "knownCanonical") &&
+                           sameString(field, "transcript"));
+    boolean isRefSeqStatus = (sameString(table, "refSeqStatus") && sameString(field, "status"));
+    char *tag = NULL;
+    char *description = NULL;
+    boolean isBoolean = FALSE;
+    if (isGencodeTag)
+        {
+        tag = "GENCODE_TAG";
+        description = "GENCODE tags for the transcript";
+        }
+    else if (isCanonical)
+        {
+        tag = "CANONICAL";
+        description = "If present, the transcript is the 'canonical' transcript of the gene "
+                      "(generally the longest isoform of the gene)";
+        isBoolean = TRUE;
+        }
+    else if (isRefSeqStatus)
+        {
+        tag = "REFSEQ_STATUS";
+        description = "RefSeq status of the transcript";
+        }
+    else
+        errAbort("addTxStatusExtras: Unrecognized {table,field}: {%s,%s}", table, field);
+    char *column = annoStreamDbColumnNameFromDtf(database, geneTrack, dtf);
+    annoFormatVepAddExtraItem(vepOut, (struct annoStreamer *)gpVarGrator,
+                              tag, description, column, isBoolean);
+    }
+}
+
void doQuery()
/* Translate simple form inputs into anno* components and execute query. */
{
dyInfo = dyStringNew(0);
char *chrom = NULL;
uint start = 0, end = 0;
if (sameString(regionType, hgvaRegionTypeRange))
getCartPosOrDie(&chrom, &start, &end);
struct annoAssembly *assembly = hAnnoGetAssembly(database);
char *geneTrack = cartString(cart, "hgva_geneTrack");
struct trackDb *geneTdb = tdbForTrack(database, geneTrack, &fullTrackList);
if (geneTdb == NULL)
{
warn("Can't find tdb for gene track %s", geneTrack);
@@ -2226,31 +2553,32 @@
primaryLongLabel = hgvaVariantIdsLabel;
}
else
{
struct trackDb *varTdb = getVariantTrackDb(variantTrack);
if (varTdb == NULL)
{
doUi();
return;
}
primary = hAnnoStreamerFromTrackDb(assembly, varTdb->table, varTdb, chrom, maxVarRows, NULL);
primaryLongLabel = varTdb->longLabel;
}
enum annoGratorOverlap geneOverlapRule = agoMustOverlap;
-struct jsonElement *gpConfig = configForStreamer(database, geneTdb);
+struct joinerDtf *txStatusExtras = getTxStatusExtras(database, geneTrack);
+struct jsonElement *gpConfig = configForStreamer(database, geneTdb, txStatusExtras);
struct annoGrator *gpVarGrator = hAnnoGratorFromTrackDb(assembly, geneTdb->table, geneTdb, chrom,
ANNO_NO_LIMIT, primary->asObj,
geneOverlapRule, gpConfig);
setGpVarFuncFilter(gpVarGrator);
// Some grators may be used as both filters and output values. To avoid making
// multiple grators for the same source, hash them by trackName:
struct hash *gratorsByName = hashNew(8);
struct annoGrator *snpGrator = NULL;
char *snpDesc = NULL;
if (cartUsualBoolean(cart, "hgva_rsId", FALSE))
snpGrator = gratorForSnpBed4(gratorsByName, "", assembly, chrom, agoNoConstraint, &snpDesc);
// Now construct gratorList in the order in which annoFormatVep wants to see them,
@@ -2259,30 +2587,31 @@
slAddHead(&gratorList, gpVarGrator);
if (snpGrator != NULL)
slAddHead(&gratorList, snpGrator);
// Text or HTML output?
char *outFormat = cartUsualString(cart, "hgva_outFormat", "vepTab");
boolean doHtml = sameString(outFormat, "vepHtml");
// Initialize VEP formatter:
struct annoFormatter *vepOut = annoFormatVepNew("stdout", doHtml,
primary, primaryLongLabel,
(struct annoStreamer *)gpVarGrator,
geneTdb->longLabel,
(struct annoStreamer *)snpGrator,
snpDesc, assembly);
+addTxStatusExtras(vepOut, geneTrack, gpVarGrator, txStatusExtras);
boolean haveRegulatory = FALSE;
addOutputTracks(&gratorList, gratorsByName, vepOut, assembly, chrom, doHtml, &haveRegulatory);
adjustGpVarOverlapRule(gpVarGrator, haveRegulatory);
addFilterTracks(&gratorList, gratorsByName, assembly, chrom);
slReverse(&gratorList);
if (doHtml)
{
webStart(cart, database, "Annotated Variants in VEP/HTML format");
}
else
{
// Undo the htmlPushEarlyHandlers() because after this point they make ugly text: