06845edb9038bb632170891cebcdc8f477f2ea8d
angie
  Fri Dec 4 08:59:38 2015 -0800
Add dbNSFP v3.1a (including VEST scores) to hg38 for hgVai.
Add hgVai options for adding in transcript status info: GENCODE tags
when applicable, knownCanonical for knownGene, refSeqStatus for refGene.
refs #16502, #16503

diff --git src/hg/hgVai/hgVai.c src/hg/hgVai/hgVai.c
index a1cf465..c4c19a4 100644
--- src/hg/hgVai/hgVai.c
+++ src/hg/hgVai/hgVai.c
@@ -22,33 +22,37 @@
 #include "textOut.h"
 #include "trackHub.h"
 #include "hubConnect.h"
 #include "twoBit.h"
 #include "gpFx.h"
 #include "bigGenePred.h"
 #include "udc.h"
 #include "knetUdc.h"
 #include "md5.h"
 #include "regexHelper.h"
 #include "hAnno.h"
 #include "annoGratorQuery.h"
 #include "annoGratorGpVar.h"
 #include "annoFormatVep.h"
 #include "annoStreamBigBed.h"
+#include "annoStreamDb.h"
 
 #include "libifyMe.h"
 
+#define GENCODE_TAG_DOC_URL "\"http://www.gencodegenes.org/gencode_tags.html\""
+#define REFSEQ_STATUS_DOC_URL "\"http://www.ncbi.nlm.nih.gov/books/NBK21091/table/ch18.T.refseq_status_codes\""
+
 /* Global Variables */
 struct cart *cart;		/* CGI and other variables */
 struct hash *oldVars = NULL;	/* The cart before new cgi stuff added. */
 char *genome = NULL;		/* Name of genome - mouse, human, etc. */
 char *database = NULL;		/* Current genome database - hg17, mm5, etc. */
 char *regionType = NULL;	/* genome, ENCODE pilot regions, or specific position range. */
 struct grp *fullGroupList = NULL;	/* List of all groups. */
 struct trackDb *fullTrackList = NULL;	/* List of all tracks in database. */
 static struct pipeline *compressPipeline = (struct pipeline *)NULL;
 
 
 // Null terminated list of CGI Variables we don't want to save permanently:
 char *excludeVars[] = {"Submit", "submit", "hgva_startQuery", NULL,};
 
 #define hgvaRange "position"
@@ -473,67 +477,71 @@
 char *curLimit = cartUsualString(cart, "hgva_variantLimit", "10000");
 char *limitLabels[] = { "10", "100", "1,000", "10,000", "100,000" };
 char *limitValues[] = { "10", "100", "1000", "10000", "100000" };
 cgiMakeDropListWithVals("hgva_variantLimit", limitLabels, limitValues, ArraySize(limitLabels),
 			curLimit);
 printCtAndHubButtons();
 puts("<BR>");
 }
 
 boolean isGeneTrack(struct trackDb *tdb, void *filterData)
 /* This is a TdbFilterFunction to get genePred tracks. */
 {
 return (startsWith("genePred", tdb->type) || sameString("bigGenePred", tdb->type));
 }
 
-boolean selectGenes()
-/* Let user select a gene predictions track; return FALSE if there are no genePred tracks. */
+char *selectGenes()
+/* Let user select a gene predictions track; return NULL if there are no genePred tracks. */
 {
 struct slRef *trackRefList = NULL;
 tdbFilterGroupTrack(fullTrackList, fullGroupList, isGeneTrack, NULL, NULL, &trackRefList);
 boolean gotGP = (trackRefList != NULL);
 if (!gotGP)
     warn("This assembly (%s) has no gene prediction tracks, "
 	 "so the VAI will not be able to annotate it.", database);
 printf("<div class='sectionLiteHeader'>Select Genes</div>\n");
 if (gotGP)
     printf("The gene predictions selected here will be used ");
 else
     printf("Gene predictions are required in order ");
 printf("to determine the effect of "
        "each variant on genes, for example intronic, missense, splice site, intergenic etc.");
 if (!gotGP)
     printf(" Since this assembly has no gene prediction tracks, "
 	   "the VAI can't provide functional annotations. "
 	   "Please select a different genome.<BR>");
 printf("<BR>\n");
-char *selected = cartUsualString(cart, "hgva_geneTrack", ""); //#*** per-db cart vars??
+if (! gotGP)
+    return NULL;
+char *firstTrack = ((struct trackDb *)(trackRefList->val))->track;
+char *selected = cartUsualString(cart, "hgva_geneTrack", firstTrack);
 //#*** should show more info about each track... button to pop up track desc?
 
 if (gotGP)
     {
-    printf("<SELECT ID='hgva_geneTrack' NAME='hgva_geneTrack'>\n");
+    printf("<SELECT ID='hgva_geneTrack' NAME='hgva_geneTrack' "
+           "onchange=\"hgva.changeGeneSource();\">\n");
     struct slRef *ref;
     for (ref = trackRefList;  ref != NULL;  ref = ref->next)
 	{
 	struct trackDb *tdb = ref->val;
     if (tdb->subtracks == NULL)
 	printOption(tdb->track, selected, tdb->longLabel);
 	}
     puts("</SELECT><BR>");
     }
-return gotGP;
+return selected;
 }
 
 //#*** We really need a dbNsfp.[ch]:
 enum PolyPhen2Subset { noSubset, HDIV, HVAR };
 
 char *formatDesc(char *url, char *name, char *details, boolean doHtml)
 /* Return a description with URL for name plus extra details.  If doHtml,
  * wrap URL in <A ...>...</A>. */
 {
 char desc[1024];
 if (doHtml)
     safef(desc, sizeof(desc), "<A HREF='%s' TARGET=_BLANK>%s</A> %s",
 	  url, name, details);
 else
     safef(desc, sizeof(desc), "(%s) %s %s",
@@ -560,30 +568,34 @@
     else
 	errAbort("dbNsfpDescFromTableName: invalid PolyPhen2 subset type (%d)", subset);
     }
 else if (sameString(tableName, "dbNsfpMutationTaster"))
 	return formatDesc("http://www.mutationtaster.org/", "MutationTaster",
 			  "(A = disease causing automatic, D = disease causing, "
 			  "N = polymorphism, P = polymorphism automatic)", doHtml);
 else if (sameString(tableName, "dbNsfpMutationAssessor"))
 	return formatDesc("http://mutationassessor.org/", "MutationAssessor",
 			  "(high or medium: predicted functional; "
 			  "low or neutral: predicted non-functional)", doHtml);
 else if (sameString(tableName, "dbNsfpLrt"))
 	return formatDesc("http://www.genetics.wustl.edu/jflab/lrt_query.html",
 			  "Likelihood ratio test (LRT)",
 			  "(D = deleterious, N = Neutral, U = unknown)", doHtml);
+else if (sameString(tableName, "dbNsfpVest"))
+    return formatDesc("http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3665549/",
+                      "Variant Effect Scoring Tool (VEST)",
+                      "(scores [0-1] predict confidence that a change is deleterious)", doHtml);
 else if (sameString(tableName, "dbNsfpGerpNr"))
 	return formatDesc("http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html",
 			  "GERP++", "Neutral Rate (NR)", doHtml);
 else if (sameString(tableName, "dbNsfpGerpRs"))
 	return formatDesc("http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html",
 			  "GERP++", "Rejected Substitutions (RS)", doHtml);
 else if (sameString(tableName, "dbNsfpInterPro"))
 	return formatDesc("http://www.ebi.ac.uk/interpro/", "InterPro", "protein domains", doHtml);
 return NULL;
 }
 
 struct slName *findDbNsfpTables()
 /* See if this database contains dbNSFP tables. */
 {
 if (startsWith(hubTrackPrefix, database))
@@ -609,42 +621,45 @@
 	safef(cartVar, sizeof(cartVar), "hgva_track_%s_%s", database, table);
     boolean defaultChecked = (sameString("dbNsfpSift", table) ||
 			      sameString("dbNsfpPolyPhen2", table));
     cartMakeCheckBox(cart, cartVar, defaultChecked);
     printf("%s<BR>\n", description);
     }
 }
 
 void selectDbNsfp(struct slName *dbNsfpTables)
 /* Let user select scores/predicitions from various tools collected by dbNSFP. */
 {
 if (dbNsfpTables == NULL)
     return;
 startCollapsibleSection("dbNsfp", "Database of Non-synonymous Functional Predictions (dbNSFP)",
 			TRUE);
+//#*** hardcoded version info... we need metadata (#11462)
+char *dbNsfpVersion = sameString(database, "hg19") ? "2.0" : "3.1a";
+char *txVersion = sameString(dbNsfpVersion, "2.0") ? "Gencode release 9 (Ensembl 64, Dec. 2011)" :
+                                                     "Gencode release 22 (Ensembl 79, Mar. 2015)";
 printf("<A HREF='https://sites.google.com/site/jpopgen/dbNSFP' TARGET=_BLANK>dbNSFP</A> "
        "(<A HREF='http://onlinelibrary.wiley.com/doi/10.1002/humu.22376/abstract' "
        "TARGET=_BLANK>Liu <em>et al.</em> 2013</A>) "
-       "release 2.0 "
+       "release %s "
        "provides pre-computed scores and predictions of functional significance "
        "from a variety of tools.  Every possible coding change to transcripts in "
- //#*** hardcoded version info... sigh, we need trackDb... or metaDb??
-       "Gencode release 9 (Ensembl 64, Dec. 2011) gene predictions "
+       "%s gene predictions "
        "has been evaluated.  "
        "<em>Note: This may not encompass all transcripts in your "
-       "selected gene set.</em><BR>\n");
-//#*** Another cheap hack: reverse alph order happens to be what we want,
+       "selected gene set.</em><BR>\n", dbNsfpVersion, txVersion);
+//#*** Another cheap hack: reverse alph order happens to be what we want (until VEST??),
 //#*** but priorities would be cleaner:
 slReverse(&dbNsfpTables);
 jsMakeSetClearContainer();
 struct slName *table;
 for (table = dbNsfpTables;  table != NULL;  table = table->next)
     {
     if (sameString(table->name, "dbNsfpPolyPhen2"))
 	{
 	printDbNsfpSource(table->name, HDIV);
 	printDbNsfpSource(table->name, HVAR);
 	}
     else
 	printDbNsfpSource(table->name, 0);
     }
 jsEndContainer();
@@ -722,30 +737,198 @@
 }
 
 void selectDbSnp(boolean gotSnp)
 /* Offer to include rsID (and other fields, or leave that for advanced output??) if available */
 {
 if (!gotSnp)
     return;
 startCollapsibleSection("dbSnp", "Known variation", TRUE);
 cartMakeCheckBox(cart, "hgva_rsId", TRUE);
 printf("Include <A HREF='http://www.ncbi.nlm.nih.gov/projects/SNP/' TARGET=_BLANK>dbSNP</A> "
        "rs# ID if one exists<BR>\n");
 puts("<BR>");
 endCollapsibleSection();
 }
 
+#define GENCODE_PREFIX "wgEncodeGencode"
+
+struct slName *getGencodeTagVersions()
+/* Return a copy (caller owns it) of the version suffixes of wgEncodeGencodeTag% tables. */
+{
+static struct slName *tagVersions = NULL;  // cache; the query is repeated while it is still NULL
+if (tagVersions == NULL)
+    {
+    struct sqlConnection *conn = hAllocConn(database);
+    struct slName *tagTables = sqlQuickList(conn,
+                                            NOSQLINJ "show tables like '"GENCODE_PREFIX"Tag%'");
+    struct slName *tt;
+    for (tt = tagTables;  tt != NULL;  tt = tt->next)
+        slAddHead(&tagVersions, slNameNew(tt->name + strlen(GENCODE_PREFIX"Tag")));
+    slFreeList(&tagTables);  // suffixes were copied by slNameNew; don't leak the query result
+    hFreeConn(&conn);
+    }
+return slNameCloneList(tagVersions);
+}
+
+boolean knownGeneHasGencodeTags()
+/* Return TRUE if the current database has both a knownGene table and a knownToTag table. */
+{
+return hTableExists(database, "knownGene") && hTableExists(database, "knownToTag");
+}
+
+boolean hasGencodeTags()
+/* Return TRUE if knownGene has knownToTag, or if any wgEncodeGencodeTag% table was found. */
+{
+return knownGeneHasGencodeTags() || (getGencodeTagVersions() != NULL);
+}
+
+boolean hasTxStatus()
+/* Return TRUE if database can supply transcript status info for at least one gene track:
+ * GENCODE tags, knownCanonical (for knownGene) and/or refSeqStatus (for refGene). */
+{
+// Checks run in the same order as before; later ones are skipped once one succeeds.
+boolean gotStatus = hasGencodeTags();
+if (!gotStatus)
+    gotStatus = hTableExists(database, "knownGene") && hTableExists(database, "knownCanonical");
+if (!gotStatus)
+    gotStatus = hTableExists(database, "refGene") && hTableExists(database, "refSeqStatus");
+return gotStatus;
+}
+
+char *getLatestGencodeVersion(struct slName *versionList)
+/* Return a cloneString() copy of the numerically largest version name in versionList. */
+{
+char *bestName = NULL;
+int bestNum = -1;
+struct slName *el;
+for (el = versionList;  el != NULL;  el = el->next)
+    {
+    // Names are ranked by the integer that atoi() reads at skipToNumeric(name).
+    int num = atoi(skipToNumeric(el->name));
+    if (num <= bestNum)
+        continue;   // not strictly larger: on ties the earlier element is kept
+    bestNum = num;
+    bestName = el->name;
+    }
+return cloneString(bestName);   // bestName is still NULL if nothing beat -1 (e.g. empty list)
+}
+
+INLINE char *gencodeTableName(char *suffix, char *version, char *buf, size_t bufSize)
+/* Use safef to write wgEncodeGencode<suffix><version> into buf (bufSize bytes); return buf. */
+{
+safef(buf, bufSize, GENCODE_PREFIX"%s%s", suffix, version);
+return buf;
+}
+
+boolean refGeneHasGencodeTags(struct slName *gencodeVersionList)
+/* Return TRUE if this database has a wgEncodeGencodeRefSeq table as well as a ...Tag table. */
+{
+char table[PATH_LEN], *version = getLatestGencodeVersion(gencodeVersionList);
+return version != NULL &&  // NULL: no ...Tag tables; never format NULL into a table name
+    hTableExists(database, gencodeTableName("RefSeq", version, table, sizeof(table)));
+}
+
+boolean startsWithGencodeGene(char *geneTrack)
+/* Return TRUE if geneTrack starts with wgEncodeGencode{Basic,Comp,PseudoGene}.
+ * Other GENCODE genePred tracks yield FALSE because no tags are associated with them. */
+{
+return (startsWith(GENCODE_PREFIX"Basic", geneTrack) ||
+        startsWith(GENCODE_PREFIX"Comp", geneTrack) ||
+        startsWith(GENCODE_PREFIX"PseudoGene", geneTrack));
+}
+
+boolean isGencodeWithVersion(char *geneTrack, struct slName *versionList)
+/* Return TRUE if geneTrack passes startsWithGencodeGene and ends with a name in versionList. */
+{
+boolean matched = FALSE;
+if (startsWithGencodeGene(geneTrack))
+    {
+    struct slName *ver;
+    for (ver = versionList;  ver != NULL && !matched;  ver = ver->next)
+        if (endsWith(geneTrack, ver->name))
+            matched = TRUE;
+    }
+return matched;
+}
+
+void selectTxStatus(boolean hasTxStatus, char *geneTrack)
+/* Offer to include transcript status, e.g. whether it is in knownCanonical or has GENCODE tags.
+ * Prints nothing unless hasTxStatus.  Otherwise makes one div per category of txStatus info; a
+ * div starts out visible only if its info applies to geneTrack (the selected gene track).  If no
+ * div is visible, a "noTxStatus" div saying that nothing is available is shown instead. */
+{
+if (! hasTxStatus)
+    return;
+startCollapsibleSection("txStatus", "Transcript status", FALSE);
+boolean somethingIsVisible = FALSE;
+if (hasGencodeTags())
+    {
+    struct slName *versionList = getGencodeTagVersions();
+    char *maybeKnownGene = knownGeneHasGencodeTags() ? "knownGene" : "";
+    char *maybeRefGene = refGeneHasGencodeTags(versionList) ? "refGene" : "";
+    char *maybeEnsGene = "";
+    char *versions = "";
+    if (versionList != NULL)
+        {
+        if (hTableExists(database, "ensGene"))
+            maybeEnsGene = "ensGene";  // NOTE(review): gencodeTagTableForTrack has no ensGene case
+        versions = slNameListToString(versionList, ' ');
+        }
+    boolean isVisible = (sameString(geneTrack, maybeKnownGene) ||
+                         sameString(geneTrack, maybeEnsGene) ||
+                         sameString(geneTrack, maybeRefGene) ||
+                         isGencodeWithVersion(geneTrack, versionList));
+    somethingIsVisible |= isVisible;
+    printf("<div class=\"txStatus %s %s %s %s\" style=\"display: %s;\">",
+           maybeKnownGene, maybeRefGene, maybeEnsGene, versions,
+           isVisible ? "block" : "none");
+    cartMakeCheckBox(cart, "hgva_txStatus_gencode", FALSE);
+    puts("Include the <A HREF=" GENCODE_TAG_DOC_URL " "
+         "TARGET=_BLANK>GENCODE tags</A> for each transcript (if available).<BR>");
+    puts("</div>");
+    }
+if (hTableExists(database, "knownGene") && hTableExists(database, "knownCanonical"))
+    {
+    boolean isVisible = sameString(geneTrack, "knownGene");
+    somethingIsVisible |= isVisible;
+    printf("<div class=\"txStatus knownGene\" style=\"display: %s;\">",
+           isVisible ? "block" : "none");
+    cartMakeCheckBox(cart, "hgva_txStatus_knownCanonical", FALSE);
+    puts("Indicate whether each UCSC Genes transcript is 'canonical' (generally the longest "
+         "isoform of a gene).<BR>");
+    puts("</div>");
+    }
+if (hTableExists(database, "refGene") && hTableExists(database, "refSeqStatus"))
+    {
+    boolean isVisible = sameString(geneTrack, "refGene");
+    somethingIsVisible |= isVisible;
+    printf("<div class=\"txStatus refGene\" style=\"display: %s;\">",
+           isVisible ? "block" : "none");
+    cartMakeCheckBox(cart, "hgva_txStatus_refSeqStatus", FALSE);
+    puts("Include the "
+         "<A HREF=" REFSEQ_STATUS_DOC_URL " "
+         "TARGET=_BLANK>RefSeq status</A> of each transcript.<BR>");
+    puts("</div>");
+    }
+printf("<div class=\"noTxStatus\" style=\"display: %s;\">",
+       somethingIsVisible ? "none" : "block");
+puts("No transcript status data are available for the selected gene track.");
+puts("</div>");
+puts("<BR>");
+endCollapsibleSection();
+}
+
 boolean isHg19RegulatoryTrack(struct trackDb *tdb, void *filterData)
 /* For now, just look for a couple specific tracks by tableName. */
 {
 //#*** NEED METADATA
 return (sameString("wgEncodeRegDnaseClusteredV3", tdb->table) ||
 	sameString("wgEncodeRegTfbsClusteredV3", tdb->table));
 }
 
 boolean isHg38RegulatoryTrack(struct trackDb *tdb, void *filterData)
 /* For now, just look for a couple specific tracks by tableName. */
 {
 //#*** NEED METADATA
 return (sameString("wgEncodeRegDnaseClustered", tdb->table) ||
 	sameString("wgEncodeRegTfbsClusteredV3", tdb->table));
 }
@@ -853,46 +1036,48 @@
     for (ref = trackRefList;  ref != NULL;  ref = ref->next)
 	{
 	struct trackDb *tdb = ref->val;
 	char cartVar[512];
 	safef(cartVar, sizeof(cartVar), "hgva_track_%s_%s", database, tdb->track);
 	cartMakeCheckBox(cart, cartVar, FALSE);
 	struct trackDb *topTdb = trackDbTopLevelSelfOrParent(tdb);
 	printf("<A HREF=\"%s?%s&g=%s\">%s</A><BR>\n", hgTrackUiName(), cartSidUrlString(cart),
 	       topTdb->track, tdb->longLabel);
 	}
     puts("<BR>");
     endCollapsibleSection();
     }
 }
 
-void selectAnnotations()
+void selectAnnotations(char *geneTrack)
 /* Beyond predictions of protein-coding effect, what other basic data can we integrate? */
 {
 struct slName *dbNsfpTables = findDbNsfpTables();
 boolean gotSnp = findSnpBed4("", NULL, NULL);
 struct slRef *elTrackRefList = NULL, *scoreTrackRefList = NULL;
 findCons(&elTrackRefList, &scoreTrackRefList);
 struct slRef *cosmicTrackRefList = findTrackRefByName("cosmic");
+boolean hasTxStat = hasTxStatus();
 if (dbNsfpTables == NULL && !gotSnp && elTrackRefList == NULL && scoreTrackRefList == NULL &&
-    cosmicTrackRefList == NULL)
+    cosmicTrackRefList == NULL && !hasTxStat)
     return;
 puts("<BR>");
 printf("<div class='sectionLiteHeader'>Select More Annotations (optional)</div>\n");
 // Make wrapper table for collapsible sections:
 puts("<TABLE border=0 cellspacing=5 cellpadding=0 style='padding-left: 10px;'>");
 selectDbNsfp(dbNsfpTables);
+selectTxStatus(hasTxStat, geneTrack);
 selectDbSnp(gotSnp);
 trackCheckBoxSection("Cosmic", "COSMIC", cosmicTrackRefList);
 trackCheckBoxSection("ConsEl", "Conserved elements", elTrackRefList);
 trackCheckBoxSection("ConsScore", "Conservation scores", scoreTrackRefList);
 puts("</TABLE>");
 }
 
 void selectFiltersFunc()
 /* Options to restrict variants based on gene region/soTerm from gpFx */
 {
 startCollapsibleSection("filtersFunc", "Functional role", FALSE);
 printf("Include variants annotated as<BR>\n");
 jsMakeSetClearContainer();
 cartMakeCheckBox(cart, "hgva_include_intergenic", TRUE);
 printf("intergenic<BR>\n");
@@ -1071,35 +1256,35 @@
 boolean alreadyAgreed = cartUsualBoolean(cart, "hgva_agreedToDisclaimer", FALSE);
 printf("<script>\n"
        "$(document).ready(function() { hgva.disclaimer.init(%s, hgva.userClickedAgree); });\n"
        "</script>\n", alreadyAgreed ? "true" : "false");
 addSomeCss();
 printAssemblySection();
 
 /* Check for variant custom tracks.  If there are none, tell user they need to
  * upload at least one. */
 struct slRef *varTrackList = NULL, *varGroupList = NULL;
 tdbFilterGroupTrack(fullTrackList, fullGroupList, isVariantCustomTrack, NULL,
 		    &varGroupList, &varTrackList);
 puts("<BR>");
 // Make wrapper table for collapsible sections:
 selectVariants(varGroupList, varTrackList);
-boolean gotGP = selectGenes();
-if (gotGP)
+char *geneTrack = selectGenes();
+if (geneTrack != NULL)
     {
     selectRegulatory();
-    selectAnnotations();
+    selectAnnotations(geneTrack);
     selectFilters();
     selectOutput();
     submitAndDisclaimer();
     }
 printf("</FORM>");
 
 jsReloadOnBackButton(cart);
 
 webNewSection("Using the Variant Annotation Integrator");
 webIncludeHelpFile("hgVaiHelpText", FALSE);
 jsIncludeFile("jquery-ui.js", NULL);
 jsIncludeFile("hgVarAnnogrator.js", NULL);
 jsIncludeFile("ui.dropdownchecklist.js", NULL);
 jsIncludeFile("ddcl.js", NULL);
 }
@@ -1278,31 +1463,32 @@
 slAddHead(pGratorList, grator);
 if (vepOut != NULL)
     {
     char *tableName = tableNameFromSourceName(grator->streamer.name);
     char *suffix = NULL;
     if (subset == HDIV)
 	suffix = "HDIV";
     else if (subset == HVAR)
 	suffix = "HVAR";
     char *tag = tagFromTableName(tableName, suffix);
     if (isEmpty(description))
 	description = grator->streamer.name;
     if (isReg)
 	annoFormatVepAddRegulatory(vepOut, (struct annoStreamer *)grator, tag, description, column);
     else
-	annoFormatVepAddExtraItem(vepOut, (struct annoStreamer *)grator, tag, description, column);
+	annoFormatVepAddExtraItem(vepOut, (struct annoStreamer *)grator, tag, description, column,
+                                  FALSE);
     }
 }
 
 INLINE void updateGratorList(struct annoGrator *grator, struct annoGrator **pGratorList)
 /* If grator is non-NULL, add it to gratorList. */
 {
 updateGratorListAndVepExtra(grator, pGratorList, NULL, 0, NULL, NULL, FALSE);
 }
 
 void addDbNsfpSeqChange(char *trackName, struct annoAssembly *assembly, struct hash *gratorsByName,
 			struct annoGrator **pGratorList)
 // If the user has selected dbNsfp* data, we also need the underlying dbNsfpSeqChange
 // data, so annoFormatVep can tell whether the variant and gpFx are consistent with the
 // variant and transcript that dbNsfp used to calculate scores.
 {
@@ -2140,63 +2326,204 @@
  * (main page will be displayed) */
 {
 struct trackDb *varTdb = tdbForTrack(database, variantTrack, &fullTrackList);
 if (varTdb == NULL)
     {
     if (isHubTrack(variantTrack))
 	warn("Can't find hub track '%s'", variantTrack);
     else
 	warn("Can't find tdb for variant track '%s'", variantTrack);
     }
 else
     checkVariantTrack(varTdb);
 return varTdb;
 }
 
-static struct jsonElement *configForStreamer(char *db, struct trackDb *tdb)
+static char *gencodeVersionFromTrack(char *track)
+/* For a track accepted by startsWithGencodeGene, return a pointer (into track itself, not a
+ * copy) to the last 'V' in its name, which is taken to be where the version suffix begins.
+ * Return NULL for any other track, or if the name contains no 'V'. */
+{
+if (! startsWithGencodeGene(track))
+    return NULL;
+// strrchr finds the final 'V'; everything from there to the end is treated as the version.
+char *versionStart = strrchr(track, 'V');
+return versionStart;
+}
+
+static char *gencodeTagTableForTrack(char *db, char *track)
+/* If a GENCODE tag table (wgEncodeGencodeTag<version>, or knownToTag for knownGene) can be
+ * associated with track, return its name as a cloned string; otherwise return NULL. */
+{
+struct slName *versionList = getGencodeTagVersions();
+if (startsWithGencodeGene(track))
+    {
+    char *version = gencodeVersionFromTrack(track);
+    if (version != NULL)
+        {
+        char table[PATH_LEN];
+        return cloneString(gencodeTableName("Tag", version, table, sizeof(table)));
+        }
+    }
+else if (sameString(track, "refGene") && refGeneHasGencodeTags(versionList))
+    {
+    char *version = getLatestGencodeVersion(versionList);
+    char table[PATH_LEN];
+    if (hTableExists(db, gencodeTableName("RefSeq", version, table, sizeof(table))))
+        return cloneString(gencodeTableName("Tag", version, table, sizeof(table)));
+    }
+else if (sameString(track, "knownGene") && knownGeneHasGencodeTags())
+    {
+    if (hTableExists(db, "knownToTag"))
+        return cloneString("knownToTag");
+    }
+return NULL;
+}
+
+static struct joinerDtf *getTxStatusExtras(char *db, char *track)
+// Several kinds of transcript status may be enabled in the cart; for each one that is enabled
+// and applies to track, add a {db, table, field} to the returned list (NULL if none apply).
+{
+struct joinerDtf *txStatusExtras = NULL;
+if (cartUsualBoolean(cart, "hgva_txStatus_gencode", FALSE))
+    {
+    char *gencodeTagTable = gencodeTagTableForTrack(db, track);
+    if (gencodeTagTable != NULL)
+        {
+        char *field = "tag";
+        if (sameString("knownToTag", gencodeTagTable))
+            field = "value";
+        slAddHead(&txStatusExtras, joinerDtfNew(db, gencodeTagTable, field));
+        }
+    }
+if (cartUsualBoolean(cart, "hgva_txStatus_knownCanonical", FALSE) &&
+    sameString(track, "knownGene") &&
+    hTableExists(db, "knownCanonical"))
+    {
+    slAddHead(&txStatusExtras, joinerDtfNew(db, "knownCanonical", "transcript"));
+    }
+if (cartUsualBoolean(cart, "hgva_txStatus_refSeqStatus", FALSE) &&
+    sameString(track, "refGene") &&
+    hTableExists(db, "refSeqStatus"))
+    {
+    slAddHead(&txStatusExtras, joinerDtfNew(db, "refSeqStatus", "status"));
+    }
+return txStatusExtras;
+}
+
+static void configAddTableField(struct dyString *dy, char *table, char *field, boolean *pIsFirst)
+/* Append the JSON object { "table": ".<table>", "fields": ["<field>"] } to dy, preceded by ", "
+ * unless *pIsFirst; then set *pIsFirst to FALSE.  The "." is prepended to table here because
+ * that's the convention for related tables in the same db as the track. */
+{
+if (! *pIsFirst)
+    dyStringAppend(dy, ", ");
+dyStringPrintf(dy, "{ \"table\": \".%s\", \"fields\": [\"%s\"] }", table, field);
+*pIsFirst = FALSE;
+}
+
+
+static struct jsonElement *configForStreamer(char *db, struct trackDb *tdb,
+                                             struct joinerDtf *txStatusExtras)
 /* Add VAI-specific config options, if applicable. */
 {
 struct jsonElement *config = NULL;
 char *track = tdb->track;
+struct dyString *dyConfig = dyStringCreate("{ \"naForMissing\": false,"
+                                           "  \"relatedTables\": [ ");
+boolean isFirst = TRUE;
 // If track is sql-based knownGene and we have kgXref, then add kgXref.geneSymbol after
 // the columns of knownGene.
-if (sameString(track, "knownGene") && !isCustomTrack(track) && !isHubTrack(track) &&
-    !trackDbSetting(tdb, "bigDataUrl"))
-    {
-    struct sqlConnection *conn = hAllocConn(db);
-    if (sqlTableExists(conn, "kgXref"))
+if (sameString(track, "knownGene") &&
+    !isCustomTrack(track) && !isHubTrack(track) &&
+    !trackDbSetting(tdb, "bigDataUrl") &&
+    hTableExists(db, "kgXref"))
     {
-        char jsonStr[PATH_LEN];
-        safef(jsonStr, sizeof(jsonStr),
-              "{ \"relatedTables\":"
-              "    [ { \"table\": \"%s.kgXref\", \"fields\": [\"geneSymbol\"] } ] }",
-              db);
-        config = jsonParse(jsonStr);
+    configAddTableField(dyConfig, "kgXref", "geneSymbol", &isFirst);  // helper prepends "."
     }
-    hFreeConn(&conn);
+struct joinerDtf *txStatDtf;
+for (txStatDtf = txStatusExtras;  txStatDtf != NULL;  txStatDtf = txStatDtf->next)
+    configAddTableField(dyConfig, txStatDtf->table, txStatDtf->field, &isFirst);
+
+// If any of the above apply, close the relatedTables list and config object
+// and parse into jsonElements.
+if (! isFirst)
+    {
+    dyStringAppend(dyConfig, " ] }");
+    config = jsonParse(dyConfig->string);
+    dyStringFree(&dyConfig);
     }
 return config;
 }
 
 static void adjustGpVarOverlapRule(struct annoGrator *gpVarGrator, boolean haveRegulatory)
 /* If we're able to detect regulatory elements, and want to keep those annotations, loosen up
  * gpVarGrator's overlap rule from the default (must overlap). */
 {
 if (haveRegulatory && cartUsualBoolean(cart, "hgva_include_regulatory", TRUE))
     gpVarGrator->setOverlapRule(gpVarGrator, agoNoConstraint);
 }
 
+static void addTxStatusExtras(struct annoFormatter *vepOut, char *geneTrack,
+                              struct annoGrator *gpVarGrator,
+                              struct joinerDtf *txStatusExtras)
+/* For each {db, table, field} in txStatusExtras (to be joined with geneTrack), add an EXTRA item
+ * with a tag and description to vepOut.  errAbort on an unexpected db or {table, field}. */
+{
+struct joinerDtf *txStatDtf;
+for (txStatDtf = txStatusExtras;  txStatDtf != NULL;  txStatDtf = txStatDtf->next)
+    {
+    char *tag = NULL, *description = NULL;
+    boolean isBoolean = FALSE;
+    if (differentString(txStatDtf->database, database))
+        errAbort("addTxStatusExtras: Expected db=%s in txStatDtf but got %s",
+                 database, txStatDtf->database);
+    if ((startsWith(GENCODE_PREFIX"Tag", txStatDtf->table) &&
+         sameString(txStatDtf->field, "tag")) ||
+        (sameString(txStatDtf->table, "knownToTag") &&
+         sameString(txStatDtf->field, "value")))
+        {
+        tag = "GENCODE_TAG";
+        description = "<A HREF=" GENCODE_TAG_DOC_URL " "
+            "TARGET=_BLANK>GENCODE tags</A> for the transcript";
+        }
+    else if (sameString(txStatDtf->table, "knownCanonical") &&
+             sameString(txStatDtf->field, "transcript"))
+        {
+        tag = "CANONICAL";
+        description = "If present, the transcript is the 'canonical' transcript of the gene "
+            "(generally the longest isoform of the gene)";
+        isBoolean = TRUE;  // only this item is passed to annoFormatVepAddExtraItem as boolean
+        }
+    else if (sameString(txStatDtf->table, "refSeqStatus") &&
+             sameString(txStatDtf->field, "status"))
+        {
+        tag = "REFSEQ_STATUS";
+        description = "<A HREF=" REFSEQ_STATUS_DOC_URL " "
+         "TARGET=_BLANK>RefSeq status</A> of the transcript";
+        }
+    else
+        {
+        errAbort("addTxStatusExtras: Unrecognized {table,field}: {%s,%s}",
+                 txStatDtf->table, txStatDtf->field);
+        }
+    char *column = annoStreamDbColumnNameFromDtf(database, geneTrack, txStatDtf);
+    annoFormatVepAddExtraItem(vepOut, (struct annoStreamer *)gpVarGrator,
+                              tag, description, column, isBoolean);
+    }
+}
+
 void doQuery()
 /* Translate simple form inputs into anno* components and execute query. */
 {
 dyInfo = dyStringNew(0);
 char *chrom = NULL;
 uint start = 0, end = 0;
 if (sameString(regionType, hgvaRegionTypeRange))
     getCartPosOrDie(&chrom, &start, &end);
 struct annoAssembly *assembly = hAnnoGetAssembly(database);
 
 char *geneTrack = cartString(cart, "hgva_geneTrack");
 struct trackDb *geneTdb = tdbForTrack(database, geneTrack, &fullTrackList);
 if (geneTdb == NULL)
     {
     warn("Can't find tdb for gene track %s", geneTrack);
@@ -2226,31 +2553,32 @@
     primaryLongLabel = hgvaVariantIdsLabel;
     }
 else
     {
     struct trackDb *varTdb = getVariantTrackDb(variantTrack);
     if (varTdb == NULL)
 	{
 	doUi();
 	return;
 	}
     primary = hAnnoStreamerFromTrackDb(assembly, varTdb->table, varTdb, chrom, maxVarRows, NULL);
     primaryLongLabel = varTdb->longLabel;
     }
 
 enum annoGratorOverlap geneOverlapRule = agoMustOverlap;
-struct jsonElement *gpConfig = configForStreamer(database, geneTdb);
+struct joinerDtf *txStatusExtras = getTxStatusExtras(database, geneTrack);
+struct jsonElement *gpConfig = configForStreamer(database, geneTdb, txStatusExtras);
 struct annoGrator *gpVarGrator = hAnnoGratorFromTrackDb(assembly, geneTdb->table, geneTdb, chrom,
                                                         ANNO_NO_LIMIT, primary->asObj,
                                                         geneOverlapRule, gpConfig);
 setGpVarFuncFilter(gpVarGrator);
 
 // Some grators may be used as both filters and output values. To avoid making
 // multiple grators for the same source, hash them by trackName:
 struct hash *gratorsByName = hashNew(8);
 
 struct annoGrator *snpGrator = NULL;
 char *snpDesc = NULL;
 if (cartUsualBoolean(cart, "hgva_rsId", FALSE))
     snpGrator = gratorForSnpBed4(gratorsByName, "", assembly, chrom, agoNoConstraint, &snpDesc);
 
 // Now construct gratorList in the order in which annoFormatVep wants to see them,
@@ -2259,30 +2587,31 @@
 slAddHead(&gratorList, gpVarGrator);
 if (snpGrator != NULL)
     slAddHead(&gratorList, snpGrator);
 
 // Text or HTML output?
 char *outFormat = cartUsualString(cart, "hgva_outFormat", "vepTab");
 boolean doHtml = sameString(outFormat, "vepHtml");
 
 // Initialize VEP formatter:
 struct annoFormatter *vepOut = annoFormatVepNew("stdout", doHtml,
 						primary, primaryLongLabel,
 						(struct annoStreamer *)gpVarGrator,
 						geneTdb->longLabel,
 						(struct annoStreamer *)snpGrator,
 						snpDesc, assembly);
+addTxStatusExtras(vepOut, geneTrack, gpVarGrator, txStatusExtras);
 boolean haveRegulatory = FALSE;
 addOutputTracks(&gratorList, gratorsByName, vepOut, assembly, chrom, doHtml, &haveRegulatory);
 adjustGpVarOverlapRule(gpVarGrator, haveRegulatory);
 
 addFilterTracks(&gratorList, gratorsByName, assembly, chrom);
 
 slReverse(&gratorList);
 
 if (doHtml)
     {
     webStart(cart, database, "Annotated Variants in VEP/HTML format");
     }
 else
     {
     // Undo the htmlPushEarlyHandlers() because after this point they make ugly text: