6697f821c6b91c2e0c069bf6b0283adc99eb0201
angie
Sat Jun 8 15:56:04 2013 -0700
Added support for remaining dbNSFP sources; added longLabels to VEP output header lines. refs #6152
diff --git src/hg/hgVai/hgVai.c src/hg/hgVai/hgVai.c
index 4eeb61c..75ef188 100644
--- src/hg/hgVai/hgVai.c
+++ src/hg/hgVai/hgVai.c
@@ -362,159 +362,210 @@
"
\n");
char *selected = cartUsualString(cart, "hgva_geneTrack", ""); //#*** per-db cart vars??
//#*** should show more info about each track... button to pop up track desc?
printf("
");
}
-char *dbNsfpDescFromTableName(char *tableName)
+//#*** We really need a dbNsfp.[ch]:
+enum PolyPhen2Subset { noSubset, HDIV, HVAR };
+
+char *formatDesc(char *url, char *name, char *details, boolean doHtml)
+/* Return a description with URL for name plus extra details. If doHtml,
+ * wrap URL in .... */
+{
+char desc[1024];
+if (doHtml)
+ safef(desc, sizeof(desc), "%s %s",
+ url, name, details);
+else
+ safef(desc, sizeof(desc), "(%s) %s %s",
+ url, name, details);
+return cloneString(desc);
+}
+
+char *dbNsfpDescFromTableName(char *tableName, enum PolyPhen2Subset subset, boolean doHtml)
/* Return a description for table name... if these become tracks, use tdb of course. */
{
if (sameString(tableName, "dbNsfpSift"))
- return ""
- "SIFT scores";
+ return formatDesc("http://sift.bii.a-star.edu.sg/", "SIFT",
+ "(D = damaging, T = tolerated)", doHtml);
else if (sameString(tableName, "dbNsfpPolyPhen2"))
- return ""
- "PolyPhen-2 scores and predictions (HDIV, HVAR, UniProt...)";
+ {
+ if (subset == HDIV)
+ return formatDesc("http://genetics.bwh.harvard.edu/pph2/", "PolyPhen-2",
+ "with HumDiv training set "
+ "(D = probably damaging, P = possibly damaging, B = benign)", doHtml);
+ else if (subset == HVAR)
+ return formatDesc("http://genetics.bwh.harvard.edu/pph2/", "PolyPhen-2",
+ "with HumVar training set "
+ "(D = probably damaging, P = possibly damaging, B = benign)", doHtml);
+ else
+ errAbort("dbNsfpDescFromTableName: invalid PolyPhen2 subset type (%d)", subset);
+ }
else if (sameString(tableName, "dbNsfpMutationTaster"))
- return ""
- "MutationTaster scores and predictions";
+ return formatDesc("http://www.mutationtaster.org/", "MutationTaster",
+ "(A = disease causing automatic, D = disease causing, "
+ "N = polymorphism, P = polymorphism automatic)", doHtml);
else if (sameString(tableName, "dbNsfpMutationAssessor"))
- return ""
- "MutationAssessor scores and predictions";
-//else if (sameString(tableName, "dbNsfpGerpNr"))
-// return "GERP++ neutral rate (NR) score";
-//else if (sameString(tableName, "dbNsfpGerpRs"))
-// return "GERP++ Rejected Substitutions (RS) score";
+ return formatDesc("http://mutationassessor.org/", "MutationAssessor",
+ "(high or medium: predicted functional; "
+ "low or neutral: predicted non-functional)", doHtml);
else if (sameString(tableName, "dbNsfpLrt"))
- return ""
- "Likelihood ratio test (LRT) scores";
-else if (sameString(tableName, "dbNsfpInterpro"))
- return "Protein domains from ";
+ return formatDesc("http://www.mutationtaster.org/", "Likelihood ratio test (LRT)",
+ "(D = deleterious, N = Neutral, U = unknown)", doHtml);
+else if (sameString(tableName, "dbNsfpGerpNr"))
+ return formatDesc("http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html",
+ "GERP++", "Neutral Rate (NR)", doHtml);
+else if (sameString(tableName, "dbNsfpGerpRs"))
+ return formatDesc("http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html",
+ "GERP++", "Rejected Substitutions (RS)", doHtml);
+else if (sameString(tableName, "dbNsfpInterPro"))
+ return formatDesc("http://www.ebi.ac.uk/interpro/", "InterPro", "protein domains", doHtml);
return NULL;
}
struct slName *findDbNsfpTables()
/* See if this database contains dbNSFP tables. */
{
struct sqlConnection *conn = hAllocConn(database);
-struct slName *dbNsfpTables = sqlQuickList(conn, "show tables like 'dbNsfp%'");
+struct slName *dbNsfpTables = sqlQuickList(conn, "NOSQLINJ show tables like 'dbNsfp%'");
hFreeConn(&conn);
return dbNsfpTables;
}
+void printDbNsfpSource(char *table, enum PolyPhen2Subset subset)
+/* If we know what to do with table, make a checkbox with descriptive label. */
+{
+char *description = dbNsfpDescFromTableName(table, subset, TRUE);
+if (description != NULL)
+ {
+ char cartVar[512];
+ if (subset == HDIV)
+ safef(cartVar, sizeof(cartVar), "hgva_track_%s_%s:HDIV", database, table);
+ else if (subset == HVAR)
+ safef(cartVar, sizeof(cartVar), "hgva_track_%s_%s:HVAR", database, table);
+ else
+ safef(cartVar, sizeof(cartVar), "hgva_track_%s_%s", database, table);
+ boolean defaultChecked = (sameString("dbNsfpSift", table) ||
+ sameString("dbNsfpPolyPhen2", table));
+ cartMakeCheckBox(cart, cartVar, defaultChecked);
+ printf("%s
\n", description);
+ }
+}
+
void selectDbNsfp(struct slName *dbNsfpTables)
/* Let user select scores/predicitions from various tools collected by dbNSFP. */
{
if (dbNsfpTables == NULL)
return;
-startCollapsibleSection("dbNsfp", "Non-synonymous functional predictions", FALSE);
-printf("The "
- ""
- "Database of Non-Synonymous Functional Predictions (dbNSFP) release 2.0 "
+startCollapsibleSection("dbNsfp", "Database of Non-synonymous Functional Predictions (dbNSFP)",
+ FALSE);
+printf("dbNSFP "
+ "(Liu et al. 2011) "
+ "release 2.0 "
"provides pre-computed scores and predictions of functional significance "
"from a variety of tools. Every possible coding change to transcripts in "
//#*** hardcoded version info... sigh, we need trackDb... or metaDb??
"Gencode release 9 (Ensembl 64, Dec. 2011) gene predictions "
"has been evaluated. "
"Note: This may not encompass all transcripts in your "
"selected gene set.
\n");
//#*** Another cheap hack: reverse alph order happens to be what we want,
//#*** but priorities would be cleaner:
slReverse(&dbNsfpTables);
struct slName *table;
for (table = dbNsfpTables; table != NULL; table = table->next)
{
- char *description = dbNsfpDescFromTableName(table->name);
- if (description != NULL)
+ if (sameString(table->name, "dbNsfpPolyPhen2"))
{
- char cartVar[512];
- safef(cartVar, sizeof(cartVar), "hgva_track_%s_%s", database, table->name);
- boolean defaultChecked = (sameString("dbNsfpSift", table->name) ||
- sameString("dbNsfpPolyPhen2", table->name));
- cartMakeCheckBox(cart, cartVar, defaultChecked);
- printf("%s
\n", description);
+ printDbNsfpSource(table->name, HDIV);
+ printDbNsfpSource(table->name, HVAR);
}
+ else
+ printDbNsfpSource(table->name, 0);
}
puts("
");
endCollapsibleSection();
}
boolean findSnpBed4(char *suffix, char **retFileName, struct trackDb **retTdb)
/* If we can find the latest snpNNNsuffix track using 'show tables rlike "regex"',
* or better yet a bigBed file for it (faster),
* set the appropriate ret* and return TRUE, otherwise return FALSE. */
{
if (suffix == NULL)
suffix = "";
char query[64];
sqlSafef(query, sizeof(query), "show tables like 'snp1__%s'", suffix);
struct sqlConnection *conn = hAllocConn(database);
struct slName *snpNNNTables = sqlQuickList(conn, query);
hFreeConn(&conn);
if (snpNNNTables == NULL)
return FALSE;
+boolean foundIt = FALSE;
// Skip to last in list -- highest number (show tables can't use rlike or 'order by'):
struct slName *table = snpNNNTables;
while (table->next != NULL && isdigit(table->next->name[4]) && isdigit(table->next->name[5]))
table = table->next;
// Do we happen to have a bigBed version? Better yet, bed4 only for current uses:
char fileName[HDB_MAX_PATH_STRING];
safef(fileName, sizeof(fileName), "/gbdb/%s/vai/%s.bed4.bb", database, table->name);
if (fileExists(fileName))
{
if (retFileName != NULL)
*retFileName = cloneString(fileName);
- return TRUE;
+ foundIt = TRUE;
}
// Not bed4; try just .bb:
safef(fileName, sizeof(fileName), "/gbdb/%s/vai/%s.bb", database, table->name);
if (fileExists(fileName))
{
if (retFileName != NULL)
*retFileName = cloneString(fileName);
- return TRUE;
+ foundIt = TRUE;
}
+if (foundIt && retTdb == NULL)
+ return TRUE;
struct trackDb *tdb = tdbForTrack(database, table->name, &fullTrackList);
if (tdb != NULL)
{
if (retTdb != NULL)
*retTdb = tdb;
return TRUE;
}
-return FALSE;
+return foundIt;
}
void selectDbSnp(boolean gotSnp)
/* Offer to include rsID (and other fields, or leave that for advanced output??) if available */
{
if (!gotSnp)
return;
startCollapsibleSection("dbSnp", "Known variation", FALSE);
cartMakeCheckBox(cart, "hgva_rsId", TRUE);
-printf("Include dbSNP rs# ID if one exists
\n");
+printf("Include dbSNP "
+ "rs# ID if one exists
\n");
puts("
");
endCollapsibleSection();
}
boolean isConsElTrack(struct trackDb *tdb, void *filterData)
/* This is a TdbFilterFunction to get "phastConsNwayElements" tracks. */
{
return (startsWith("phastCons", tdb->table) && stringIn("Elements", tdb->table));
}
boolean isConsScoreTrack(struct trackDb *tdb, void *filterData)
/* This is a TdbFilterFunction to get tracks that start with "phastCons" (no Elements)
* or "phyloP". */
{
return ((startsWith("phastCons", tdb->table) && !stringIn("Elements", tdb->table))
@@ -814,61 +865,276 @@
char query[512];
sqlSafef(query, sizeof(query), "select fileName from %s", table);
char *fileName = sqlQuickString(conn, query);
hFreeConn(&conn);
return fileName;
}
void textOpen()
/* Start serving up plain text, possibly via a pipeline to gzip. */
{
char *fileName = cartUsualString(cart, "hgva_outFile", "");
char *compressType = cartUsualString(cart, "hgva_compressType", textOutCompressGzip);
compressPipeline = textOutInit(fileName, compressType);
}
-struct annoGrator *gratorForSnp(char *suffix, struct annoAssembly *assembly, char *chrom,
- enum annoGratorOverlap overlapRule)
-/* Look up snpNNNsuffix; if we find it, return a grator, otherwise return NULL. */
+void setGpVarFuncFilter(struct annoGrator *gpVarGrator)
+/* Use cart variables to configure gpVarGrator's filtering by functional category. */
+{
+struct annoGratorGpVarFuncFilter aggvFuncFilter;
+ZeroVar(&aggvFuncFilter);
+aggvFuncFilter.intergenic = cartUsualBoolean(cart, "hgva_include_intergenic", FALSE);
+aggvFuncFilter.upDownstream = cartUsualBoolean(cart, "hgva_include_upDownstream", TRUE);
+aggvFuncFilter.utr = cartUsualBoolean(cart, "hgva_include_utr", TRUE);
+aggvFuncFilter.cdsSyn = cartUsualBoolean(cart, "hgva_include_cdsSyn", TRUE);
+aggvFuncFilter.cdsNonSyn = cartUsualBoolean(cart, "hgva_include_cdsNonSyn", TRUE);
+aggvFuncFilter.intron = cartUsualBoolean(cart, "hgva_include_intron", TRUE);
+aggvFuncFilter.splice = cartUsualBoolean(cart, "hgva_include_splice", TRUE);
+aggvFuncFilter.nonCodingExon = cartUsualBoolean(cart, "hgva_include_nonCodingExon", TRUE);
+annoGratorGpVarSetFuncFilter(gpVarGrator, &aggvFuncFilter);
+}
+
+#define NO_MAXROWS 0
+
+struct annoGrator *gratorForSnpBed4(struct hash *gratorsByName, char *suffix,
+ struct annoAssembly *assembly, char *chrom,
+ enum annoGratorOverlap overlapRule,
+ char **retDescription)
+/* Look up snpNNNsuffix; if we find it, return a grator (possibly for a bigBed 4 file),
+ * otherwise return NULL. */
{
char *fileName = NULL;
struct trackDb *tdb = NULL;
if (! findSnpBed4(suffix, &fileName, &tdb))
return NULL;
struct annoGrator *grator = NULL;
-if (fileName != NULL)
+// First look in gratorsByName to see if this grator has already been constructed:
+if (tdb != NULL)
+ {
+ grator = hashFindVal(gratorsByName, tdb->table);
+ if (retDescription)
+ *retDescription = cloneString(tdb->longLabel);
+ }
+if (grator != NULL)
{
- struct annoStreamer *streamer = annoStreamBigBedNew(fileName, assembly, BIGNUM);
- grator = annoGratorNew(streamer);
+ // Set its overlap behavior and we're done.
grator->setOverlapRule(grator, overlapRule);
+ return grator;
+ }
+// If not in gratorsByName, then attempt to construct it here:
+if (fileName != NULL)
+ grator = gratorFromBigDataFileOrUrl(fileName, assembly, NO_MAXROWS, overlapRule);
+else
+ grator = gratorFromTrackDb(assembly, tdb->table, tdb, chrom, NO_MAXROWS, NULL, overlapRule);
+if (grator != NULL)
+ hashAdd(gratorsByName, tdb->table, grator);
+return grator;
+}
+
+char *tableNameFromSourceName(char *sourceName)
+/* Strip sourceName (file path or db.table) to just the basename or table name. */
+{
+char *tableName = cloneString(sourceName);
+char *p = strrchr(tableName, '/');
+if (p != NULL)
+ {
+ // file path; strip to basename
+ char dir[PATH_LEN], name[FILENAME_LEN], extension[FILEEXT_LEN];
+ splitPath(tableName, dir, name, extension);
+ safecpy(tableName, strlen(tableName)+1, name);
}
else
{
- grator = gratorFromTrack(assembly, tdb->table, tdb, chrom, BIGNUM, NULL, overlapRule);
+ // database.table -- skip database.
+ p = strchr(tableName, '.');
+ if (p != NULL)
+ tableName = p+1;
}
-return grator;
+return tableName;
}
-void updateGratorLists(struct annoGrator *grator, struct annoGrator **pGratorList,
- struct annoGrator **pFirstExtra)
-/* If grator is non-NULL, add it to gratorList; if *pFirstExtra is NULL, set it to grator. */
+char *tagFromTableName(char *tableName, char *suffix)
+/* Generate a tag for VEP's extras column or VCF's info column. */
{
-if (grator != NULL)
+char *p = strstr(tableName, "dbNsfp");
+if (p != NULL)
+ tableName = p + strlen("dbNsfp");
+int suffixLen = (suffix == NULL) ? 0 : strlen(suffix);
+int tagSize = strlen(tableName) + suffixLen + 1;
+char *tag = cloneStringZ(tableName, tagSize);
+if (isNotEmpty(suffix))
+ safecat(tag, tagSize, suffix);
+touppers(tag);
+return tag;
+}
+
+enum PolyPhen2Subset stripSubsetFromTrackName(char *trackName)
+/* trackName may have a _suffix for a subset of PolyPhen2; convert that to enum
+ * and zero out the suffix so we have the real trackName. */
{
+enum PolyPhen2Subset subset = noSubset;
+char *p = strchr(trackName, ':');
+if (p != NULL)
+ {
+ if (sameString(p+1, "HDIV"))
+ subset = HDIV;
+ else if (sameString(p+1, "HVAR"))
+ subset = HVAR;
+ else
+ errAbort("unrecognized suffix in track_suffix '%s'", trackName);
+ *p = '\0';
+ }
+return subset;
+}
+
+void updateGratorListAndVepExtra(struct annoGrator *grator, struct annoGrator **pGratorList,
+ struct annoFormatter *vepOut, enum PolyPhen2Subset subset,
+ char *column, char *description)
+/* If grator is non-NULL, add it to gratorList and vepOut's list of items for EXTRAs column. */
+{
+if (grator == NULL)
+ return;
slAddHead(pGratorList, grator);
- if (pFirstExtra != NULL && *pFirstExtra == NULL)
- *pFirstExtra = grator;
+if (vepOut != NULL)
+ {
+ char *tableName = tableNameFromSourceName(grator->streamer.name);
+ char *suffix = NULL;
+ if (subset == HDIV)
+ suffix = "HDIV";
+ else if (subset == HVAR)
+ suffix = "HVAR";
+ char *tag = tagFromTableName(tableName, suffix);
+ if (isEmpty(description))
+ description = grator->streamer.name;
+ annoFormatVepAddExtraItem(vepOut, (struct annoStreamer *)grator, tag, description, column);
+ }
+}
+
+INLINE void updateGratorList(struct annoGrator *grator, struct annoGrator **pGratorList)
+/* If grator is non-NULL, add it to gratorList. */
+{
+updateGratorListAndVepExtra(grator, pGratorList, NULL, 0, NULL, NULL);
+}
+
+void addDbNsfpSeqChange(char *trackName, struct annoAssembly *assembly, struct hash *gratorsByName,
+ struct annoGrator **pGratorList)
+// If the user has selected dbNsfp* data, we also need the underlying dbNsfpSeqChange
+// data, so annoFormatVep can tell whether the variant and gpFx are consistent with the
+// variant and transcript that dbNsfp used to calculate scores.
+{
+//#*** Yet another place where we need metadata:
+char *seqChangeTable = "dbNsfpSeqChange";
+if (hashFindVal(gratorsByName, seqChangeTable) == NULL)
+ {
+ char *fileName = fileNameFromTable(seqChangeTable);
+ if (fileName == NULL)
+ errAbort("'%s' requested, but I can't find fileName for %s",
+ trackName, seqChangeTable);
+ struct annoGrator *grator = gratorFromBigDataFileOrUrl(fileName, assembly, NO_MAXROWS,
+ agoNoConstraint);
+ updateGratorList(grator, pGratorList);
+ hashAdd(gratorsByName, seqChangeTable, grator);
+ }
+}
+
+void addOutputTracks(struct annoGrator **pGratorList, struct hash *gratorsByName,
+ struct annoFormatter *vepOut, struct annoAssembly *assembly, char *chrom)
+// Construct grators for tracks selected to appear in EXTRAS column
+{
+char trackPrefix[128];
+safef(trackPrefix, sizeof(trackPrefix), "hgva_track_%s_", database);
+int trackPrefixLen = strlen(trackPrefix);
+struct slPair *trackVar, *trackVars = cartVarsWithPrefix(cart, trackPrefix);
+for (trackVar = trackVars; trackVar != NULL; trackVar = trackVar->next)
+ {
+ char *val = trackVar->val;
+ if (! (sameWord(val, "on") || atoi(val) > 0))
+ continue;
+ char *trackName = trackVar->name + trackPrefixLen;
+ if (sameString(trackName, "dbNsfpPolyPhen2"))
+ // PolyPhen2 must have a suffix now -- skip obsolete cartVar from existing carts
+ continue;
+ struct annoGrator *grator = hashFindVal(gratorsByName, trackName);
+ if (grator != NULL)
+ // We already have this as a grator:
+ continue;
+ enum PolyPhen2Subset subset = noSubset;
+ char *description = NULL;
+ char *column = NULL;
+ if (startsWith("dbNsfp", trackName))
+ {
+ // trackName for PolyPhen2 has a suffix for subset -- strip it if we find it:
+ subset = stripSubsetFromTrackName(trackName);
+ description = dbNsfpDescFromTableName(trackName, subset, FALSE);
+ addDbNsfpSeqChange(trackName, assembly, gratorsByName, pGratorList);
+ char *fileName = fileNameFromTable(trackName);
+ if (fileName != NULL)
+ grator = gratorFromBigDataFileOrUrl(fileName, assembly, NO_MAXROWS, agoNoConstraint);
+ }
+ else
+ {
+ struct trackDb *tdb = tdbForTrack(database, trackName, &fullTrackList);
+ if (tdb != NULL)
+ {
+ grator = gratorFromTrackDb(assembly, tdb->table, tdb, chrom, NO_MAXROWS, NULL,
+ agoNoConstraint);
+ if (grator != NULL)
+ //#*** Need something more sophisticated but this works for our
+ //#*** limited selection of extra tracks:
+ if (asColumnFind(grator->streamer.asObj, "name") != NULL)
+ column = "name";
+ description = tdb->longLabel;
+ }
+ }
+ updateGratorListAndVepExtra(grator, pGratorList, vepOut, subset, column, description);
+ if (grator != NULL)
+ hashAdd(gratorsByName, trackName, grator);
+ }
+}
+
+void addFilterTracks(struct annoGrator **pGratorList, struct hash *gratorsByName,
+ struct annoAssembly *assembly, char *chrom)
+// Add grators for filters (not added to vepOut):
+{
+if (cartUsualBoolean(cart, "hgva_exclude_snpCommon", FALSE))
+ {
+ struct annoGrator *grator = gratorForSnpBed4(gratorsByName, "Common", assembly, chrom,
+ agoMustNotOverlap, NULL);
+ updateGratorList(grator, pGratorList);
+ }
+
+if (cartUsualBoolean(cart, "hgva_exclude_snpMult", FALSE))
+ {
+ struct annoGrator *grator = gratorForSnpBed4(gratorsByName, "Mult", assembly, chrom,
+ agoMustNotOverlap, NULL);
+ updateGratorList(grator, pGratorList);
+ }
+
+if (cartUsualBoolean(cart, "hgva_require_consEl", FALSE))
+ {
+ char *consElTrack = cartString(cart, "hgva_require_consEl_track");
+ struct annoGrator *grator = hashFindVal(gratorsByName, consElTrack);
+ if (grator == NULL)
+ {
+ struct trackDb *tdb = tdbForTrack(database, consElTrack, &fullTrackList);
+ if (tdb != NULL)
+ grator = gratorFromTrackDb(assembly, tdb->table, tdb, chrom, NO_MAXROWS, NULL,
+ agoMustOverlap);
+ updateGratorList(grator, pGratorList);
+ }
+ else
+ grator->setOverlapRule(grator, agoMustOverlap);
}
}
void doQuery()
/* Translate simple form inputs into anno* components and execute query. */
{
char *chrom = NULL;
uint start = 0, end = 0;
if (sameString(regionType, hgvaRegionTypeRange))
{
char *position = cartString(cart, hgvaRange);
if (! parsePosition(position, &chrom, &start, &end))
errAbort("Expected position to be chrom:start-end but got '%s'", position);
}
char *variantTrack = cartString(cart, "hgva_variantTrack");
@@ -882,172 +1148,66 @@
warn("Can't find hub track %s; try the \"check hub\" button in "
"Track Data Hubs.",
variantTrack,
hgHubConnectName(), cartSidUrlString(cart), hgHubConnectCgiDestUrl, cgiScriptName());
else
warn("Can't find tdb for variant track %s", variantTrack);
doUi();
return;
}
checkVariantTrack(varTdb);
int maxVarRows = cartUsualInt(cart, "hgva_variantLimit", 10);
struct annoStreamer *primary = streamerFromTrack(assembly, varTdb->table, varTdb, chrom,
maxVarRows);
struct trackDb *geneTdb = tdbForTrack(database, geneTrack, &fullTrackList);
+if (geneTdb == NULL)
+ {
+ warn("Can't find tdb for gene track %s", geneTrack);
+ doUi();
+ return;
+ }
enum annoGratorOverlap geneOverlapRule = agoMustOverlap;
-struct annoGrator *gpVarGrator = gratorFromTrack(assembly, geneTdb->table, geneTdb, chrom,
- BIGNUM, primary->asObj, geneOverlapRule);
-struct annoGratorGpVarFuncFilter aggvFuncFilter;
-ZeroVar(&aggvFuncFilter);
-aggvFuncFilter.intergenic = cartUsualBoolean(cart, "hgva_include_intergenic", FALSE);
-aggvFuncFilter.upDownstream = cartUsualBoolean(cart, "hgva_include_upDownstream", TRUE);
-aggvFuncFilter.utr = cartUsualBoolean(cart, "hgva_include_utr", TRUE);
-aggvFuncFilter.cdsSyn = cartUsualBoolean(cart, "hgva_include_cdsSyn", TRUE);
-aggvFuncFilter.cdsNonSyn = cartUsualBoolean(cart, "hgva_include_cdsNonSyn", TRUE);
-aggvFuncFilter.intron = cartUsualBoolean(cart, "hgva_include_intron", TRUE);
-aggvFuncFilter.splice = cartUsualBoolean(cart, "hgva_include_splice", TRUE);
-aggvFuncFilter.nonCodingExon = cartUsualBoolean(cart, "hgva_include_nonCodingExon", TRUE);
-annoGratorGpVarSetFuncFilter(gpVarGrator, &aggvFuncFilter);
+struct annoGrator *gpVarGrator = gratorFromTrackDb(assembly, geneTdb->table, geneTdb, chrom,
+ NO_MAXROWS, primary->asObj, geneOverlapRule);
+setGpVarFuncFilter(gpVarGrator);
+
+// Some grators may be used as both filters and output values. To avoid making
+// multiple grators for the same source, hash them by trackName:
+struct hash *gratorsByName = hashNew(8);
struct annoGrator *snpGrator = NULL;
+char *snpDesc = NULL;
if (cartBoolean(cart, "hgva_rsId"))
- {
- snpGrator = gratorForSnp("", assembly, chrom, agoNoConstraint);
- }
+ snpGrator = gratorForSnpBed4(gratorsByName, "", assembly, chrom, agoNoConstraint, &snpDesc);
// Now construct gratorList in the order in which annoFormatVep wants to see them,
// i.e. first the gpVar, then the snpNNN, then whatever else:
struct annoGrator *gratorList = NULL;
slAddHead(&gratorList, gpVarGrator);
if (snpGrator != NULL)
slAddHead(&gratorList, snpGrator);
-// extraGrators will point to the first grator in gratorList after snpGrator;
-// i.e. first grator that is meant for the EXTRAS column and/or used by a filter
-struct annoGrator *extraGrators = NULL;
-if (cartUsualBoolean(cart, "hgva_exclude_snpCommon", FALSE))
- {
- struct annoGrator *grator = gratorForSnp("Common", assembly, chrom, agoMustNotOverlap);
- updateGratorLists(grator, &gratorList, &extraGrators);
- }
-if (cartUsualBoolean(cart, "hgva_exclude_snpMult", FALSE))
- {
- struct annoGrator *grator = gratorForSnp("Mult", assembly, chrom, agoMustNotOverlap);
- updateGratorLists(grator, &gratorList, &extraGrators);
- }
-// If we're including Conserved Elements as a filter, and it has also been selected as an
-// extra output value, don't make two grators for it:
-char *consElTrack = NULL;
-if (cartUsualBoolean(cart, "hgva_require_consEl", FALSE))
- {
- struct annoGrator *grator = NULL;
- consElTrack = cartString(cart, "hgva_require_consEl_track");
- struct trackDb *tdb = tdbForTrack(database, consElTrack, &fullTrackList);
- if (tdb != NULL)
- grator = gratorFromTrack(assembly, tdb->table, tdb, chrom, BIGNUM, NULL, agoMustOverlap);
- updateGratorLists(grator, &gratorList, &extraGrators);
- }
+struct annoFormatter *vepOut = annoFormatVepNew("stdout", primary, varTdb->longLabel,
+ (struct annoStreamer *)gpVarGrator,
+ geneTdb->longLabel,
+ (struct annoStreamer *)snpGrator,
+ snpDesc);
-boolean addedDbNsfpSeqChange = FALSE;
-char trackPrefix[128];
-safef(trackPrefix, sizeof(trackPrefix), "hgva_track_%s_", database);
-int trackPrefixLen = strlen(trackPrefix);
-struct slPair *trackVar, *trackVars = cartVarsWithPrefix(cart, trackPrefix);
-for (trackVar = trackVars; trackVar != NULL; trackVar = trackVar->next)
- {
- char *val = trackVar->val;
- if (! (sameWord(val, "on") || atoi(val) > 0))
- continue;
- char *trackName = trackVar->name + trackPrefixLen;
- if (sameOk(consElTrack, trackName))
- // We already have this as a grator because it's a filter (above):
- continue;
- struct trackDb *tdb = tdbForTrack(database, trackName, &fullTrackList);
- struct annoGrator *grator = NULL;
- if (tdb != NULL)
- grator = gratorFromTrack(assembly, tdb->table, tdb, chrom, BIGNUM, NULL, agoNoConstraint);
- else if (startsWith("dbNsfp", trackName))
- {
- // If the user has selected dbNsfp* data, we also need the underlying dbNsfpSeqChange
- // data, so annoFormatVep can tell whether the variant and gpFx are consistent with the
- // variant and transcript that dbNsfp used to calculate scores.
- if (!addedDbNsfpSeqChange)
- {
- char *fileName = fileNameFromTable("dbNsfpSeqChange");
- if (fileName == NULL)
- errAbort("'%s' requested, but I can't find fileName for dbNsfpSeqChange",
- trackName);
- struct annoStreamer *streamer = annoStreamBigBedNew(fileName, assembly, BIGNUM);
- grator = annoGratorNew(streamer);
- updateGratorLists(grator, &gratorList, &extraGrators);
- addedDbNsfpSeqChange = TRUE;
- }
- char *fileName = fileNameFromTable(trackName);
- if (fileName != NULL)
- {
- struct annoStreamer *streamer = annoStreamBigBedNew(fileName, assembly, BIGNUM);
- grator = annoGratorNew(streamer);
- }
- }
- updateGratorLists(grator, &gratorList, &extraGrators);
- }
-slReverse(&gratorList);
+addOutputTracks(&gratorList, gratorsByName, vepOut, assembly, chrom);
+addFilterTracks(&gratorList, gratorsByName, assembly, chrom);
-struct annoFormatter *vepOut = annoFormatVepNew("stdout", primary,
- (struct annoStreamer *)gpVarGrator,
- (struct annoStreamer *)snpGrator);
-//#*** can all the extraItem config smarts be pulled inside annoFormatVep?
-struct annoStreamer *extraSource = (struct annoStreamer *)extraGrators;
-for (; extraSource != NULL; extraSource = extraSource->next)
- {
- char *tableName = extraSource->name;
- char *tag = cloneString(tableName);
- char *p = strrchr(tag, '/');
- if (p != NULL)
- {
- // file path; strip to basenaem
- tableName = tag = p+1;
- p = strchr(tag, '.');
- if (p != NULL)
- *p = '\0';
- }
- p = strstr(tag, "dbNsfp");
- if (p != NULL)
- {
- tableName = p;
- tag = p + strlen("dbNsfp");
- }
- else
- {
- // database.table -- skip database.
- p = strchr(tag, '.');
- if (p != NULL)
- tag = p+1;
- }
- touppers(tag);
- char *description = dbNsfpDescFromTableName(tableName);
- if (description == NULL)
- description = extraSource->name;
-//#*** This is where the smarts probably can't all be pulled into annoFormatVep:
- char *column = "ignoredVepKnowsBetterOrOverlapExcludesVariant";
- if (extraSource->rowType == arWords)
- {
- if (asColumnFindIx(extraSource->asObj->columnList, "name") >= 0)
- column = "name";
- }
- annoFormatVepAddExtraItem(vepOut, extraSource, tag, description, column);
- }
+slReverse(&gratorList);
// Undo the htmlPushEarlyHandlers() because after this point they make ugly text:
popWarnHandler();
popAbortHandler();
textOpen();
webStartText();
struct annoGratorQuery *query = annoGratorQueryNew(assembly, primary, gratorList, vepOut);
if (chrom != NULL)
annoGratorQuerySetRegion(query, chrom, start, end);
annoGratorQueryExecute(query);
textOutClose(&compressPipeline);
annoGratorQueryFree(&query);
}