6697f821c6b91c2e0c069bf6b0283adc99eb0201 angie Sat Jun 8 15:56:04 2013 -0700 Added support for remaining dbNSFP sources; added longLabels to VEP output header lines. refs #6152 diff --git src/hg/hgVai/hgVai.c src/hg/hgVai/hgVai.c index 4eeb61c..75ef188 100644 --- src/hg/hgVai/hgVai.c +++ src/hg/hgVai/hgVai.c @@ -362,159 +362,210 @@ "<BR>\n"); char *selected = cartUsualString(cart, "hgva_geneTrack", ""); //#*** per-db cart vars?? //#*** should show more info about each track... button to pop up track desc? printf("<SELECT ID='hgva_geneTrack' NAME='hgva_geneTrack'>\n"); struct slRef *ref; for (ref = trackRefList; ref != NULL; ref = ref->next) { struct trackDb *tdb = ref->val; if (tdb->subtracks == NULL) printOption(tdb->track, selected, tdb->longLabel); } puts("</SELECT><BR>"); } -char *dbNsfpDescFromTableName(char *tableName) +//#*** We really need a dbNsfp.[ch]: +enum PolyPhen2Subset { noSubset, HDIV, HVAR }; + +char *formatDesc(char *url, char *name, char *details, boolean doHtml) +/* Return a description with URL for name plus extra details. If doHtml, + * wrap URL in <A ...>...</A>. */ +{ +char desc[1024]; +if (doHtml) + safef(desc, sizeof(desc), "<A HREF='%s' TARGET=_BLANK>%s</A> %s", + url, name, details); +else + safef(desc, sizeof(desc), "(%s) %s %s", + url, name, details); +return cloneString(desc); +} + +char *dbNsfpDescFromTableName(char *tableName, enum PolyPhen2Subset subset, boolean doHtml) /* Return a description for table name... if these become tracks, use tdb of course. */ { if (sameString(tableName, "dbNsfpSift")) - return "<A HREF='http://sift.bii.a-star.edu.sg/' TARGET=_BLANK>" - "SIFT</A> scores"; + return formatDesc("http://sift.bii.a-star.edu.sg/", "SIFT", + "(D = damaging, T = tolerated)", doHtml); else if (sameString(tableName, "dbNsfpPolyPhen2")) - return "<A HREF='http://genetics.bwh.harvard.edu/pph2/' TARGET=_BLANK>" - "PolyPhen-2</A> scores and predictions (HDIV, HVAR, UniProt...)"; + { + if (subset == HDIV) + return formatDesc("http://genetics.bwh.harvard.edu/pph2/", "PolyPhen-2", + "with HumDiv training set " + "(D = probably damaging, P = possibly damaging, B = benign)", doHtml); + else if (subset == HVAR) + return formatDesc("http://genetics.bwh.harvard.edu/pph2/", "PolyPhen-2", + "with HumVar training set " + "(D = probably damaging, P = possibly damaging, B = benign)", doHtml); + else + errAbort("dbNsfpDescFromTableName: invalid PolyPhen2 subset type (%d)", subset); + } else if (sameString(tableName, "dbNsfpMutationTaster")) - return "<A HREF='http://www.mutationtaster.org/' TARGET=_BLANK>" - "MutationTaster</A> scores and predictions"; + return formatDesc("http://www.mutationtaster.org/", "MutationTaster", + "(A = disease causing automatic, D = disease causing, " + "N = polymorphism, P = polymorphism automatic)", doHtml); else if (sameString(tableName, "dbNsfpMutationAssessor")) - return "<A HREF='http://mutationassessor.org/' TARGET=_BLANK>" - "MutationAssessor</A> scores and predictions"; -//else if (sameString(tableName, "dbNsfpGerpNr")) -// return "<A HREF='http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html' " -// "TARGET=_BLANK>GERP++</A> neutral rate (NR) score"; -//else if (sameString(tableName, "dbNsfpGerpRs")) -// return "<A HREF='http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html' " -// "TARGET=_BLANK>GERP++</A> Rejected Substitutions (RS) score"; + return formatDesc("http://mutationassessor.org/", "MutationAssessor", + "(high or medium: predicted functional; " + "low or neutral: predicted non-functional)", doHtml); else if (sameString(tableName, "dbNsfpLrt")) - return "<A HREF='http://www.mutationtaster.org' TARGET=_BLANK>" - "Likelihood ratio test (LRT)</A> scores"; -else if (sameString(tableName, "dbNsfpInterpro")) - return "Protein domains from <A HREF='http://www.ebi.ac.uk/interpro/' TARGET=_BLANK" - "Interpro</A>"; + return formatDesc("http://www.mutationtaster.org/", "Likelihood ratio test (LRT)", + "(D = deleterious, N = Neutral, U = unknown)", doHtml); +else if (sameString(tableName, "dbNsfpGerpNr")) + return formatDesc("http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html", + "GERP++", "Neutral Rate (NR)", doHtml); +else if (sameString(tableName, "dbNsfpGerpRs")) + return formatDesc("http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html", + "GERP++", "Rejected Substitutions (RS)", doHtml); +else if (sameString(tableName, "dbNsfpInterPro")) + return formatDesc("http://www.ebi.ac.uk/interpro/", "InterPro", "protein domains", doHtml); return NULL; } struct slName *findDbNsfpTables() /* See if this database contains dbNSFP tables. */ { struct sqlConnection *conn = hAllocConn(database); -struct slName *dbNsfpTables = sqlQuickList(conn, "show tables like 'dbNsfp%'"); +struct slName *dbNsfpTables = sqlQuickList(conn, "NOSQLINJ show tables like 'dbNsfp%'"); hFreeConn(&conn); return dbNsfpTables; } +void printDbNsfpSource(char *table, enum PolyPhen2Subset subset) +/* If we know what to do with table, make a checkbox with descriptive label. */ +{ +char *description = dbNsfpDescFromTableName(table, subset, TRUE); +if (description != NULL) + { + char cartVar[512]; + if (subset == HDIV) + safef(cartVar, sizeof(cartVar), "hgva_track_%s_%s:HDIV", database, table); + else if (subset == HVAR) + safef(cartVar, sizeof(cartVar), "hgva_track_%s_%s:HVAR", database, table); + else + safef(cartVar, sizeof(cartVar), "hgva_track_%s_%s", database, table); + boolean defaultChecked = (sameString("dbNsfpSift", table) || + sameString("dbNsfpPolyPhen2", table)); + cartMakeCheckBox(cart, cartVar, defaultChecked); + printf("%s<BR>\n", description); + } +} + void selectDbNsfp(struct slName *dbNsfpTables) /* Let user select scores/predicitions from various tools collected by dbNSFP. */ { if (dbNsfpTables == NULL) return; -startCollapsibleSection("dbNsfp", "Non-synonymous functional predictions", FALSE); -printf("The " - "<A HREF='https://sites.google.com/site/jpopgen/dbNSFP' TARGET=_BLANK>" - "Database of Non-Synonymous Functional Predictions (dbNSFP)</A> release 2.0 " +startCollapsibleSection("dbNsfp", "Database of Non-synonymous Functional Predictions (dbNSFP)", + FALSE); +printf("<A HREF='https://sites.google.com/site/jpopgen/dbNSFP' TARGET=_BLANK>dbNSFP</A> " + "(<A HREF='http://onlinelibrary.wiley.com/doi/10.1002/humu.21517/abstract' " + "TARGET=_BLANK>Liu <em>et al.</em> 2011</A>) " + "release 2.0 " "provides pre-computed scores and predictions of functional significance " "from a variety of tools. Every possible coding change to transcripts in " //#*** hardcoded version info... sigh, we need trackDb... or metaDb?? "Gencode release 9 (Ensembl 64, Dec. 2011) gene predictions " "has been evaluated. " "<em>Note: This may not encompass all transcripts in your " "selected gene set.</em><BR>\n"); //#*** Another cheap hack: reverse alph order happens to be what we want, //#*** but priorities would be cleaner: slReverse(&dbNsfpTables); struct slName *table; for (table = dbNsfpTables; table != NULL; table = table->next) { - char *description = dbNsfpDescFromTableName(table->name); - if (description != NULL) + if (sameString(table->name, "dbNsfpPolyPhen2")) { - char cartVar[512]; - safef(cartVar, sizeof(cartVar), "hgva_track_%s_%s", database, table->name); - boolean defaultChecked = (sameString("dbNsfpSift", table->name) || - sameString("dbNsfpPolyPhen2", table->name)); - cartMakeCheckBox(cart, cartVar, defaultChecked); - printf("%s<BR>\n", description); + printDbNsfpSource(table->name, HDIV); + printDbNsfpSource(table->name, HVAR); } + else + printDbNsfpSource(table->name, 0); } puts("<BR>"); endCollapsibleSection(); } boolean findSnpBed4(char *suffix, char **retFileName, struct trackDb **retTdb) /* If we can find the latest snpNNNsuffix track using 'show tables rlike "regex"', * or better yet a bigBed file for it (faster), * set the appropriate ret* and return TRUE, otherwise return FALSE. */ { if (suffix == NULL) suffix = ""; char query[64]; sqlSafef(query, sizeof(query), "show tables like 'snp1__%s'", suffix); struct sqlConnection *conn = hAllocConn(database); struct slName *snpNNNTables = sqlQuickList(conn, query); hFreeConn(&conn); if (snpNNNTables == NULL) return FALSE; +boolean foundIt = FALSE; // Skip to last in list -- highest number (show tables can't use rlike or 'order by'): struct slName *table = snpNNNTables; while (table->next != NULL && isdigit(table->next->name[4]) && isdigit(table->next->name[5])) table = table->next; // Do we happen to have a bigBed version? Better yet, bed4 only for current uses: char fileName[HDB_MAX_PATH_STRING]; safef(fileName, sizeof(fileName), "/gbdb/%s/vai/%s.bed4.bb", database, table->name); if (fileExists(fileName)) { if (retFileName != NULL) *retFileName = cloneString(fileName); - return TRUE; + foundIt = TRUE; } // Not bed4; try just .bb: safef(fileName, sizeof(fileName), "/gbdb/%s/vai/%s.bb", database, table->name); if (fileExists(fileName)) { if (retFileName != NULL) *retFileName = cloneString(fileName); - return TRUE; + foundIt = TRUE; } +if (foundIt && retTdb == NULL) + return TRUE; struct trackDb *tdb = tdbForTrack(database, table->name, &fullTrackList); if (tdb != NULL) { if (retTdb != NULL) *retTdb = tdb; return TRUE; } -return FALSE; +return foundIt; } void selectDbSnp(boolean gotSnp) /* Offer to include rsID (and other fields, or leave that for advanced output??) if available */ { if (!gotSnp) return; startCollapsibleSection("dbSnp", "Known variation", FALSE); cartMakeCheckBox(cart, "hgva_rsId", TRUE); -printf("Include dbSNP rs# ID if one exists<BR>\n"); +printf("Include <A HREF='http://www.ncbi.nlm.nih.gov/projects/SNP/' TARGET=_BLANK>dbSNP</A> " + "rs# ID if one exists<BR>\n"); puts("<BR>"); endCollapsibleSection(); } boolean isConsElTrack(struct trackDb *tdb, void *filterData) /* This is a TdbFilterFunction to get "phastConsNwayElements" tracks. */ { return (startsWith("phastCons", tdb->table) && stringIn("Elements", tdb->table)); } boolean isConsScoreTrack(struct trackDb *tdb, void *filterData) /* This is a TdbFilterFunction to get tracks that start with "phastCons" (no Elements) * or "phyloP". */ { return ((startsWith("phastCons", tdb->table) && !stringIn("Elements", tdb->table)) @@ -814,61 +865,276 @@ char query[512]; sqlSafef(query, sizeof(query), "select fileName from %s", table); char *fileName = sqlQuickString(conn, query); hFreeConn(&conn); return fileName; } void textOpen() /* Start serving up plain text, possibly via a pipeline to gzip. */ { char *fileName = cartUsualString(cart, "hgva_outFile", ""); char *compressType = cartUsualString(cart, "hgva_compressType", textOutCompressGzip); compressPipeline = textOutInit(fileName, compressType); } -struct annoGrator *gratorForSnp(char *suffix, struct annoAssembly *assembly, char *chrom, - enum annoGratorOverlap overlapRule) -/* Look up snpNNNsuffix; if we find it, return a grator, otherwise return NULL. */ +void setGpVarFuncFilter(struct annoGrator *gpVarGrator) +/* Use cart variables to configure gpVarGrator's filtering by functional category. */ +{ +struct annoGratorGpVarFuncFilter aggvFuncFilter; +ZeroVar(&aggvFuncFilter); +aggvFuncFilter.intergenic = cartUsualBoolean(cart, "hgva_include_intergenic", FALSE); +aggvFuncFilter.upDownstream = cartUsualBoolean(cart, "hgva_include_upDownstream", TRUE); +aggvFuncFilter.utr = cartUsualBoolean(cart, "hgva_include_utr", TRUE); +aggvFuncFilter.cdsSyn = cartUsualBoolean(cart, "hgva_include_cdsSyn", TRUE); +aggvFuncFilter.cdsNonSyn = cartUsualBoolean(cart, "hgva_include_cdsNonSyn", TRUE); +aggvFuncFilter.intron = cartUsualBoolean(cart, "hgva_include_intron", TRUE); +aggvFuncFilter.splice = cartUsualBoolean(cart, "hgva_include_splice", TRUE); +aggvFuncFilter.nonCodingExon = cartUsualBoolean(cart, "hgva_include_nonCodingExon", TRUE); +annoGratorGpVarSetFuncFilter(gpVarGrator, &aggvFuncFilter); +} + +#define NO_MAXROWS 0 + +struct annoGrator *gratorForSnpBed4(struct hash *gratorsByName, char *suffix, + struct annoAssembly *assembly, char *chrom, + enum annoGratorOverlap overlapRule, + char **retDescription) +/* Look up snpNNNsuffix; if we find it, return a grator (possibly for a bigBed 4 file), + * otherwise return NULL. */ { char *fileName = NULL; struct trackDb *tdb = NULL; if (! findSnpBed4(suffix, &fileName, &tdb)) return NULL; struct annoGrator *grator = NULL; -if (fileName != NULL) +// First look in gratorsByName to see if this grator has already been constructed: +if (tdb != NULL) + { + grator = hashFindVal(gratorsByName, tdb->table); + if (retDescription) + *retDescription = cloneString(tdb->longLabel); + } +if (grator != NULL) { - struct annoStreamer *streamer = annoStreamBigBedNew(fileName, assembly, BIGNUM); - grator = annoGratorNew(streamer); + // Set its overlap behavior and we're done. grator->setOverlapRule(grator, overlapRule); + return grator; + } +// If not in gratorsByName, then attempt to construct it here: +if (fileName != NULL) + grator = gratorFromBigDataFileOrUrl(fileName, assembly, NO_MAXROWS, overlapRule); +else + grator = gratorFromTrackDb(assembly, tdb->table, tdb, chrom, NO_MAXROWS, NULL, overlapRule); +if (grator != NULL) + hashAdd(gratorsByName, tdb->table, grator); +return grator; +} + +char *tableNameFromSourceName(char *sourceName) +/* Strip sourceName (file path or db.table) to just the basename or table name. */ +{ +char *tableName = cloneString(sourceName); +char *p = strrchr(tableName, '/'); +if (p != NULL) + { + // file path; strip to basename + char dir[PATH_LEN], name[FILENAME_LEN], extension[FILEEXT_LEN]; + splitPath(tableName, dir, name, extension); + safecpy(tableName, strlen(tableName)+1, name); } else { - grator = gratorFromTrack(assembly, tdb->table, tdb, chrom, BIGNUM, NULL, overlapRule); + // database.table -- skip database. + p = strchr(tableName, '.'); + if (p != NULL) + tableName = p+1; } -return grator; +return tableName; } -void updateGratorLists(struct annoGrator *grator, struct annoGrator **pGratorList, - struct annoGrator **pFirstExtra) -/* If grator is non-NULL, add it to gratorList; if *pFirstExtra is NULL, set it to grator. */ +char *tagFromTableName(char *tableName, char *suffix) +/* Generate a tag for VEP's extras column or VCF's info column. */ { -if (grator != NULL) +char *p = strstr(tableName, "dbNsfp"); +if (p != NULL) + tableName = p + strlen("dbNsfp"); +int suffixLen = (suffix == NULL) ? 0 : strlen(suffix); +int tagSize = strlen(tableName) + suffixLen + 1; +char *tag = cloneStringZ(tableName, tagSize); +if (isNotEmpty(suffix)) + safecat(tag, tagSize, suffix); +touppers(tag); +return tag; +} + +enum PolyPhen2Subset stripSubsetFromTrackName(char *trackName) +/* trackName may have a _suffix for a subset of PolyPhen2; convert that to enum + * and zero out the suffix so we have the real trackName. */ { +enum PolyPhen2Subset subset = noSubset; +char *p = strchr(trackName, ':'); +if (p != NULL) + { + if (sameString(p+1, "HDIV")) + subset = HDIV; + else if (sameString(p+1, "HVAR")) + subset = HVAR; + else + errAbort("unrecognized suffix in track_suffix '%s'", trackName); + *p = '\0'; + } +return subset; +} + +void updateGratorListAndVepExtra(struct annoGrator *grator, struct annoGrator **pGratorList, + struct annoFormatter *vepOut, enum PolyPhen2Subset subset, + char *column, char *description) +/* If grator is non-NULL, add it to gratorList and vepOut's list of items for EXTRAs column. */ +{ +if (grator == NULL) + return; slAddHead(pGratorList, grator); - if (pFirstExtra != NULL && *pFirstExtra == NULL) - *pFirstExtra = grator; +if (vepOut != NULL) + { + char *tableName = tableNameFromSourceName(grator->streamer.name); + char *suffix = NULL; + if (subset == HDIV) + suffix = "HDIV"; + else if (subset == HVAR) + suffix = "HVAR"; + char *tag = tagFromTableName(tableName, suffix); + if (isEmpty(description)) + description = grator->streamer.name; + annoFormatVepAddExtraItem(vepOut, (struct annoStreamer *)grator, tag, description, column); + } +} + +INLINE void updateGratorList(struct annoGrator *grator, struct annoGrator **pGratorList) +/* If grator is non-NULL, add it to gratorList. */ +{ +updateGratorListAndVepExtra(grator, pGratorList, NULL, 0, NULL, NULL); +} + +void addDbNsfpSeqChange(char *trackName, struct annoAssembly *assembly, struct hash *gratorsByName, + struct annoGrator **pGratorList) +// If the user has selected dbNsfp* data, we also need the underlying dbNsfpSeqChange +// data, so annoFormatVep can tell whether the variant and gpFx are consistent with the +// variant and transcript that dbNsfp used to calculate scores. +{ +//#*** Yet another place where we need metadata: +char *seqChangeTable = "dbNsfpSeqChange"; +if (hashFindVal(gratorsByName, seqChangeTable) == NULL) + { + char *fileName = fileNameFromTable(seqChangeTable); + if (fileName == NULL) + errAbort("'%s' requested, but I can't find fileName for %s", + trackName, seqChangeTable); + struct annoGrator *grator = gratorFromBigDataFileOrUrl(fileName, assembly, NO_MAXROWS, + agoNoConstraint); + updateGratorList(grator, pGratorList); + hashAdd(gratorsByName, seqChangeTable, grator); + } +} + +void addOutputTracks(struct annoGrator **pGratorList, struct hash *gratorsByName, + struct annoFormatter *vepOut, struct annoAssembly *assembly, char *chrom) +// Construct grators for tracks selected to appear in EXTRAS column +{ +char trackPrefix[128]; +safef(trackPrefix, sizeof(trackPrefix), "hgva_track_%s_", database); +int trackPrefixLen = strlen(trackPrefix); +struct slPair *trackVar, *trackVars = cartVarsWithPrefix(cart, trackPrefix); +for (trackVar = trackVars; trackVar != NULL; trackVar = trackVar->next) + { + char *val = trackVar->val; + if (! (sameWord(val, "on") || atoi(val) > 0)) + continue; + char *trackName = trackVar->name + trackPrefixLen; + if (sameString(trackName, "dbNsfpPolyPhen2")) + // PolyPhen2 must have a suffix now -- skip obsolete cartVar from existing carts + continue; + struct annoGrator *grator = hashFindVal(gratorsByName, trackName); + if (grator != NULL) + // We already have this as a grator: + continue; + enum PolyPhen2Subset subset = noSubset; + char *description = NULL; + char *column = NULL; + if (startsWith("dbNsfp", trackName)) + { + // trackName for PolyPhen2 has a suffix for subset -- strip it if we find it: + subset = stripSubsetFromTrackName(trackName); + description = dbNsfpDescFromTableName(trackName, subset, FALSE); + addDbNsfpSeqChange(trackName, assembly, gratorsByName, pGratorList); + char *fileName = fileNameFromTable(trackName); + if (fileName != NULL) + grator = gratorFromBigDataFileOrUrl(fileName, assembly, NO_MAXROWS, agoNoConstraint); + } + else + { + struct trackDb *tdb = tdbForTrack(database, trackName, &fullTrackList); + if (tdb != NULL) + { + grator = gratorFromTrackDb(assembly, tdb->table, tdb, chrom, NO_MAXROWS, NULL, + agoNoConstraint); + if (grator != NULL) + //#*** Need something more sophisticated but this works for our + //#*** limited selection of extra tracks: + if (asColumnFind(grator->streamer.asObj, "name") != NULL) + column = "name"; + description = tdb->longLabel; + } + } + updateGratorListAndVepExtra(grator, pGratorList, vepOut, subset, column, description); + if (grator != NULL) + hashAdd(gratorsByName, trackName, grator); + } +} + +void addFilterTracks(struct annoGrator **pGratorList, struct hash *gratorsByName, + struct annoAssembly *assembly, char *chrom) +// Add grators for filters (not added to vepOut): +{ +if (cartUsualBoolean(cart, "hgva_exclude_snpCommon", FALSE)) + { + struct annoGrator *grator = gratorForSnpBed4(gratorsByName, "Common", assembly, chrom, + agoMustNotOverlap, NULL); + updateGratorList(grator, pGratorList); + } + +if (cartUsualBoolean(cart, "hgva_exclude_snpMult", FALSE)) + { + struct annoGrator *grator = gratorForSnpBed4(gratorsByName, "Mult", assembly, chrom, + agoMustNotOverlap, NULL); + updateGratorList(grator, pGratorList); + } + +if (cartUsualBoolean(cart, "hgva_require_consEl", FALSE)) + { + char *consElTrack = cartString(cart, "hgva_require_consEl_track"); + struct annoGrator *grator = hashFindVal(gratorsByName, consElTrack); + if (grator == NULL) + { + struct trackDb *tdb = tdbForTrack(database, consElTrack, &fullTrackList); + if (tdb != NULL) + grator = gratorFromTrackDb(assembly, tdb->table, tdb, chrom, NO_MAXROWS, NULL, + agoMustOverlap); + updateGratorList(grator, pGratorList); + } + else + grator->setOverlapRule(grator, agoMustOverlap); } } void doQuery() /* Translate simple form inputs into anno* components and execute query. */ { char *chrom = NULL; uint start = 0, end = 0; if (sameString(regionType, hgvaRegionTypeRange)) { char *position = cartString(cart, hgvaRange); if (! parsePosition(position, &chrom, &start, &end)) errAbort("Expected position to be chrom:start-end but got '%s'", position); } char *variantTrack = cartString(cart, "hgva_variantTrack"); @@ -882,172 +1148,66 @@ warn("Can't find hub track %s; try the \"check hub\" button in " "<A href=\"%s?%s&%s=%s\">Track Data Hubs</A>.", variantTrack, hgHubConnectName(), cartSidUrlString(cart), hgHubConnectCgiDestUrl, cgiScriptName()); else warn("Can't find tdb for variant track %s", variantTrack); doUi(); return; } checkVariantTrack(varTdb); int maxVarRows = cartUsualInt(cart, "hgva_variantLimit", 10); struct annoStreamer *primary = streamerFromTrack(assembly, varTdb->table, varTdb, chrom, maxVarRows); struct trackDb *geneTdb = tdbForTrack(database, geneTrack, &fullTrackList); +if (geneTdb == NULL) + { + warn("Can't find tdb for gene track %s", geneTrack); + doUi(); + return; + } enum annoGratorOverlap geneOverlapRule = agoMustOverlap; -struct annoGrator *gpVarGrator = gratorFromTrack(assembly, geneTdb->table, geneTdb, chrom, - BIGNUM, primary->asObj, geneOverlapRule); -struct annoGratorGpVarFuncFilter aggvFuncFilter; -ZeroVar(&aggvFuncFilter); -aggvFuncFilter.intergenic = cartUsualBoolean(cart, "hgva_include_intergenic", FALSE); -aggvFuncFilter.upDownstream = cartUsualBoolean(cart, "hgva_include_upDownstream", TRUE); -aggvFuncFilter.utr = cartUsualBoolean(cart, "hgva_include_utr", TRUE); -aggvFuncFilter.cdsSyn = cartUsualBoolean(cart, "hgva_include_cdsSyn", TRUE); -aggvFuncFilter.cdsNonSyn = cartUsualBoolean(cart, "hgva_include_cdsNonSyn", TRUE); -aggvFuncFilter.intron = cartUsualBoolean(cart, "hgva_include_intron", TRUE); -aggvFuncFilter.splice = cartUsualBoolean(cart, "hgva_include_splice", TRUE); -aggvFuncFilter.nonCodingExon = cartUsualBoolean(cart, "hgva_include_nonCodingExon", TRUE); -annoGratorGpVarSetFuncFilter(gpVarGrator, &aggvFuncFilter); +struct annoGrator *gpVarGrator = gratorFromTrackDb(assembly, geneTdb->table, geneTdb, chrom, + NO_MAXROWS, primary->asObj, geneOverlapRule); +setGpVarFuncFilter(gpVarGrator); + +// Some grators may be used as both filters and output values. To avoid making +// multiple grators for the same source, hash them by trackName: +struct hash *gratorsByName = hashNew(8); struct annoGrator *snpGrator = NULL; +char *snpDesc = NULL; if (cartBoolean(cart, "hgva_rsId")) - { - snpGrator = gratorForSnp("", assembly, chrom, agoNoConstraint); - } + snpGrator = gratorForSnpBed4(gratorsByName, "", assembly, chrom, agoNoConstraint, &snpDesc); // Now construct gratorList in the order in which annoFormatVep wants to see them, // i.e. first the gpVar, then the snpNNN, then whatever else: struct annoGrator *gratorList = NULL; slAddHead(&gratorList, gpVarGrator); if (snpGrator != NULL) slAddHead(&gratorList, snpGrator); -// extraGrators will point to the first grator in gratorList after snpGrator; -// i.e. first grator that is meant for the EXTRAS column and/or used by a filter -struct annoGrator *extraGrators = NULL; -if (cartUsualBoolean(cart, "hgva_exclude_snpCommon", FALSE)) - { - struct annoGrator *grator = gratorForSnp("Common", assembly, chrom, agoMustNotOverlap); - updateGratorLists(grator, &gratorList, &extraGrators); - } -if (cartUsualBoolean(cart, "hgva_exclude_snpMult", FALSE)) - { - struct annoGrator *grator = gratorForSnp("Mult", assembly, chrom, agoMustNotOverlap); - updateGratorLists(grator, &gratorList, &extraGrators); - } -// If we're including Conserved Elements as a filter, and it has also been selected as an -// extra output value, don't make two grators for it: -char *consElTrack = NULL; -if (cartUsualBoolean(cart, "hgva_require_consEl", FALSE)) - { - struct annoGrator *grator = NULL; - consElTrack = cartString(cart, "hgva_require_consEl_track"); - struct trackDb *tdb = tdbForTrack(database, consElTrack, &fullTrackList); - if (tdb != NULL) - grator = gratorFromTrack(assembly, tdb->table, tdb, chrom, BIGNUM, NULL, agoMustOverlap); - updateGratorLists(grator, &gratorList, &extraGrators); - } +struct annoFormatter *vepOut = annoFormatVepNew("stdout", primary, varTdb->longLabel, + (struct annoStreamer *)gpVarGrator, + geneTdb->longLabel, + (struct annoStreamer *)snpGrator, + snpDesc); -boolean addedDbNsfpSeqChange = FALSE; -char trackPrefix[128]; -safef(trackPrefix, sizeof(trackPrefix), "hgva_track_%s_", database); -int trackPrefixLen = strlen(trackPrefix); -struct slPair *trackVar, *trackVars = cartVarsWithPrefix(cart, trackPrefix); -for (trackVar = trackVars; trackVar != NULL; trackVar = trackVar->next) - { - char *val = trackVar->val; - if (! (sameWord(val, "on") || atoi(val) > 0)) - continue; - char *trackName = trackVar->name + trackPrefixLen; - if (sameOk(consElTrack, trackName)) - // We already have this as a grator because it's a filter (above): - continue; - struct trackDb *tdb = tdbForTrack(database, trackName, &fullTrackList); - struct annoGrator *grator = NULL; - if (tdb != NULL) - grator = gratorFromTrack(assembly, tdb->table, tdb, chrom, BIGNUM, NULL, agoNoConstraint); - else if (startsWith("dbNsfp", trackName)) - { - // If the user has selected dbNsfp* data, we also need the underlying dbNsfpSeqChange - // data, so annoFormatVep can tell whether the variant and gpFx are consistent with the - // variant and transcript that dbNsfp used to calculate scores. - if (!addedDbNsfpSeqChange) - { - char *fileName = fileNameFromTable("dbNsfpSeqChange"); - if (fileName == NULL) - errAbort("'%s' requested, but I can't find fileName for dbNsfpSeqChange", - trackName); - struct annoStreamer *streamer = annoStreamBigBedNew(fileName, assembly, BIGNUM); - grator = annoGratorNew(streamer); - updateGratorLists(grator, &gratorList, &extraGrators); - addedDbNsfpSeqChange = TRUE; - } - char *fileName = fileNameFromTable(trackName); - if (fileName != NULL) - { - struct annoStreamer *streamer = annoStreamBigBedNew(fileName, assembly, BIGNUM); - grator = annoGratorNew(streamer); - } - } - updateGratorLists(grator, &gratorList, &extraGrators); - } -slReverse(&gratorList); +addOutputTracks(&gratorList, gratorsByName, vepOut, assembly, chrom); +addFilterTracks(&gratorList, gratorsByName, assembly, chrom); -struct annoFormatter *vepOut = annoFormatVepNew("stdout", primary, - (struct annoStreamer *)gpVarGrator, - (struct annoStreamer *)snpGrator); -//#*** can all the extraItem config smarts be pulled inside annoFormatVep? -struct annoStreamer *extraSource = (struct annoStreamer *)extraGrators; -for (; extraSource != NULL; extraSource = extraSource->next) - { - char *tableName = extraSource->name; - char *tag = cloneString(tableName); - char *p = strrchr(tag, '/'); - if (p != NULL) - { - // file path; strip to basenaem - tableName = tag = p+1; - p = strchr(tag, '.'); - if (p != NULL) - *p = '\0'; - } - p = strstr(tag, "dbNsfp"); - if (p != NULL) - { - tableName = p; - tag = p + strlen("dbNsfp"); - } - else - { - // database.table -- skip database. - p = strchr(tag, '.'); - if (p != NULL) - tag = p+1; - } - touppers(tag); - char *description = dbNsfpDescFromTableName(tableName); - if (description == NULL) - description = extraSource->name; -//#*** This is where the smarts probably can't all be pulled into annoFormatVep: - char *column = "ignoredVepKnowsBetterOrOverlapExcludesVariant"; - if (extraSource->rowType == arWords) - { - if (asColumnFindIx(extraSource->asObj->columnList, "name") >= 0) - column = "name"; - } - annoFormatVepAddExtraItem(vepOut, extraSource, tag, description, column); - } +slReverse(&gratorList); // Undo the htmlPushEarlyHandlers() because after this point they make ugly text: popWarnHandler(); popAbortHandler(); textOpen(); webStartText(); struct annoGratorQuery *query = annoGratorQueryNew(assembly, primary, gratorList, vepOut); if (chrom != NULL) annoGratorQuerySetRegion(query, chrom, start, end); annoGratorQueryExecute(query); textOutClose(&compressPipeline); annoGratorQueryFree(&query); }