",
somethingIsVisible ? "none" : "block");
puts("No transcript status data are available for the selected gene track.");
puts("
");
puts("", refSeqSelected ? "block" : "none");
+boolean hgvsOk = canDoHgvsOut(geneTrack);
+printf("
", hgvsOk ? "block" : "none");
cartMakeCheckBox(cart, "hgva_hgvsG", FALSE);
printf("Include HGVS genomic (g.) terms in output
\n");
cartMakeCheckBox(cart, "hgva_hgvsCN", FALSE);
printf("Include HGVS coding (c.) terms if applicable, otherwise noncoding (n.) terms, in output"
"
\n");
cartMakeCheckBox(cart, "hgva_hgvsP", FALSE);
printf("Include HGVS protein (p.) terms (if applicable) in output
\n");
cartMakeCheckBox(cart, "hgva_hgvsPAddParens", FALSE);
printf("When including HGVS protein (p.) terms, add parentheses around changes to emphasize "
"that they are predictions
\n");
cartMakeCheckBox(cart, "hgva_hgvsBreakDelIns", FALSE);
printf("For variants that involve both a deletion and insertion, "
"including multi-nucleotide variants, "
"include the deleted sequence (e.g. show \"delAGinsTT\" instead of only \"delinsTT\")"
"
\n");
puts("
");
printf("
",
- refSeqSelected ? "none" : "block");
+ hgvsOk ? "none" : "block");
printf("Select RefSeq Genes in the \"Select Genes\" section above "
"in order to make options appear.\n");
puts("
");
puts("
");
endCollapsibleSection();
}
boolean isHg19RegulatoryTrack(struct trackDb *tdb, void *filterData)
/* For now, just look for a couple specific tracks by tableName. */
{
//#*** NEED METADATA
return (sameString("wgEncodeRegDnaseClusteredV3", tdb->table) ||
sameString("wgEncodeRegTfbsClusteredV3", tdb->table));
}
boolean isHg38RegulatoryTrack(struct trackDb *tdb, void *filterData)
/* For now, just look for a couple specific tracks by tableName. */
{
//#*** NEED METADATA
return (sameString("wgEncodeRegDnaseClustered", tdb->table) ||
sameString("wgEncodeRegTfbsClusteredV3", tdb->table));
}
struct slRef *findRegulatoryTracks()
/* Look for the very limited set of Regulation tracks that hgVai offers. */
{
struct slRef *regTrackRefList = NULL;
if (sameString(database, "hg19"))
tdbFilterGroupTrack(fullTrackList, fullGroupList, isHg19RegulatoryTrack,
NULL, NULL, ®TrackRefList);
else if (sameString(database, "hg38"))
tdbFilterGroupTrack(fullTrackList, fullGroupList, isHg38RegulatoryTrack,
NULL, NULL, ®TrackRefList);
return regTrackRefList;
}
void selectRegulatory()
/* If trackRefList is non-NULL, make a section with a checkbox for each track,
* labelled with longLabel, and optionally some filtering options. */
{
struct slRef *trackRefList = findRegulatoryTracks();
if (trackRefList != NULL)
{
puts("
");
printf("\n");
printf("The annotations in this section provide predicted regulatory regions "
"based on various experimental data. "
"When a variant overlaps an annotation selected here, the consequence term "
"
regulatory_region_variant will be assigned. "
"Follow the links to description pages that explain how each dataset was "
"constructed. "
"Some datasets cover a significant portion of the genome and it may be "
"desirable to filter these annotations by cell type and/or score in order "
"to avoid an overabundance of hits. ");
// Use a table with checkboxes in one column and label/other stuff that depends on
// checkbox in other column.
puts("
");
struct slRef *ref;
for (ref = trackRefList; ref != NULL; ref = ref->next)
{
struct trackDb *tdb = ref->val;
char cartVar[512];
safef(cartVar, sizeof(cartVar), "hgva_track_%s_%s", database, tdb->track);
puts("");
cartMakeCheckBox(cart, cartVar, FALSE);
puts(" | ");
struct trackDb *topTdb = trackDbTopLevelSelfOrParent(tdb);
printf("%s \n", hgTrackUiName(), cartSidUrlString(cart),
topTdb->track, tdb->longLabel);
printFilterOptions(tdb);
puts(" |
");
}
puts("
");
}
}
boolean isConsElTrack(struct trackDb *tdb, void *filterData)
/* This is a TdbFilterFunction to get "phastConsNwayElements" tracks. */
{
return (startsWith("phastCons", tdb->table) && stringIn("Elements", tdb->table));
}
boolean isConsScoreTrack(struct trackDb *tdb, void *filterData)
/* This is a TdbFilterFunction to get tracks that start with "phastCons" (no Elements)
* or "phyloP". */
{
return ((startsWith("phastCons", tdb->table) && !stringIn("Elements", tdb->table))
|| startsWith("phyloP", tdb->table));
}
void findCons(struct slRef **retElTrackRefList, struct slRef **retScoreTrackRefList)
/* See if this database has Conserved Elements and/or Conservation Scores */
{
tdbFilterGroupTrack(fullTrackList, fullGroupList, isConsElTrack,
NULL, NULL, retElTrackRefList);
tdbFilterGroupTrack(fullTrackList, fullGroupList, isConsScoreTrack,
NULL, NULL, retScoreTrackRefList);
}
boolean trackNameMatches(struct trackDb *tdb, void *filterData)
/* This is a TdbFilterFunction to get track(s) whose tdb->track matches name (filterData). */
{
char *name = filterData;
return sameString(tdb->track, name);
}
struct slRef *findTrackRefByName(char *name)
/* Return a reference to the named track, if found. */
{
struct slRef *trackRefList = NULL;
tdbFilterGroupTrack(fullTrackList, fullGroupList, trackNameMatches, name, NULL, &trackRefList);
return trackRefList;
}
void trackCheckBoxSection(char *sectionSuffix, char *title, struct slRef *trackRefList)
/* If trackRefList is non-NULL, make a collapsible section with a checkbox for each track,
* labelled with longLabel. */
{
if (trackRefList != NULL)
{
startCollapsibleSection(sectionSuffix, title, FALSE);
struct slRef *ref;
for (ref = trackRefList; ref != NULL; ref = ref->next)
{
struct trackDb *tdb = ref->val;
char cartVar[512];
safef(cartVar, sizeof(cartVar), "hgva_track_%s_%s", database, tdb->track);
cartMakeCheckBox(cart, cartVar, FALSE);
struct trackDb *topTdb = trackDbTopLevelSelfOrParent(tdb);
printf("
%s\n", hgTrackUiName(), cartSidUrlString(cart),
topTdb->track, tdb->longLabel);
}
puts("
");
endCollapsibleSection();
}
}
void selectAnnotations(char *geneTrack)
/* Beyond predictions of protein-coding effect, what other basic data can we integrate? */
{
struct slName *dbNsfpTables = findDbNsfpTables();
boolean gotSnp = findSnpBed4("", NULL, NULL);
struct slRef *elTrackRefList = NULL, *scoreTrackRefList = NULL;
findCons(&elTrackRefList, &scoreTrackRefList);
struct slRef *cosmicTrackRefList = findTrackRefByName("cosmic");
boolean hasTxStat = hasTxStatus();
if (dbNsfpTables == NULL && !gotSnp && elTrackRefList == NULL && scoreTrackRefList == NULL &&
cosmicTrackRefList == NULL && !hasTxStat)
return;
puts("
");
printf("\n");
// Make wrapper table for collapsible sections:
puts("
");
selectDbNsfp(dbNsfpTables);
selectTxStatus(hasTxStat, geneTrack);
selectHgvsOut(geneTrack);
selectDbSnp(gotSnp);
trackCheckBoxSection("Cosmic", "COSMIC", cosmicTrackRefList);
trackCheckBoxSection("ConsEl", "Conserved elements", elTrackRefList);
trackCheckBoxSection("ConsScore", "Conservation scores", scoreTrackRefList);
puts("
");
}
void selectFiltersFunc()
/* Options to restrict variants based on gene region/soTerm from gpFx */
{
startCollapsibleSection("filtersFunc", "Functional role", FALSE);
printf("Include variants annotated as
\n");
jsMakeSetClearContainer();
cartMakeCheckBox(cart, "hgva_include_intergenic", TRUE);
printf("intergenic
\n");
cartMakeCheckBox(cart, "hgva_include_upDownstream", TRUE);
printf("upstream/downstream of gene
\n");
cartMakeCheckBox(cart, "hgva_include_nmdTranscript", TRUE);
printf("in transcript already subject to nonsense-mediated decay (NMD)
\n");
cartMakeCheckBox(cart, "hgva_include_exonLoss", TRUE);
printf("exon loss caused by deletion
\n");
cartMakeCheckBox(cart, "hgva_include_utr", TRUE);
printf("5' or 3' UTR
\n");
cartMakeCheckBox(cart, "hgva_include_cdsSyn", TRUE);
printf("CDS - synonymous coding change
\n");
cartMakeCheckBox(cart, "hgva_include_cdsNonSyn", TRUE);
printf("CDS - non-synonymous (missense, stop gain/loss, frameshift etc)
\n");
cartMakeCheckBox(cart, "hgva_include_intron", TRUE);
printf("intron
\n");
cartMakeCheckBox(cart, "hgva_include_splice", TRUE);
printf("splice site or splice region
\n");
cartMakeCheckBox(cart, "hgva_include_nonCodingExon", TRUE);
printf("exon of non-coding gene
\n");
cartMakeCheckBox(cart, "hgva_include_noVariation", TRUE);
printf("\"variants\" for which no alternate allele has been observed
\n");
struct slRef *regTrackRefList = findRegulatoryTracks();
if (regTrackRefList != NULL)
{
cartMakeCheckBox(cart, "hgva_include_regulatory", TRUE);
printf("regulatory element (note: these are detected only if one or more tracks "
"are selected in Regulatory regions above)
\n");
}
jsEndContainer();
puts("
");
endCollapsibleSection();
}
void selectFiltersKnownVar()
/* Options to restrict output based on overlap with known variants. */
{
boolean gotCommon = findSnpBed4("Common", NULL, NULL);
boolean gotMult = findSnpBed4("Mult", NULL, NULL);
if (!gotCommon && !gotMult)
return;
startCollapsibleSection("filtersVar", "Known variation", FALSE);
if (gotMult)
{
cartMakeCheckBox(cart, "hgva_include_snpMult", TRUE);
printf("Include variants even if they are co-located with variants that have been mapped to "
"multiple genomic locations by dbSNP
\n");
}
if (gotCommon)
{
cartMakeCheckBox(cart, "hgva_include_snpCommon", TRUE);
printf("Include variants even if they are co-located with \"common\" variants "
"(uniquely mapped variants with global minor allele frequency (MAF) "
"of at least 1%% according to dbSNP)
\n");
}
puts("
");
endCollapsibleSection();
}
void selectFiltersCons()
/* Options to restrict output based on overlap with conserved elements. */
{
struct slRef *elTrackRefList = NULL;
tdbFilterGroupTrack(fullTrackList, fullGroupList, isConsElTrack, NULL, NULL, &elTrackRefList);
if (elTrackRefList == NULL)
return;
startCollapsibleSection("filtersCons", "Conservation", FALSE);
// Use a little table to indent radio buttons (if we do those) past checkbox:
puts("
");
cartMakeCheckBox(cart, "hgva_require_consEl", FALSE);
printf(" | Include only the variants that overlap:");
if (slCount(elTrackRefList) == 1)
{
struct trackDb *tdb = elTrackRefList->val;
printf(" %s \n", tdb->longLabel);
cgiMakeHiddenVar("hgva_require_consEl_track", tdb->track);
puts(" |
");
}
else
{
puts("");
char *selected = cartUsualString(cart, "hgva_require_consEl_track", "");
struct slRef *ref;
for (ref = elTrackRefList; ref != NULL; ref = ref->next)
{
printf("
| ");
struct trackDb *tdb = ref->val;
cgiMakeOnEventRadioButtonWithClass("hgva_require_consEl_track", tdb->track,
sameString(tdb->track, selected),
NULL, "click", "setCheckboxList('hgva_require_consEl', true);");
printf("%s |
\n", tdb->longLabel);
}
puts("");
}
endCollapsibleSection();
}
void selectFilters()
/* Options to restrict output to variants with specific properties */
{
puts("
");
printf("\n");
puts("
");
selectFiltersFunc();
selectFiltersKnownVar();
selectFiltersCons();
puts("
");
}
void selectOutput()
/* Just VEP text for now... should also have VEP HTML with limited # lines too.
* Next: custom track with same subset of info as we would stuff in VEP */
{
puts("
");
printf("\n");
printf("
output format: ");
char *selected = cartUsualString(cart, "hgva_outFormat", "vepTab");
printf("
\n");
char *compressType = cartUsualString(cart, "hgva_compressType", textOutCompressNone);
char *fileName = cartUsualString(cart, "hgva_outFile", "");
printf("
output file: ");
cgiMakeTextVar("hgva_outFile", fileName, 29);
printf(" (leave blank to keep output in browser)
\n");
printf("
file type returned: ");
cgiMakeRadioButton("hgva_compressType", textOutCompressNone,
sameWord(textOutCompressNone, compressType));
printf(" plain text  ");
cgiMakeRadioButton("hgva_compressType", textOutCompressGzip,
sameWord(textOutCompressGzip, compressType));
printf(" gzip compressed (ignored if output file is blank)");
puts("
");
}
void submitAndDisclaimer()
{
puts("
");
puts("This tool is for research use only. While this tool is open to the "
"public, users seeking information about a personal medical or genetic "
"condition are urged to consult with a qualified physician for "
"diagnosis and for answers to personal questions.");
puts("
");
printf("

\n");
printf("
\n");
cgiMakeOnClickButton("subDisclmAgrd","hgva.submitQueryIfDisclaimerAgreed();", "Get results");
puts("
");
}
/*
* When user clicks submit, we need to roll a JSON querySpec from form selections,
* and show data from a submission to hgAnnoGrator. redirect from this CGI?
* or have javascript submit directly?
* * primary: variants, from custom track
* * if there are genes, those w/annoGratorGpVar
* * if there are {dbSNP, dbNsfp, regulatory, cons} selections, grator for each of those
* * vep output config
*
* If we get bold & offer 1000Genomes VCF, will def. need handling of split chroms.
* Are we really going to offer genome-wide in hgVai?
* Up-front limit on #rows of input ?
*
* Eventually, we might want a FormatVep that produces structs that are passed
* forward to multiple output writers... I would want to send it lots of gratorData
* like a formatter, but it would produce rows like an annoGrator.
* Maybe annoGrators should accept a bunch of input rows like formatters?
* or would this grator wrap all the input grators inside?
*/
void doMainPage()
/* Print out initial HTML of control page. */
{
jsInit();
webIncludeResourceFile("jquery-ui.css");
webIncludeResourceFile("ui.dropdownchecklist.css");
boolean alreadyAgreed = cartUsualBoolean(cart, "hgva_agreedToDisclaimer", FALSE);
jsInlineF(
"$(document).ready(function() { hgva.disclaimer.init(%s, hgva.userClickedAgree); });\n"
, alreadyAgreed ? "true" : "false");
addSomeCss();
printAssemblySection();
puts("
");
// Make wrapper table for collapsible sections:
selectVariants();
char *geneTrack = selectGenes();
if (geneTrack != NULL)
{
selectRegulatory();
selectAnnotations(geneTrack);
selectFilters();
selectOutput();
submitAndDisclaimer();
}
printf("");
jsReloadOnBackButton(cart);
webNewSection("Using the Variant Annotation Integrator");
webIncludeHelpFileSubst("hgVaiHelpText", cart, FALSE);
jsIncludeFile("jquery-ui.js", NULL);
jsIncludeFile("hgVarAnnogrator.js", NULL);
jsIncludeFile("ui.dropdownchecklist.js", NULL);
jsIncludeFile("ddcl.js", NULL);
}
void doUi()
/* Set up globals and make web page */
{
cartWebStart(cart, database, "Variant Annotation Integrator");
doMainPage();
cartWebEnd();
/* Save variables. */
cartCheckout(&cart);
}
void checkVariantTrack(struct trackDb *tdb)
/* variantTrack should be either pgSnp or VCF. */
{
if (! sameString(tdb->type, "pgSnp") &&
! startsWith("vcf", tdb->type))
{
errAbort("Expected variant track '%s' to be type pgSnp, vcf or vcfTabix, but it's '%s'",
tdb->track, tdb->type);
}
}
char *fileNameFromTable(char *table)
/* Get fileName from a bigData table (for when we don't have a trackDb, just table). */
{
struct sqlConnection *conn = hAllocConn(database);
char query[512];
sqlSafef(query, sizeof(query), "select fileName from %s", table);
char *fileName = sqlQuickString(conn, query);
hFreeConn(&conn);
char *fileNameRewrite = hReplaceGbdb(fileName);
freez(&fileName);
return fileNameRewrite;
}
void textOpen()
/* Start serving up plain text, possibly via a pipeline to gzip. */
{
char *fileName = textOutSanitizeHttpFileName(cartUsualString(cart, "hgva_outFile", ""));
char *compressType = cartUsualString(cart, "hgva_compressType", textOutCompressGzip);
compressPipeline = textOutInit(fileName, compressType, NULL);
}
void setGpVarFuncFilter(struct annoGrator *gpVarGrator)
/* Use cart variables to configure gpVarGrator's filtering by functional category. */
{
struct annoGratorGpVarFuncFilter aggvFuncFilter;
ZeroVar(&aggvFuncFilter);
aggvFuncFilter.intergenic = cartUsualBoolean(cart, "hgva_include_intergenic", TRUE);
aggvFuncFilter.upDownstream = cartUsualBoolean(cart, "hgva_include_upDownstream", TRUE);
aggvFuncFilter.nmdTranscript = cartUsualBoolean(cart, "hgva_include_nmdTranscript", TRUE);
aggvFuncFilter.exonLoss = cartUsualBoolean(cart, "hgva_include_exonLoss", TRUE);
aggvFuncFilter.utr = cartUsualBoolean(cart, "hgva_include_utr", TRUE);
aggvFuncFilter.cdsSyn = cartUsualBoolean(cart, "hgva_include_cdsSyn", TRUE);
aggvFuncFilter.cdsNonSyn = cartUsualBoolean(cart, "hgva_include_cdsNonSyn", TRUE);
aggvFuncFilter.intron = cartUsualBoolean(cart, "hgva_include_intron", TRUE);
aggvFuncFilter.splice = cartUsualBoolean(cart, "hgva_include_splice", TRUE);
aggvFuncFilter.nonCodingExon = cartUsualBoolean(cart, "hgva_include_nonCodingExon", TRUE);
aggvFuncFilter.noVariation = cartUsualBoolean(cart, "hgva_include_noVariation", TRUE);
annoGratorGpVarSetFuncFilter(gpVarGrator, &aggvFuncFilter);
}
static void setHgvsOutOptions(struct annoGrator *gpVarGrator, char *geneTrack)
/* Use cart variables to configure gpVarGrator's HGVS output. */
{
uint hgvsOutOptions = 0;
-if (sameString(geneTrack, "refGene") || startsWith("ncbiRefSeq", geneTrack))
+if (canDoHgvsOut(geneTrack))
{
if (cartUsualBoolean(cart, "hgva_hgvsG", FALSE))
hgvsOutOptions |= HGVS_OUT_G;
if (cartUsualBoolean(cart, "hgva_hgvsCN", FALSE))
hgvsOutOptions |= HGVS_OUT_CN;
if (cartUsualBoolean(cart, "hgva_hgvsP", FALSE))
hgvsOutOptions |= HGVS_OUT_P;
if (cartUsualBoolean(cart, "hgva_hgvsPAddParens", FALSE))
hgvsOutOptions |= HGVS_OUT_P_ADD_PARENS;
if (cartUsualBoolean(cart, "hgva_hgvsBreakDelIns", FALSE))
hgvsOutOptions |= HGVS_OUT_BREAK_DELINS;
- annoGratorGpVarSetHgvsOutOptions(gpVarGrator, hgvsOutOptions);
}
+annoGratorGpVarSetHgvsOutOptions(gpVarGrator, hgvsOutOptions);
}
struct annoGrator *gratorForSnpBed4(struct hash *gratorsByName, char *suffix,
struct annoAssembly *assembly, char *chrom,
enum annoGratorOverlap overlapRule,
char **retDescription)
/* Look up snpNNNsuffix; if we find it, return a grator (possibly for a bigBed 4 file),
* otherwise return NULL. */
{
char *fileName = NULL;
struct trackDb *tdb = NULL;
if (! findSnpBed4(suffix, &fileName, &tdb))
return NULL;
struct annoGrator *grator = NULL;
// First look in gratorsByName to see if this grator has already been constructed:
if (tdb != NULL)
{
grator = hashFindVal(gratorsByName, tdb->table);
if (retDescription)
*retDescription = cloneString(tdb->longLabel);
}
if (grator != NULL)
{
// Set its overlap behavior and we're done.
grator->setOverlapRule(grator, overlapRule);
return grator;
}
// If not in gratorsByName, then attempt to construct it here:
if (fileName != NULL)
grator = hAnnoGratorFromBigFileUrl(fileName, NULL, assembly, ANNO_NO_LIMIT, overlapRule);
else
grator = hAnnoGratorFromTrackDb(assembly, tdb->table, tdb, chrom, ANNO_NO_LIMIT,
NULL, overlapRule, NULL);
if (grator != NULL)
hashAdd(gratorsByName, tdb->table, grator);
return grator;
}
char *tableNameFromSourceName(char *sourceName)
/* Strip sourceName (file path or db.table) to just the basename or table name. */
{
char *tableName = cloneString(sourceName);
char *p = strrchr(tableName, '/');
if (p != NULL)
{
// file path; strip to basename
char dir[PATH_LEN], name[FILENAME_LEN], extension[FILEEXT_LEN];
splitPath(tableName, dir, name, extension);
safecpy(tableName, strlen(tableName)+1, name);
}
else
{
// database.table -- skip database.
p = strchr(tableName, '.');
if (p != NULL)
tableName = p+1;
}
return tableName;
}
char *tagFromTableName(char *tableName, char *suffix)
/* Generate a tag for VEP's extras column or VCF's info column. */
{
char *p = strstr(tableName, "dbNsfp");
if (p != NULL)
tableName = p + strlen("dbNsfp");
int suffixLen = (suffix == NULL) ? 0 : strlen(suffix);
int tagSize = strlen(tableName) + suffixLen + 1;
char *tag = cloneStringZ(tableName, tagSize);
if (isNotEmpty(suffix))
safecat(tag, tagSize, suffix);
touppers(tag);
// Some custom shortenings, to avoid very long tag names:
(void)strSwapStrs(tag, tagSize, "POLYPHEN", "PP");
(void)strSwapStrs(tag, tagSize, "MUTATION", "MUT");
(void)strSwapStrs(tag, tagSize, "PHYLOP", "PHP");
(void)strSwapStrs(tag, tagSize, "PHASTCONS", "PHC");
(void)strSwapStrs(tag, tagSize, "ELEMENTS", "EL");
(void)strSwapStrs(tag, tagSize, "PRIMATES", "PRIM");
(void)strSwapStrs(tag, tagSize, "PLACENTAL", "PLAC");
if (regexMatch(tag, "^PH.*[0-9]WAY"))
(void)strSwapStrs(tag, tagSize, "WAY", "W");
(void)strSwapStrs(tag, tagSize, "WGENCODEREGDNASECLUSTERED", "DNASE");
(void)strSwapStrs(tag, tagSize, "WGENCODEREGTFBSCLUSTERED", "TFBS");
return tag;
}
enum PolyPhen2Subset stripSubsetFromTrackName(char *trackName)
/* trackName may have a _suffix for a subset of PolyPhen2; convert that to enum
* and zero out the suffix so we have the real trackName. */
{
enum PolyPhen2Subset subset = noSubset;
char *p = strchr(trackName, ':');
if (p != NULL)
{
if (sameString(p+1, "HDIV"))
subset = HDIV;
else if (sameString(p+1, "HVAR"))
subset = HVAR;
else
errAbort("unrecognized suffix in track_suffix '%s'", trackName);
*p = '\0';
}
return subset;
}
void updateGratorListAndVepExtra(struct annoGrator *grator, struct annoGrator **pGratorList,
struct annoFormatter *vepOut, enum PolyPhen2Subset subset,
char *column, char *description, boolean isReg)
/* If grator is non-NULL, add it to gratorList and vepOut's list of items for EXTRAs column. */
{
if (grator == NULL)
return;
slAddHead(pGratorList, grator);
if (vepOut != NULL)
{
char *tableName = tableNameFromSourceName(grator->streamer.name);
char *suffix = NULL;
if (subset == HDIV)
suffix = "HDIV";
else if (subset == HVAR)
suffix = "HVAR";
char *tag = tagFromTableName(tableName, suffix);
if (isEmpty(description))
description = grator->streamer.name;
if (isReg)
annoFormatVepAddRegulatory(vepOut, (struct annoStreamer *)grator, tag, description, column);
else
annoFormatVepAddExtraItem(vepOut, (struct annoStreamer *)grator, tag, description, column,
FALSE);
}
}
INLINE void updateGratorList(struct annoGrator *grator, struct annoGrator **pGratorList)
/* If grator is non-NULL, add it to gratorList. */
{
updateGratorListAndVepExtra(grator, pGratorList, NULL, 0, NULL, NULL, FALSE);
}
void addDbNsfpSeqChange(char *trackName, struct annoAssembly *assembly, struct hash *gratorsByName,
struct annoGrator **pGratorList)
// If the user has selected dbNsfp* data, we also need the underlying dbNsfpSeqChange
// data, so annoFormatVep can tell whether the variant and gpFx are consistent with the
// variant and transcript that dbNsfp used to calculate scores.
{
//#*** Yet another place where we need metadata:
char *seqChangeTable = "dbNsfpSeqChange";
if (hashFindVal(gratorsByName, seqChangeTable) == NULL)
{
char *fileName = fileNameFromTable(seqChangeTable);
if (fileName == NULL)
errAbort("'%s' requested, but I can't find fileName for %s",
trackName, seqChangeTable);
struct annoGrator *grator = hAnnoGratorFromBigFileUrl(fileName, NULL, assembly, ANNO_NO_LIMIT,
agoNoConstraint);
updateGratorList(grator, pGratorList);
hashAdd(gratorsByName, seqChangeTable, grator);
}
}
static struct dyString *dyInfo = NULL;
struct hash *getTrackFilterVars(char *track)
/* Return a hash of filter variable names (cart variable suffixes) to slName lists of values. */
{
char filterPrefix[512];
safef(filterPrefix, sizeof(filterPrefix), "hgva_filter_%s_", track);
struct slPair *filterVars = cartVarsWithPrefix(cart, filterPrefix), *var;
int prefixLen = strlen(filterPrefix);
struct hash *varHash = hashNew(0);
for (var = filterVars; var != NULL; var = var->next)
{
char *varName = var->name+prefixLen;
char *val = var->val;
struct hashEl *hel = hashLookup(varHash, varName);
if (hel != NULL)
slNameAddHead((struct slName **)(&hel->val), val);
else
hashAdd(varHash, varName, slNameNew(val));
}
return varHash;
}
INLINE boolean isNotAll(struct slName *valList)
/* Return TRUE unless valList has one element with name "All" (for multiselects). */
{
if (slCount(valList) == 1 && sameString(valList->name, "All"))
return FALSE;
return TRUE;
}
void factorSourceGratorAddFilter(struct annoGrator *grator, char *name, struct slName *valList)
/* Add filter to factorSource grator. */
//#*** Do these smarts belong here in hgVai? Probably not -- should be an hg/lib module with
//#*** UI/metadata smarts.
{
struct annoStreamer *gStreamer = (struct annoStreamer *)grator;
struct annoFilter *filter = NULL;
if (sameString(name, "name") || sameString(name, "cellType") || sameString(name, "treatment"))
{
if (valList && isNotAll(valList))
filter = annoFilterFromAsColumn(gStreamer->asObj, name, afMatch, valList);
}
else if (sameString(name, "score"))
filter = annoFilterFromAsColumn(gStreamer->asObj, name, afGTE, valList);
else
errAbort("Unrecognized filter name '%s' for %s, type=factorSource", name, gStreamer->name);
if (filter)
gStreamer->addFilters(gStreamer, filter);
}
void bed5AddFilter(struct annoGrator *grator, char *name, struct slName *valList)
/* Add filter to bed 5 grator. */
{
struct annoStreamer *gStreamer = (struct annoStreamer *)grator;
struct annoFilter *filter = NULL;
if (sameString(name, "name"))
{
if (valList && isNotAll(valList))
filter = annoFilterFromAsColumn(gStreamer->asObj, name, afMatch, valList);
}
else if (sameString(name, "score"))
filter = annoFilterFromAsColumn(gStreamer->asObj, name, afGTE, valList);
else
errAbort("Unrecognized filter name '%s' for %s, type=bed 5", name, gStreamer->name);
if (filter)
gStreamer->addFilters(gStreamer, filter);
}
void addFiltersToGrator(struct annoGrator *grator, struct trackDb *tdb)
/* Look for filter variables in the cart and add filters to grator accordingly. */
{
struct hash *varHash = getTrackFilterVars(tdb->track);
struct hashEl *hel, *helList = hashElListHash(varHash);
for (hel = helList; hel != NULL; hel = hel->next)
{
char *filterName = hel->name;
struct slName *valList = hel->val;
//#*** Need a much better way to dispatch...
if (sameString("factorSource", tdb->type))
factorSourceGratorAddFilter(grator, filterName, valList);
else if (startsWith("bed 5", tdb->type))
bed5AddFilter(grator, filterName, valList);
else
dyStringPrintf(dyInfo, "Ignoring %s filter %s\n", tdb->track, filterName);
}
hashFree(&varHash);
}
void addOutputTracks(struct annoGrator **pGratorList, struct hash *gratorsByName,
struct annoFormatter *vepOut, struct annoAssembly *assembly, char *chrom,
boolean doHtml, boolean *retHasRegulatory)
// Construct grators for tracks selected to appear in EXTRAS column
{
boolean includeReg = cartUsualBoolean(cart, "hgva_include_regulatory", TRUE);
boolean haveReg = FALSE;
char trackPrefix[128];
safef(trackPrefix, sizeof(trackPrefix), "hgva_track_%s_", database);
int trackPrefixLen = strlen(trackPrefix);
struct slPair *trackVar, *trackVars = cartVarsWithPrefix(cart, trackPrefix);
for (trackVar = trackVars; trackVar != NULL; trackVar = trackVar->next)
{
char *val = trackVar->val;
if (! (sameWord(val, "on") || atoi(val) > 0))
continue;
char *trackName = trackVar->name + trackPrefixLen;
if (sameString(trackName, "dbNsfpPolyPhen2"))
// PolyPhen2 must have a suffix now -- skip obsolete cartVar from existing carts
continue;
struct annoGrator *grator = hashFindVal(gratorsByName, trackName);
if (grator != NULL)
// We already have this as a grator:
continue;
enum PolyPhen2Subset subset = noSubset;
char *description = NULL;
char *column = NULL;
boolean isReg = FALSE;
if (startsWith("dbNsfp", trackName))
{
// trackName for PolyPhen2 has a suffix for subset -- strip it if we find it:
subset = stripSubsetFromTrackName(trackName);
description = dbNsfpDescFromTableName(trackName, subset, doHtml);
addDbNsfpSeqChange(trackName, assembly, gratorsByName, pGratorList);
char *fileName = fileNameFromTable(trackName);
if (fileName != NULL)
grator = hAnnoGratorFromBigFileUrl(fileName, NULL, assembly, ANNO_NO_LIMIT, agoNoConstraint);
}
else
{
struct trackDb *tdb = tdbForTrack(database, trackName, &fullTrackList);
if (tdb != NULL)
{
grator = hAnnoGratorFromTrackDb(assembly, tdb->table, tdb, chrom, ANNO_NO_LIMIT, NULL,
agoNoConstraint, NULL);
if (grator != NULL)
{
//#*** Need something more sophisticated but this works for our
//#*** limited selection of extra tracks:
if (asColumnFind(grator->streamer.asObj, "name") != NULL)
column = "name";
addFiltersToGrator(grator, tdb);
}
description = tdb->longLabel;
isReg = (includeReg &&
((sameString(database, "hg19") && isHg19RegulatoryTrack(tdb, NULL)) ||
(sameString(database, "hg38") && isHg38RegulatoryTrack(tdb, NULL))));
}
}
haveReg |= isReg;
updateGratorListAndVepExtra(grator, pGratorList, vepOut, subset, column, description, isReg);
if (grator != NULL)
hashAdd(gratorsByName, trackName, grator);
}
if (retHasRegulatory)
*retHasRegulatory = haveReg;
}
void addFilterTracks(struct annoGrator **pGratorList, struct hash *gratorsByName,
struct annoAssembly *assembly, char *chrom)
// Add grators for filters (not added to vepOut):
{
if (!cartUsualBoolean(cart, "hgva_include_snpCommon", TRUE))
{
struct annoGrator *grator = gratorForSnpBed4(gratorsByName, "Common", assembly, chrom,
agoMustNotOverlap, NULL);
updateGratorList(grator, pGratorList);
}
if (!cartUsualBoolean(cart, "hgva_include_snpMult", TRUE))
{
struct annoGrator *grator = gratorForSnpBed4(gratorsByName, "Mult", assembly, chrom,
agoMustNotOverlap, NULL);
updateGratorList(grator, pGratorList);
}
if (cartUsualBoolean(cart, "hgva_require_consEl", FALSE))
{
char *consElTrack = cartString(cart, "hgva_require_consEl_track");
struct annoGrator *grator = hashFindVal(gratorsByName, consElTrack);
if (grator == NULL)
{
struct trackDb *tdb = tdbForTrack(database, consElTrack, &fullTrackList);
if (tdb != NULL)
grator = hAnnoGratorFromTrackDb(assembly, tdb->table, tdb, chrom, ANNO_NO_LIMIT, NULL,
agoMustOverlap, NULL);
updateGratorList(grator, pGratorList);
}
else
grator->setOverlapRule(grator, agoMustOverlap);
}
}
static void getCartPosOrDie(char **retChrom, uint *retStart, uint *retEnd)
/* Get chrom:start-end from cart, errAbort if any problems. */
{
char *position = cartString(cart, hgvaRange);
if (! parsePosition(position, retChrom, retStart, retEnd))
errAbort("Expected position to be chrom:start-end but got '%s'", position);
}
static char *sampleVariantsPath(struct annoAssembly *assembly, char *geneTrack)
/* Construct a path for a trash file of contrived example variants for this assembly
* and gene track. */
{
char *chrom = NULL;
uint start = 0, end = 0;
getCartPosOrDie(&chrom, &start, &end);
mkdirTrashDirectory(hgvsTrashSubDir);
struct dyString *dy = dyStringCreate("%s/%s/%s_%s_%s_%u-%u.vcf",
trashDir(), hgvsTrashSubDir, assembly->name, geneTrack,
chrom, start, end);
return dyStringCannibalize(&dy);
}
static char *makeMinimalVcfHeader(char *db, char *headerDefs)
/* Return a string containing a simple no-genotypes VCF header. db must be non-NULL.
* headerDefs can be NULL or empty or line(s) starting with '##' and ending w/'\n' */
{
struct dyString *dy = dyStringCreate("##fileformat=VCFv4.2\n"
"##reference=%s\n"
"%s"
"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n",
db, (headerDefs ? headerDefs : ""));
return dyStringCannibalize(&dy);
}
static void writeMinimalVcfHeader(FILE *f, char *db, char *headerDefs)
/* Write header for VCF with no genotype data. db must be non-NULL.
* headerDefs can be NULL or empty or line(s) starting with '##' and ending w/'\n' */
{
char *headerString = makeMinimalVcfHeader(db, headerDefs);
fputs(headerString, f);
freeMem(headerString);
}
static void mutateBase(char *pBase)
/* Change *pBase into a different base. */
{
static char *bases = "ACGT";
char c;
// In real life the probabilities aren't even, but this is easier.
while ((c = bases[(uint)(floor(drand48() * 4.0))]) == *pBase)
;
*pBase = c;
}
static void writeMinimalVcfRow(FILE *f, struct vcfRecord *rec)
/* Write a minimalist VCF row (coords, name, alleles and placeholders). */
{
// VCF columns: #CHROM POS ID REF ALT QUAL FILTER INFO
fprintf(f, "%s\t%u\t%s\t%s\t",
rec->chrom, rec->chromStart+1, rec->name, rec->alleles[0]);
// Comma-separated alt alleles:
int i;
for (i = 1; i < rec->alleleCount; i++)
fprintf(f, "%s%s", (i > 1 ? "," : ""), rec->alleles[i]);
// Placeholder qual, filter, info.
fprintf(f, "\t.\t.\t.\n");
}
static void writeMinimalVcfRowFromBed(FILE *f, struct bed4 *bed, struct dnaSeq *seq, uint seqOffset)
/* Write a minimalist VCF row with a mutation of the reference sequence at the given
* position. */
{
uint indelBase = 0, start = bed->chromStart, end = bed->chromEnd;
if (end != start+1)
{
// VCF treats indels in a special way: pos is the base to the left of the indel,
// and both ref and alt alleles include the base to the left of the indel.
indelBase = 1;
}
// Get reference allele sequence:
uint refAlLen = end - start + indelBase;
char ref[refAlLen+1];
uint seqStart = start - indelBase - seqOffset;
safencpy(ref, sizeof(ref), seq->dna+seqStart, refAlLen);
touppers(ref);
// Alternate allele seq needs extra room in case of single-base insertion:
char alt[refAlLen+2];
alt[0] = ref[0];
alt[1] = '\0';
if (start == end)
// insertion: alt gets extra base
safecpy(alt+1, sizeof(alt)-1, "A");
else if (end == start+1)
// single nucleotide variant
mutateBase(alt);
// else deletion: leave alt truncated after the shared base to left of indel
// VCF columns: #CHROM POS ID REF ALT QUAL FILTER INFO
fprintf(f, "%s\t%u\t%s\t%s\t%s\t.\t.\t.\n",
bed->chrom, start+1-indelBase, bed->name, ref, alt);
}
static void writeArbitraryVariants(FILE *f, struct annoAssembly *assembly)
/* Concoct some variants at an arbitrary position on some assembly sequence.
* We shouldn't ever get here unless the selected geneTrack is empty. */
{
char *chrom = "NULL";
uint start = 0, end = 0;
getCartPosOrDie(&chrom, &start, &end);
struct bed4 *bed = bed4New(chrom, start, start, "ex_ins");
struct dnaSeq *seq = twoBitReadSeqFragLower(assembly->tbf, chrom, start-1, start+100);
writeMinimalVcfRowFromBed(f, bed, seq, start-1);
dnaSeqFree(&seq);
}
static void addGpFromRow(struct genePred **pGpList, struct annoRow *row,
boolean *pNeedCoding, boolean *pNeedNonCoding, boolean isBig)
/* If row is coding and we need a coding gp, add it to pGpList and update pNeedCoding;
* likewise for noncoding. */
{
struct genePred *gp = isBig ? (struct genePred *)genePredFromBigGenePredRow(row->data) : genePredLoad(row->data);
if (gp->cdsStart != gp->cdsEnd && *pNeedCoding)
{
slAddHead(pGpList, gp);
*pNeedCoding = FALSE;
}
else if (gp->cdsStart == gp->cdsEnd && *pNeedNonCoding)
{
slAddHead(pGpList, gp);
*pNeedNonCoding = FALSE;
}
}
static void addGpFromPos(struct annoStreamer *geneStream, char *chrom, uint start, uint end,
struct genePred **pGpList, boolean *pNeedCoding, boolean *pNeedNonCoding,
struct lm *lm, boolean isBig)
/* Get up to 10 rows from position; if we find one coding and one non-coding gene,
* add them to pGpList and update pNeed* accordingly. */
{
geneStream->setRegion(geneStream, chrom, start, end);
int rowCount = 0;
struct annoRow *row = NULL;
while (rowCount < 10 && (*pNeedCoding || *pNeedNonCoding)
&& (row = geneStream->nextRow(geneStream, NULL, 0, lm)) != NULL)
addGpFromRow(pGpList, row, pNeedCoding, pNeedNonCoding, isBig);
}
static struct genePred *genesFromPosition(struct annoStreamer *geneStream, boolean isBig,
boolean *retGotCoding, boolean *retGotNonCoding)
/* Try to get a coding and non-coding gene from geneStream at cart position.
* If there are none, try whole chrom; if none there, first in assembly. */
{
struct genePred *gpList = NULL;
struct lm *lm = lmInit(0);
char *chrom = NULL;
uint start = 0, end = 0;
getCartPosOrDie(&chrom, &start, &end);
boolean needCoding = TRUE, needNonCoding = TRUE;
// First, look for both coding and noncoding genes at cart position:
addGpFromPos(geneStream, chrom, start, end, &gpList, &needCoding, &needNonCoding, lm, isBig);
// If that didn't do it, try querying whole chrom
if (needCoding || needNonCoding)
addGpFromPos(geneStream, chrom, 0, 0, &gpList, &needCoding, &needNonCoding, lm, isBig);
// If we still haven't found either coding or non-coding on the cart's current chrom,
// try whole genome:
if (needCoding && needNonCoding)
addGpFromPos(geneStream, NULL, 0, 0, &gpList, &needCoding, &needNonCoding, lm, isBig);
slSort(&gpList, genePredCmp);
lmCleanup(&lm);
if (retGotCoding)
*retGotCoding = !needCoding;
if (retGotNonCoding)
*retGotNonCoding = !needNonCoding;
return gpList;
}
static void addSnv(struct bed4 **pBedList, char *chrom, uint start, char *name)
/* Add a single-nucleotide bed4 item. */
{
slAddHead(pBedList, bed4New(chrom, start, start+1, name));
}
// Stuff within this range (see gpFx.h) is considered upstream/downstream of a transcript:
#define UPSTREAMFUDGE GPRANGE
static struct bed4 *sampleVarBedFromGenePred(struct genePred *gp, struct annoAssembly *assembly,
uint txSeqStart, uint txSeqEnd, boolean exonOnly)
/* Construct imaginary variants that hit various parts of the transcript
* described in gp, using reference base values from assembly. */
{
uint basesLeft = gp->txStart - txSeqStart, basesRight = txSeqEnd - gp->txEnd;
struct bed4 *bedList = NULL;
if (!exonOnly && basesLeft > UPSTREAMFUDGE)
// SNV, intergenic to the left
addSnv(&bedList, gp->chrom, txSeqStart, "ex_igLeft");
if (!exonOnly && basesLeft > 0)
// SNV, up/downstream to the left
addSnv(&bedList, gp->chrom, gp->txStart - 1, "ex_updownLeft");
if (!exonOnly && gp->exonCount > 1)
{
// SNV, intron
uint start = (gp->exonEnds[0] + gp->exonStarts[1]) / 2;
addSnv(&bedList, gp->chrom, start, "ex_intron");
// SNV, splice
addSnv(&bedList, gp->chrom, gp->exonStarts[1] - 2, "ex_splice");
}
boolean isCoding = (gp->cdsStart != gp->cdsEnd);
if (isCoding)
{
if (gp->txStart < gp->cdsStart)
// SNV, UTR to the left
addSnv(&bedList, gp->chrom, gp->txStart + 1, "ex_utrLeft");
int cdsExon = 0;
while (gp->cdsStart > gp->exonEnds[cdsExon] && cdsExon < gp->exonCount)
cdsExon++;
uint start = gp->cdsStart + 2;
// single-base insertion, leftmost codon
slAddHead(&bedList, bed4New(gp->chrom, start, start, "ex_codonLeftIns"));
// 3-base deletion leftmost codon
slAddHead(&bedList, bed4New(gp->chrom, start, start+3, "ex_codonLeftDel"));
// SNV, leftmost codon
addSnv(&bedList, gp->chrom, start+1, "ex_codonLeft");
if (gp->txEnd > gp->cdsEnd)
// SNV, UTR to the right
addSnv(&bedList, gp->chrom, gp->txEnd - 1, "ex_utrRight");
}
else
{
// noncoding exon variant
uint start = (gp->exonStarts[0] + gp->exonEnds[0]) / 2;
addSnv(&bedList, gp->chrom, start, "ex_nce");
}
if (!exonOnly && basesRight > 0)
// SNV, up/downstream to the left
addSnv(&bedList, gp->chrom, gp->txEnd + 1, "ex_updownRight");
return bedList;
}
// margin for intergenic variants around transcript:
#define IGFUDGE 5
static struct annoStreamer *makeSampleVariantsStreamer(struct annoAssembly *assembly,
struct trackDb *geneTdb, int maxOutRows)
/* Construct a VCF file of example variants for db (unless it already exists)
* and return an annoStreamer for that file. If possible, make the variants hit a gene. */
{
char *sampleFile = sampleVariantsPath(assembly, geneTdb->track);
boolean forceRebuild = cartUsualBoolean(cart, "hgva_rebuildSampleVariants", FALSE);
if (! fileExists(sampleFile) || forceRebuild)
{
struct annoStreamer *geneStream = hAnnoStreamerFromTrackDb(assembly, geneTdb->table, geneTdb,
NULL, ANNO_NO_LIMIT, NULL);
boolean isBig = sameString(geneTdb->type, "bigGenePred");
boolean gotCoding = FALSE, gotNonCoding = FALSE;
struct genePred *gpList = genesFromPosition(geneStream, isBig, &gotCoding, &gotNonCoding);
FILE *f = mustOpen(sampleFile, "w");
writeMinimalVcfHeader(f, assembly->name, NULL);
if (gpList == NULL)
{
warn("Unable to find any gene transcripts in '%s' (%s)",
geneTdb->shortLabel, geneTdb->track);
writeArbitraryVariants(f, assembly);
}
else
{
struct bed4 *bedList = NULL;
uint chromSize = annoAssemblySeqSize(assembly, gpList->chrom);
uint minSeqStart = chromSize, maxSeqEnd = 0;
struct genePred *gp;
for (gp = gpList; gp != NULL; gp = gp->next)
{
int txSeqStart = gp->txStart - (UPSTREAMFUDGE + IGFUDGE);
uint txSeqEnd = gp->txEnd + UPSTREAMFUDGE + IGFUDGE;
if (txSeqStart < 0)
txSeqStart = 0;
if (txSeqEnd > chromSize)
txSeqEnd = chromSize;
if (txSeqStart < minSeqStart)
minSeqStart = txSeqStart;
if (txSeqEnd > maxSeqEnd)
maxSeqEnd = txSeqEnd;
// If we got a coding gp, but this gp is non-coding, just do its exon variant:
boolean exonOnly = gotCoding && (gp->cdsStart == gp->cdsEnd);
slCat(&bedList, sampleVarBedFromGenePred(gp, assembly, txSeqStart, txSeqEnd, exonOnly));
}
slSort(&bedList, bedCmp);
struct dnaSeq *txSeq = twoBitReadSeqFragLower(assembly->tbf, gpList->chrom,
minSeqStart, maxSeqEnd);
struct bed4 *bed;
for (bed = bedList; bed != NULL; bed = bed->next)
{
writeMinimalVcfRowFromBed(f, bed, txSeq, minSeqStart);
}
dnaSeqFree(&txSeq);
bed4FreeList(&bedList);
}
geneStream->close(&geneStream);
carefulClose(&f);
}
return annoStreamVcfNew(sampleFile, NULL, FALSE, assembly, maxOutRows);
}
static char *md5TrashPath(struct annoAssembly *assembly, char *string)
/* Use the md5sum of a string to make a hopefully unique trash filename. */
{
char *md5sum = md5HexForString(string);
mkdirTrashDirectory(hgvsTrashSubDir);
struct dyString *dy = dyStringCreate("%s/%s/%s_%s.vcf", trashDir(), hgvsTrashSubDir,
assembly->name, md5sum);
return dyStringCannibalize(&dy);
}
static struct slName *hashListNames(struct hash *hash)
/* Return a list of all element names in the hash (if any). */
{
struct slName *list = NULL;
struct hashCookie cookie = hashFirst(hash);
struct hashEl *hel;
while ((hel = hashNext(&cookie)) != NULL)
slAddHead(&list, slNameNew(hel->name));
return list;
}
static char *encloseInAngleBracketsDbSnp(char *stringIn)
/* Return a string that has
, with spaces replaced by '_'s. */
{
int stringInLen = strlen(stringIn);
int stringOutLen = stringInLen + strlen("") + 1;
char *stringOut = needMem(stringOutLen);
safef(stringOut, stringOutLen, "", stringIn);
subChar(stringOut, ' ', '_');
return stringOut;
}
// dbSNP named alleles have many ways to describe a deletion from the reference,
// for example "LARGEDELETION", "LARGE DELETION", "... DELETED", "... DEL":
static const char *dbSnpDelRegex = "^\\(.*(DELET.*| DEL)\\)$";
static char **parseDbSnpAltAlleles(char *refAl, char *obsAls, boolean minusStrand,
int *retAltAlCount, boolean *retNeedLeftBase)
/* Given a non-symbolic reference allele and slash-sep observed alleles from dbSNP,
* return an array of +-strand alleles that are not the same as the reference.
* If any allele is "-" (deleted, zero-length), then set retNeedLeftBase to TRUE
* because in this case VCF requires that the reference base to the left of the indel
* must be added to all alleles, and the start coord also moves one base to the left.
* Also, if any alt allele is symbolic, padding is required.
* Note: this trashes obsAls. Resulting array can be freed but not its contents. */
{
int obsCount = countChars(obsAls, '/') + 1;
char *obsWords[obsCount];
chopByChar(obsAls, '/', obsWords, obsCount);
boolean obsHasDeletion = FALSE;
int i;
for (i = 0; i < obsCount; i++)
if (sameString(obsWords[i], "-"))
{
obsHasDeletion = TRUE;
break;
}
char **altAls;
AllocArray(altAls, obsCount);
int altCount = 0;
boolean needLeftBase = isEmpty(refAl) || sameString(refAl, "-");
for (i = 0; i < obsCount; i++)
{
char *altAl = obsWords[i];
int altAlLen = strlen(altAl);
if (minusStrand && isAllNt(altAl, altAlLen))
reverseComplement(altAl, altAlLen);
if (differentString(altAl, refAl))
{
if (sameString(altAl, "-"))
{
altAls[altCount] = "";
needLeftBase = TRUE;
}
else
{
// It would be nice to expand the "(CA)11/12/14/15/16/17/18/19/20" syntax of
// some dbSNP observed's. What are these?: "(D1S243)", "(D1S2870)"
// Unfortunately for observed="lengthTooLong" we just can't get the correct allele
// sequence. (76,130 of those in snp138)
// Hmmm, I guess we could at least stick in the right number of N's if we can
// parse "(245 BP INSERTION)". (2403 rows rlike "[0-9]+ BP ?INSERTION" in snp138)
if (!isAllNt(altAl, altAlLen))
{
// Symbolic allele: left base required, and enclose it in 's.
// But if it's one of dbSNP's LARGEDELETION kind of alleles, that is redundant
// with the reference allele, so if we know there is already a "-" allele,
// skip it.
if (obsHasDeletion && regexMatch(altAl, dbSnpDelRegex))
continue;
needLeftBase = TRUE;
altAl = encloseInAngleBracketsDbSnp(altAl);
}
altAls[altCount] = altAl;
}
altCount++;
}
}
*retAltAlCount = altCount;
*retNeedLeftBase = needLeftBase;
return altAls;
}
char *firstNCommaSep(struct slName *nameList, int n)
/* Return a comma-separated string with the first n names in nameList. */
{
struct dyString *dy = dyStringNew(0);
int i;
struct slName *el;
for (i=0, el=nameList; i < 5 && el != NULL; i++, el = el->next)
{
if (i > 0)
dyStringAppend(dy, ", ");
dyStringPrintf(dy, "'%s'", el->name);
}
return dyStringCannibalize(&dy);
}
//#*** Variant ID-matching should be metadata-driven too. termRegex -> data source.
static const char *rsIdRegex = "^rs[0-9]+$";
static void rsIdsToVcfRecords(struct annoAssembly *assembly, struct slName *rsIds,
struct vcfFile *vcff, struct vcfRecord **pRecList,
struct slName **pCommentList)
/* If possible, look up coordinates and alleles of dbSNP rs IDs. */
{
if (rsIds == NULL)
return;
struct trackDb *tdb = hFindLatestSnpTrack(database, NULL, &fullTrackList);
if (tdb == NULL)
return;
struct sqlConnection *conn = hAllocConn(assembly->name);
// Build a 'name in (...)' query, and build a hash of IDs so we can test whether all were found
struct dyString *dq = sqlDyStringCreate("select chrom, chromStart, chromEnd, name, strand, "
"refUCSC, observed from %s where name in (",
tdb->table);
struct hash *idHash = hashNew(0);
struct slName *id;
for (id = rsIds; id != NULL; id = id->next)
{
tolowers(id->name);
dyStringPrintf(dq, "%s'%s'", (id != rsIds ? "," : ""), id->name);
hashStoreName(idHash, id->name);
}
dyStringAppend(dq, ");");
struct sqlResult *sr = sqlGetResult(conn, dq->string);
// Construct a minimal VCF row to make a vcfRecord for each variant.
char *vcfRow[9];
vcfRow[5] = vcfRow[6] = vcfRow[7] = "."; // placeholder for qual, filter, info
// It would be cool to someday add snpNNN's exceptions column to the filter or info.
struct dyString *dyAltAlStr = dyStringNew(0);
int i;
char **row;
while ((row = sqlNextRow(sr)) != NULL)
{
char *chrom = row[0];
uint chromStart = atoll(row[1]);
uint chromEnd = atoll(row[2]);
char *name = row[3];
char *strand = row[4];
char refAl[max(strlen(row[5]), chromEnd-chromStart) + 2];
safecpy(refAl, sizeof(refAl), row[5]);
if (sameString(refAl, "-"))
refAl[0] = '\0';
else if (! isAllNt(refAl, strlen(refAl)))
{
// refUCSC is symbolic like "( 2548bp insertion )" -- just use the actual reference
// sequence for now, to keep things simple downstream.
//#*** Need something more efficient, like a sequence cache inside assembly!
struct dnaSeq *seq = twoBitReadSeqFragLower(assembly->tbf, chrom, chromStart, chromEnd);
safecpy(refAl, sizeof(refAl), seq->dna);
toUpperN(refAl, seq->size);
dnaSeqFree(&seq);
}
char *obsAls = row[6];
int altAlCount = 0;
boolean needLeftBase = FALSE;
char **altAls = parseDbSnpAltAlleles(refAl, obsAls, sameString(strand, "-"),
&altAlCount, &needLeftBase);
needLeftBase |= (chromStart == chromEnd); // should be redundant, but just in case.
hashRemove(idHash, name);
uint vcfStart = chromStart + 1;
dyStringClear(dyAltAlStr);
// If this is a non-symbolic indel, we'll have to move start one base to the left
// and add the reference base to the left of the actual alleles
if (needLeftBase)
{
vcfStart--;
//#*** Need something more efficient, like a sequence cache inside assembly!
struct dnaSeq *seq = twoBitReadSeqFragLower(assembly->tbf, chrom,
chromStart-1, chromStart);
toUpperN(seq->dna, 1);
char leftBase = seq->dna[0];
dnaSeqFree(&seq);
char refAlPlus[strlen(refAl)+2];
safef(refAlPlus, sizeof(refAlPlus), "%c%s", leftBase, refAl);
safecpy(refAl, sizeof(refAl), refAlPlus);
for (i = 0; i < altAlCount; i++)
{
if (i > 0)
dyStringAppendC(dyAltAlStr, ',');
if (isAllNt(altAls[i], strlen(altAls[i])))
dyStringPrintf(dyAltAlStr, "%c%s", leftBase, altAls[i]);
else
dyStringAppend(dyAltAlStr, altAls[i]);
}
}
else
{
// Not an indel, just make comma-sep string of alt alleles.
for (i = 0; i < altAlCount; i++)
{
if (i > 0)
dyStringAppendC(dyAltAlStr, ',');
dyStringAppend(dyAltAlStr, altAls[i]);
}
}
char vcfStartStr[64];
safef(vcfStartStr, sizeof(vcfStartStr), "%d", vcfStart);
vcfRow[0] = chrom;
vcfRow[1] = vcfStartStr;
vcfRow[2] = name;
vcfRow[3] = refAl;
vcfRow[4] = dyAltAlStr->string;
struct vcfRecord *rec = vcfRecordFromRow(vcff, vcfRow);
slAddHead(pRecList, rec);
}
dyStringFree(&dyAltAlStr);
// Check for IDs not found
struct slName *notFoundIds = hashListNames(idHash);
if (notFoundIds != NULL)
{
char *namesNotFound = firstNCommaSep(notFoundIds, 5);
struct dyString *dy = dyStringCreate("%d rs# IDs not found, e.g. %s",
slCount(notFoundIds), namesNotFound);
slAddTail(pCommentList, slNameNew(dy->string));
freeMem(namesNotFound);
dyStringFree(&dy);
}
slNameFreeList(¬FoundIds);
}
static struct vcfRecord *parseVariantIds(struct annoAssembly *assembly, char *variantIdText,
struct slName **pCommentList)
/* Return a sorted list of minimal vcfRecords (coords, name, ref and alt alleles)
* corresponding to each pasted variant. */
{
struct vcfRecord *recList = NULL;
char *p = cloneString(variantIdText), *id;
struct slName *rsIds = NULL, *unknownIds = NULL;
subChar(p, ',', ' ');
while ((id = nextWord(&p)) != NULL)
{
if (regexMatchNoCase(id, rsIdRegex))
slNameAddHead(&rsIds, id);
else
slNameAddHead(&unknownIds, id);
}
if (unknownIds != NULL)
{
slReverse(&unknownIds);
char *firstUnknownIds = firstNCommaSep(unknownIds, 5);
struct dyString *dy = dyStringCreate("%d variant identifiers are unrecognized, e.g. %s",
slCount(unknownIds), firstUnknownIds);
slAddTail(pCommentList, slNameNew(dy->string));
freeMem(firstUnknownIds);
dyStringFree(&dy);
}
struct vcfFile *vcff = vcfFileNew();
rsIdsToVcfRecords(assembly, rsIds, vcff, &recList, pCommentList);
slSort(&recList, vcfRecordCmp);
slNameFreeList(&rsIds);
slNameFreeList(&unknownIds);
return recList;
}
static void adjustRangeForVariants(struct vcfRecord *recList, char **pChrom,
uint *pStart, uint *pEnd)
/* Given a *sorted* list of VCF records, look at the first and last records and if
* they fall outside of the range, expand the range to include them. */
{
if (pChrom != NULL && *pChrom != NULL && recList != NULL)
{
// We have a non-empty sorted list of records, and search is not genome-wide;
// look at first and last variants to see if they're not already included in search range.
struct vcfRecord *first = recList, *last = recList;
while (last->next)
last = last->next;
if (differentString(*pChrom, first->chrom) || differentString(*pChrom, last->chrom))
{
// Variants span multiple chroms; force genome-wide search.
*pChrom = NULL;
*pStart = *pEnd = 0;
}
else
{
// Variant(s) are on the same chrom as search range; check starts and ends.
if (*pStart > first->chromEnd)
*pStart = first->chromEnd;
if (*pEnd < last->chromStart)
*pEnd = last->chromStart;
}
}
}
static struct annoStreamer *makeVariantIdStreamer(struct annoAssembly *assembly, int maxOutRows,
char **pChrom, uint *pStart, uint *pEnd,
struct slName **pCommentList)
/* Translate user's pasted/uploaded variant IDs into minimal VCF if possible.
* Return a VCF streamer for those, and if the current search position is too narrow
* to include all of the variants, widen it as necessary. */
{
// Hash variant text to get trash filename. Use if exists, otherwise build it.
char *variantIds = cartString(cart, hgvaVariantIds);
char *varFile = md5TrashPath(assembly, variantIds);
boolean forceRebuild = cartUsualBoolean(cart, "hgva_rebuildVariantIds", FALSE);
if (! fileExists(varFile) || forceRebuild)
{
struct vcfRecord *varList = parseVariantIds(assembly, variantIds, pCommentList);
//#*** If no variants were recognized, we should probably show main page with a warning.
adjustRangeForVariants(varList, pChrom, pStart, pEnd);
FILE *f = mustOpen(varFile, "w");
writeMinimalVcfHeader(f, assembly->name, NULL);
struct vcfRecord *var;
for (var = varList; var != NULL; var = var->next)
writeMinimalVcfRow(f, var);
carefulClose(&f);
}
return annoStreamVcfNew(varFile, NULL, FALSE, assembly, maxOutRows);
}
static struct vcfRecord *parseHgvs(struct vcfFile *vcff, struct annoAssembly *assembly,
char *hgvsTerms, struct slName **pCommentList)
/* Return a sorted list of vcfRecords . */
{
struct vcfRecord *recList = NULL;
struct slName *failedTerms = NULL;
struct dyString *dyError = dyStringNew(0);
struct lineFile *lf = lineFileOnString("user-provided HGVS terms", TRUE, cloneString(hgvsTerms));
char *term = NULL;
while (lineFileNextReal(lf, &term))
{
eraseTrailingSpaces(term);
dyStringClear(dyError);
struct vcfRow *vcfRow = hgvsToVcfRow(assembly->name, term, FALSE, dyError);
if (vcfRow)
{
char *row[8];
row[0] = vcfRow->chrom;
char posStr[64];
safef(posStr, sizeof(posStr), "%d", vcfRow->pos);
row[1] = posStr;
row[2] = vcfRow->id;
row[3] = vcfRow->ref;
row[4] = vcfRow->alt;
row[5] = ".";
row[6] = vcfRow->filter;
row[7] = vcfRow->info;
slAddHead(&recList, vcfRecordFromRow(vcff, row));
}
else
slNameAddHead(&failedTerms, dyStringContents(dyError));
}
if (failedTerms != NULL)
{
slReverse(&failedTerms);
char *firstUnknownIds = firstNCommaSep(failedTerms, 5);
struct dyString *dy = dyStringCreate("%d HGVS terms could not be parsed and/or mapped to %s, "
"e.g. %s",
slCount(failedTerms), assembly->name, firstUnknownIds);
slAddTail(pCommentList, slNameNew(dy->string));
freeMem(firstUnknownIds);
dyStringFree(&dy);
}
slSort(&recList, vcfRecordCmp);
slNameFreeList(&failedTerms);
dyStringFree(&dyError);
lineFileClose(&lf);
return recList;
}
static struct annoStreamer *makeHgvsStreamer(struct annoAssembly *assembly, int maxOutRows,
char **pChrom, uint *pStart, uint *pEnd,
struct slName **pCommentList)
/* Translate user's pasted/uploaded variant IDs into minimal VCF if possible.
* Return a VCF streamer for those., and if the current search position is too narrow
* to include all of the variants, widen it as necessary. */
{
// Hash HGVS input to get trash filename. Use if exists, otherwise build it.
char *hgvsTerms = cartString(cart, hgvaHgvs);
char *varFile = md5TrashPath(assembly, hgvsTerms);
boolean forceRebuild = cartUsualBoolean(cart, "hgva_rebuildHgvs", FALSE);
if (! fileExists(varFile) || forceRebuild)
{
char *headerString = makeMinimalVcfHeader(assembly->name, HGVS_VCF_HEADER_DEFS);
struct vcfFile *vcff = vcfFileFromHeader("hgVaiHgvs", headerString, VCF_IGNORE_ERRS);
struct vcfRecord *varList = parseHgvs(vcff, assembly, hgvsTerms, pCommentList);
//#*** If no HGVS terms could be mapped, we should probably show main page with a warning.
adjustRangeForVariants(varList, pChrom, pStart, pEnd);
FILE *f = mustOpen(varFile, "w");
writeMinimalVcfHeader(f, assembly->name, HGVS_VCF_HEADER_DEFS);
struct vcfRecord *var;
for (var = varList; var != NULL; var = var->next)
writeMinimalVcfRow(f, var);
carefulClose(&f);
vcfFileFree(&vcff);
}
return annoStreamVcfNew(varFile, NULL, FALSE, assembly, maxOutRows);
}
static struct trackDb *getVariantTrackDb(char *variantTrack)
/* Get a tdb for the user's selected variant track, or warn and return NULL
* (main page will be displayed) */
{
struct trackDb *varTdb = tdbForTrack(database, variantTrack, &fullTrackList);
if (varTdb == NULL)
{
if (isHubTrack(variantTrack))
warn("Can't find hub track '%s'", variantTrack);
else
warn("Can't find tdb for variant track '%s'", variantTrack);
}
else
checkVariantTrack(varTdb);
return varTdb;
}
static char *gencodeVersionFromTrack(char *track)
/* If track is a GENCODE table, find and return a pointer to the version at the end;
* otherwise return NULL. */
{
if (startsWithGencodeGene(track))
{
char *v = strrchr(track, 'V');
return v;
}
return NULL;
}
static char *gencodeTagTableForTrack(char *db, char *track)
/* If there is a wgEncodeGencodeTag table that can be associated with track,
* return it; otherwise return NULL. */
{
struct slName *versionList = getGencodeTagVersions();
if (startsWithGencodeGene(track))
{
char *version = gencodeVersionFromTrack(track);
if (version != NULL)
{
char table[PATH_LEN];
return cloneString(gencodeTableName("Tag", version, table, sizeof(table)));
}
}
else if (sameString(track, "refGene") && refGeneHasGencodeTags(versionList))
{
char *version = getLatestGencodeVersion(versionList);
char table[PATH_LEN];
if (hTableExists(db, gencodeTableName("RefSeq", version, table, sizeof(table))))
return cloneString(gencodeTableName("Tag", version, table, sizeof(table)));
}
else if (sameString(track, "knownGene") && knownGeneHasGencodeTags())
{
if (hTableExists(db, "knownToTag"))
return cloneString("knownToTag");
}
return NULL;
}
static struct joinerDtf *getTxStatusExtras(char *db, char *track)
// Several kinds of transcript status may be enabled in the cart; if any are enabled,
// and if they apply to track, return the tables & fields to be joined with the track.
{
struct joinerDtf *txStatusExtras = NULL;
if (cartUsualBoolean(cart, "hgva_txStatus_gencode", FALSE))
{
char *gencodeTagTable = gencodeTagTableForTrack(db, track);
if (gencodeTagTable != NULL)
{
char *field = "tag";
if (sameString("knownToTag", gencodeTagTable))
field = "value";
slAddHead(&txStatusExtras, joinerDtfNew(db, gencodeTagTable, field));
}
}
if (cartUsualBoolean(cart, "hgva_txStatus_knownCanonical", FALSE) &&
sameString(track, "knownGene") &&
hTableExists(db, "knownCanonical"))
{
slAddHead(&txStatusExtras, joinerDtfNew(db, "knownCanonical", "transcript"));
}
if (cartUsualBoolean(cart, "hgva_txStatus_refSeqStatus", FALSE) &&
sameString(track, "refGene") &&
genbankTableExists(db, refSeqStatusTable))
{
char *genbankDb=NULL, *tableName=NULL;
genbankGetDbTable(db, refSeqStatusTable, &genbankDb, &tableName);
slAddHead(&txStatusExtras, joinerDtfNew(genbankDb, tableName, "status"));
}
return txStatusExtras;
}
static void configAddDtf(struct dyString *dy, struct joinerDtf *dtf, boolean *pIsFirst)
/* Add a JSON object with [db].table and (list of one) field. */
{
if (! *pIsFirst)
dyStringAppend(dy, ", ");
// The convention for related tables in same db as track is to prepend "." instead of ".":
char *db = (sameString(dtf->database, database) ? "" : dtf->database);
dyStringPrintf(dy, "{ \"table\": \"%s.%s\", \"fields\": [\"%s\"] }", db, dtf->table, dtf->field);
*pIsFirst = FALSE;
}
static void configAddTableField(struct dyString *dy, char *table, char *field, boolean *pIsFirst)
/* Add a JSON object with current database, table and (list of one) field. */
{
struct joinerDtf dtf;
dtf.database = database;
dtf.table = table;
dtf.field = field;
configAddDtf(dy, &dtf, pIsFirst);
}
static struct jsonElement *configForStreamer(char *db, struct trackDb *tdb,
struct joinerDtf *txStatusExtras)
/* Add VAI-specific config options, if applicable. */
{
struct jsonElement *config = NULL;
char *track = tdb->track;
struct dyString *dyConfig = dyStringCreate("{ \"naForMissing\": false,"
" \"relatedTables\": [ ");
boolean isFirst = TRUE;
// If track is sql-based knownGene and we have kgXref, then add kgXref.geneSymbol after
// the columns of knownGene.
if (sameString(track, "knownGene") &&
!isCustomTrack(track) && !isHubTrack(track) &&
!trackDbSetting(tdb, "bigDataUrl") &&
hTableExists(db, "kgXref"))
{
configAddTableField(dyConfig, "kgXref", "geneSymbol", &isFirst);
}
struct joinerDtf *txStatDtf;
for (txStatDtf = txStatusExtras; txStatDtf != NULL; txStatDtf = txStatDtf->next)
configAddDtf(dyConfig, txStatDtf, &isFirst);
// If any of the above apply, close the relatedTables list and config object
// and parse into jsonElements.
if (! isFirst)
{
dyStringAppend(dyConfig, " ] }");
config = jsonParse(dyConfig->string);
dyStringFree(&dyConfig);
}
return config;
}
static void adjustGpVarOverlapRule(struct annoGrator *gpVarGrator, boolean haveRegulatory)
/* If we're able to detect regulatory elements, and want to keep those annotations, loosen up
* gpVarGrator's overlap rule from the default (must overlap). */
{
if (haveRegulatory && cartUsualBoolean(cart, "hgva_include_regulatory", TRUE))
gpVarGrator->setOverlapRule(gpVarGrator, agoNoConstraint);
}
static void addTxStatusExtras(struct annoFormatter *vepOut, char *geneTrack,
struct annoGrator *gpVarGrator,
struct joinerDtf *txStatusExtras)
/* Given a list of tables and fields that will be joined with geneTrack to provide transcript
* status info, configure vepOut to put them in the EXTRAs column. */
{
struct joinerDtf *txStatDtf;
for (txStatDtf = txStatusExtras; txStatDtf != NULL; txStatDtf = txStatDtf->next)
{
char *tag = NULL, *description = NULL;
boolean isBoolean = FALSE;
// Double-check that we're not joining in some table from a different assembly database:
if (differentString(txStatDtf->database, database) &&
differentString(txStatDtf->database, "hgFixed"))
errAbort("addTxStatusExtras: Expected db=%s or hgFixed in txStatDtf but got %s",
database, txStatDtf->database);
if ((startsWith(GENCODE_PREFIX"Tag", txStatDtf->table) &&
sameString(txStatDtf->field, "tag")) ||
(sameString(txStatDtf->table, "knownToTag") &&
sameString(txStatDtf->field, "value")))
{
tag = "GENCODE_TAG";
description = "GENCODE tags for the transcript";
}
else if (sameString(txStatDtf->table, "knownCanonical") &&
sameString(txStatDtf->field, "transcript"))
{
tag = "CANONICAL";
description = "If present, the transcript is the 'canonical' transcript of the gene "
"(generally the longest isoform of the gene)";
isBoolean = TRUE;
}
else if (sameString(txStatDtf->table, "refSeqStatus") &&
sameString(txStatDtf->field, "status"))
{
tag = "REFSEQ_STATUS";
description = "RefSeq status of the transcript";
}
else
{
errAbort("addTxStatusExtras: Unrecognized {table,field}: {%s,%s}",
txStatDtf->table, txStatDtf->field);
}
char *column = annoStreamDbColumnNameFromDtf(database, geneTrack, txStatDtf);
annoFormatVepAddExtraItem(vepOut, (struct annoStreamer *)gpVarGrator,
tag, description, column, isBoolean);
}
}
void doQuery()
/* Translate simple form inputs into anno* components and execute query. */
{
dyInfo = dyStringNew(0);
char *chrom = NULL;
uint start = 0, end = 0;
if (sameString(regionType, hgvaRegionTypeRange))
getCartPosOrDie(&chrom, &start, &end);
struct annoAssembly *assembly = hAnnoGetAssembly(database);
boolean isCommandLine = (cgiOptionalString("cgiSpoof") != NULL);
char *geneTrack = cartString(cart, "hgva_geneTrack");
struct trackDb *geneTdb = tdbForTrack(database, geneTrack, &fullTrackList);
if (geneTdb == NULL)
{
warn("Can't find tdb for gene track %s", geneTrack);
if (! isCommandLine)
doUi();
return;
}
int maxVarRows = cartUsualInt(cart, "hgva_variantLimit", 10);
struct annoStreamer *primary = NULL;
char *primaryLongLabel = NULL;
char *variantTrack = cartString(cart, "hgva_variantTrack");
struct slName *commentList = NULL;
if (sameString(variantTrack, hgvaSampleVariants))
{
primary = makeSampleVariantsStreamer(assembly, geneTdb, maxVarRows);
primaryLongLabel = hgvaSampleVariantsLabel;
// Sample variants can't always be made within the currently selected position range,
// so just for these, force search to be genome-wide.
chrom = NULL;
start = 0;
end = 0;
}
else if (sameString(variantTrack, hgvaUseVariantIds))
{
// Override search position if cart position doesn't include all variants:
primary = makeVariantIdStreamer(assembly, maxVarRows, &chrom, &start, &end, &commentList);
primaryLongLabel = hgvaVariantIdsLabel;
}
else if (sameString(variantTrack, hgvaUseHgvs))
{
primary = makeHgvsStreamer(assembly, maxVarRows, &chrom, &start, &end, &commentList);
primaryLongLabel = hgvaHgvsLabel;
}
else if (sameString(variantTrack, hgvaUseVariantFileOrUrl))
{
char *fileOrUrl = cartString(cart, hgvaVariantFileOrUrl);
char *type = cartOptionalString(cart, hgvaVariantFileOrUrlType);
primary = hAnnoStreamerFromBigFileUrl(fileOrUrl, NULL, assembly, maxVarRows, type);
primaryLongLabel = hgvaVariantFileOrUrlLabel;
}
else
{
struct trackDb *varTdb = getVariantTrackDb(variantTrack);
if (varTdb == NULL)
{
if (! isCommandLine)
doUi();
return;
}
primary = hAnnoStreamerFromTrackDb(assembly, varTdb->table, varTdb, chrom, maxVarRows, NULL);
primaryLongLabel = varTdb->longLabel;
}
enum annoGratorOverlap geneOverlapRule = agoMustOverlap;
struct joinerDtf *txStatusExtras = getTxStatusExtras(database, geneTrack);
struct jsonElement *gpConfig = configForStreamer(database, geneTdb, txStatusExtras);
struct annoGrator *gpVarGrator = hAnnoGratorFromTrackDb(assembly, geneTdb->table, geneTdb, chrom,
ANNO_NO_LIMIT, primary->asObj,
geneOverlapRule, gpConfig);
setGpVarFuncFilter(gpVarGrator);
setHgvsOutOptions(gpVarGrator, geneTdb->track);
// Some grators may be used as both filters and output values. To avoid making
// multiple grators for the same source, hash them by trackName:
struct hash *gratorsByName = hashNew(8);
struct annoGrator *snpGrator = NULL;
char *snpDesc = NULL;
if (cartUsualBoolean(cart, "hgva_rsId", TRUE))
snpGrator = gratorForSnpBed4(gratorsByName, "", assembly, chrom, agoNoConstraint, &snpDesc);
// Now construct gratorList in the order in which annoFormatVep wants to see them,
// i.e. first the gpVar, then the snpNNN, then whatever else:
struct annoGrator *gratorList = NULL;
slAddHead(&gratorList, gpVarGrator);
if (snpGrator != NULL)
slAddHead(&gratorList, snpGrator);
// Text or HTML output?
char *outFormat = cartUsualString(cart, "hgva_outFormat", "vepTab");
boolean doHtml = sameString(outFormat, "vepHtml");
// Initialize VEP formatter:
struct annoFormatter *vepOut = annoFormatVepNew("stdout", doHtml,
primary, primaryLongLabel,
(struct annoStreamer *)gpVarGrator,
geneTdb->longLabel,
(struct annoStreamer *)snpGrator,
snpDesc, assembly);
addTxStatusExtras(vepOut, geneTrack, gpVarGrator, txStatusExtras);
boolean haveRegulatory = FALSE;
addOutputTracks(&gratorList, gratorsByName, vepOut, assembly, chrom, doHtml, &haveRegulatory);
adjustGpVarOverlapRule(gpVarGrator, haveRegulatory);
addFilterTracks(&gratorList, gratorsByName, assembly, chrom);
slReverse(&gratorList);
if (doHtml)
{
webStart(cart, database, "Annotated Variants in VEP/HTML format");
}
else if (! isCommandLine)
{
// Undo the htmlPushEarlyHandlers() because after this point they make ugly text:
popWarnHandler();
popAbortHandler();
textOpen();
webStartText();
}
if (isNotEmpty(dyInfo->string))
puts(dyInfo->string);
struct annoGratorQuery *query = annoGratorQueryNew(assembly, primary, gratorList, vepOut);
struct slName *comment;
for (comment = commentList; comment != NULL; comment = comment->next)
vepOut->comment(vepOut, comment->name);
if (chrom != NULL)
annoGratorQuerySetRegion(query, chrom, start, end);
annoGratorQueryExecute(query);
annoGratorQueryFree(&query);
if (doHtml)
webEnd();
else if (! isCommandLine)
textOutClose(&compressPipeline, NULL);
}
int main(int argc, char *argv[])
/* Process command line. */
{
long enteredMainTime = clock1000();
if (hIsPrivateHost())
pushCarefulMemHandler(LIMIT_2or6GB);
cgiSpoof(&argc, argv);
boolean isCommandLine = (cgiOptionalString("cgiSpoof") != NULL);
if (!isCommandLine)
htmlPushEarlyHandlers(); /* Make errors legible during initialization. */
oldVars = hashNew(10);
boolean startQuery = (cgiUsualString("hgva_startQuery", NULL) != NULL);
if (startQuery)
{
if (isCommandLine)
// No HTTP header for command-line use.
cart = cartForSession(hUserCookie(), excludeVars, oldVars);
else
cart = cartAndCookieNoContent(hUserCookie(), excludeVars, oldVars);
}
else
cart = cartAndCookie(hUserCookie(), excludeVars, oldVars);
// Try to deal with virt chrom position used by hgTracks.
if (startsWith("virt:", cartUsualString(cart, "position", "")))
cartSetString(cart, "position", cartUsualString(cart, "nonVirtPosition", ""));
/* Set up global variables. */
getDbAndGenome(cart, &database, &genome, oldVars);
initGenbankTableNames(database);
regionType = cartUsualString(cart, hgvaRegionType, hgvaRegionTypeGenome);
if (isEmpty(cartOptionalString(cart, hgvaRange)))
cartSetString(cart, hgvaRange, hDefaultPos(database));
int timeout = cartUsualInt(cart, "udcTimeout", 300);
if (udcCacheTimeout() < timeout)
udcSetCacheTimeout(timeout);
knetUdcInstall();
cartTrackDbInit(cart, &fullTrackList, &fullGroupList, TRUE);
if (lookupPosition(cart, hgvaRange))
{
if (startQuery)
doQuery();
else if (! isCommandLine)
doUi();
}
else
{
// Revert to lastPosition if we have multiple matches or warnings,
// especially in case user manually edits browser location as in #13009:
char *position = cartUsualString(cart, "lastPosition", hDefaultPos(database));
cartSetString(cart, hgvaRange, position);
if (webGotWarnings())
{
// We land here when lookupPosition pops up a warning box.
// Reset the problematic position and show the main page.
doMainPage();
}
// If lookupPosition returned FALSE and didn't report warnings,
// then it wrote HTML showing multiple position matches & links.
}
cartCheckout(&cart);
if (! isCommandLine)
cgiExitTime("hgVai", enteredMainTime);
return 0;
}