44ccfacbe3a3d4b300f80d48651c77837a4b571e galt Tue Apr 26 11:12:02 2022 -0700 SQL INJECTION Prevention Version 2 - this improves our methods by making subclauses of SQL that get passed around be both easy and correct to use. The way that was achieved was by getting rid of the obscure and not well used functions sqlSafefFrag and sqlDyStringPrintfFrag and replacing them with the plain versions of those functions, since these are not needed anymore. The new version checks for NOSQLINJ in unquoted %-s which is used to include SQL clauses, and will give an error the NOSQLINJ clause is not present, and this will automatically require the correct behavior by developers. sqlDyStringPrint is a very useful function, however because it was not enforced, users could use various other dyString functions and they operated without any awareness or checking for SQL correct use. Now those dyString functions are prohibited and it will produce an error if you try to use a dyString function on a SQL string, which is simply detected by the presence of the NOSQLINJ prefix. diff --git src/hg/hgVai/hgVai.c src/hg/hgVai/hgVai.c index 337ac33..7d18a19 100644 --- src/hg/hgVai/hgVai.c +++ src/hg/hgVai/hgVai.c @@ -1,3096 +1,3099 @@ /* hgVai - Variant Annotation Integrator. */ /* Copyright (C) 2014 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "jksql.h" #include "htmshell.h" #include "web.h" #include "cheapcgi.h" #include "cart.h" #include "cartTrackDb.h" #include "genbank.h" #include "hgConfig.h" #include "hgHgvs.h" #include "hui.h" #include "grp.h" #include "hCommon.h" #include "hgFind.h" #include "hPrint.h" #include "jsHelper.h" #include "memalloc.h" #include "textOut.h" #include "trackHub.h" #include "hubConnect.h" #include "twoBit.h" #include "gpFx.h" #include "bigGenePred.h" #include "udc.h" #include "knetUdc.h" #include "md5.h" #include "regexHelper.h" #include "hAnno.h" #include "annoGratorQuery.h" #include "annoGratorGpVar.h" #include "annoFormatVep.h" #include "annoStreamBigBed.h" #include "annoStreamDb.h" #include "annoStreamVcf.h" #include "windowsToAscii.h" #include "obscure.h" #include "chromAlias.h" #include "libifyMe.h" #define GENCODE_TAG_DOC_URL "\"http://www.gencodegenes.org/gencode_tags.html\"" #define GENCODE_BASIC_DOC_URL "\"http://www.gencodegenes.org/faq.html\"" #define REFSEQ_STATUS_DOC_URL "\"https://www.ncbi.nlm.nih.gov/books/NBK21091/table/ch18.T.refseq_status_codes\"" #define APPRIS_DOC_URL "\"http://appris.bioinfo.cnio.es/#/help/database\"" #define HGVS_MUST_USE_ACC "Note: HGVS terms must use versioned transcript or genomic accessions " \ "(e.g. NM_000023.3, NC_000012.11, ENST00000000233.9), not gene symbols." /* Global Variables */ struct cart *cart; /* CGI and other variables */ struct hash *oldVars = NULL; /* The cart before new cgi stuff added. */ char *genome = NULL; /* Name of genome - mouse, human, etc. */ char *database = NULL; /* Current genome database - hg17, mm5, etc. */ char *regionType = NULL; /* genome, ENCODE pilot regions, or specific position range. */ struct grp *fullGroupList = NULL; /* List of all groups. */ struct trackDb *fullTrackList = NULL; /* List of all tracks in database. */ static struct pipeline *compressPipeline = (struct pipeline *)NULL; // Null terminated list of CGI Variables we don't want to save permanently: char *excludeVars[] = {"Submit", "submit", "hgva_startQuery", NULL,}; #define hgvaRange "position" #define hgvaRegionType "hgva_regionType" #define hgvaRegionTypeEncode "encode" #define hgvaRegionTypeGenome "genome" #define hgvaRegionTypeRange "range" #define hgvaPositionContainer "positionContainer" #define hgvsTrashSubDir "hgv" // auto-generated variants as input #define hgvaSampleVariants "hgva_internally_generated_sample_variants" #define hgvaSampleVariantsLabel "Artificial Example Variants" // dbSNP rsIDs as variant inputs: #define hgvaUseVariantIds "hgva_useVariantIds" #define hgvaVariantIdsLabel "Variant Identifiers" #define hgvaVariantIds "hgva_variantIds" #define hgvaUseVariantFileOrUrl "hgva_useVariantFileOrUrl" #define hgvaVariantPasteContainer "variantPasteContainer" // A local file or URL as variant input: #define hgvaVariantFileOrUrlLabel "from file or URL" #define hgvaVariantFileOrUrl "hgva_variantFileOrUrl" #define hgvaVariantFileOrUrlType "hgva_variantFileOrUrlType" // HGVS terms as variant inputs: #define hgvaUseHgvs "hgva_useHgvs" #define hgvaHgvsLabel "HGVS terms" #define hgvaHgvs "hgva_hgvs" #define hgvaHgvsPasteContainer "hgvsPasteContainer" void addSomeCss() /*#*** This should go in a .css file of course. */ { printf("<style>\n" "div.sectionLite { border-width: 1px; border-color: #"HG_COL_BORDER"; border-style: solid;" " background-color: #"HG_COL_INSIDE"; padding-left: 10px; padding-right: 10px; " " padding-top: 8px; padding-bottom: 5px; margin-top: 5px; margin-bottom: 5px }\n" ".sectionLiteHeader { font-weight: bold; font-size:larger; color:#000000;" " text-align:left; vertical-align:bottom; white-space:nowrap; }\n" "div.sectionLiteHeader.noReorderRemove { padding-bottom:5px; }\n" "div.sourceFilter { padding-top: 5px; padding-bottom: 5px }\n" "</style>\n"); } INLINE void startCollapsibleSection(char *sectionSuffix, char *title, boolean onByDefault) // Wrap shared args to jsBeginCollapsibleSectionFontSize { jsBeginCollapsibleSectionFontSize(cart, "hgva", sectionSuffix, title, onByDefault, "1.1em"); } INLINE void startSmallCollapsibleSection(char *sectionSuffix, char *title, boolean onByDefault) // Wrap shared args to jsBeginCollapsibleSectionFontSize { jsBeginCollapsibleSectionFontSize(cart, "hgva", sectionSuffix, title, onByDefault, "0.9em"); } #define endCollapsibleSection jsEndCollapsibleSection static struct dyString *onChangeStart() /* Start up a javascript onChange command */ { struct dyString *dy = jsOnChangeStart(); jsTextCarryOver(dy, hgvaRegionType); jsTextCarryOver(dy, hgvaRange); return dy; } static char *onChangeClade() /* Return javascript executed when they change clade. */ { struct dyString *dy = onChangeStart(); jsDropDownCarryOver(dy, "clade"); dyStringAppend(dy, " document.hiddenForm.org.value=0;"); dyStringAppend(dy, " document.hiddenForm.db.value=0;"); dyStringAppend(dy, " document.hiddenForm." hgvaRange ".value='';"); return jsOnChangeEnd(&dy); } static char *onChangeOrg() /* Return javascript executed when they change organism. */ { struct dyString *dy = onChangeStart(); jsDropDownCarryOver(dy, "clade"); jsDropDownCarryOver(dy, "org"); dyStringAppend(dy, " document.hiddenForm.db.value=0;"); dyStringAppend(dy, " document.hiddenForm." hgvaRange ".value='';"); return jsOnChangeEnd(&dy); } static char *onChangeDb() /* Return javascript executed when they change database. */ { struct dyString *dy = onChangeStart(); jsDropDownCarryOver(dy, "clade"); jsDropDownCarryOver(dy, "db"); dyStringAppend(dy, " document.hiddenForm." hgvaRange ".value='';"); return jsOnChangeEnd(&dy); } INLINE void printOption(char *val, char *selectedVal, char *label) /* For rolling our own select without having to build conditional arrays/lists. */ { printf("<OPTION VALUE='%s'%s>%s\n", val, (sameString(selectedVal, val) ? " SELECTED" : ""), label); } void printRegionListHtml(char *db) /* Make a dropdown choice of region type, with position input box that appears if needed. * Unlike hgTables, don't bother with ENCODE pilot regions -- unless someone misses it. * Return the selected region type. */ { printf("<SELECT ID='"hgvaRegionType"' NAME='"hgvaRegionType"'>\n"); jsOnEventById("change", hgvaRegionType, "hgva.changeRegion();"); printOption(hgvaRegionTypeGenome, regionType, "genome"); printOption(hgvaRegionTypeRange, regionType, "position or search term"); printf("</SELECT>"); } void topLabelSpansStart(char *label) { printf("<span style='display: inline-block; padding-right: 5px;'>" "<span style='display: block;'>%s</span>\n" "<span style='display: block; padding-bottom: 5px;'>\n", label); } void topLabelSpansEnd() { printf("</span></span>"); } char *makePositionInput() /* Return HTML for the position input. */ { struct dyString *dy = dyStringCreate("<INPUT TYPE=TEXT NAME=\"%s\" SIZE=%d VALUE=\"%s\">", hgvaRange, 45, addCommasToPos(NULL, cartString(cart, hgvaRange))); return dyStringCannibalize(&dy); } void printCtAndHubButtons() /* Print a div with buttons for hgCustom and hgHubConnect */ { boolean hasCustomTracks = customTracksExist(cart, NULL); puts("<div style='padding-top: 5px; padding-bottom: 5px'>"); hOnClickButton("prtCtHub_CtBut", "document.customTrackForm.submit(); return false;", hasCustomTracks ? CT_MANAGE_BUTTON_LABEL : CT_ADD_BUTTON_LABEL); printf(" "); if (hubConnectTableExists()) hOnClickButton("prtCtHub_TrkHub", "document.trackHubForm.submit(); return false;", "track hubs"); nbSpaces(3); printf("To reset <B>all</B> user cart settings (including custom tracks), \n" "<A HREF=\"cartReset?destination=%s\">click here</A>.\n", cgiScriptName()); puts("</div>"); } void hgGatewayCladeGenomeDb() /* Make a row of labels and row of buttons like hgGateway, but not using tables. */ { boolean gotClade = hGotClade(); if (gotClade) { topLabelSpansStart("clade"); printCladeListHtml(genome, "change", onChangeClade()); topLabelSpansEnd(); } topLabelSpansStart("genome"); if (gotClade) printGenomeListForCladeHtml(database, "change", onChangeOrg()); else printGenomeListHtml(database, "change", onChangeOrg()); topLabelSpansEnd(); topLabelSpansStart("assembly"); printAssemblyListHtml(database, "change", onChangeDb()); topLabelSpansEnd(); puts("<BR>"); topLabelSpansStart("region to annotate"); printRegionListHtml(database); topLabelSpansEnd(); topLabelSpansStart(""); // Yet another span, for hiding/showing position input and lookup button: printf("<span id='"hgvaPositionContainer"'%s>\n", differentString(regionType, hgvaRegionTypeRange) ? " style='display: none;'" : ""); puts(makePositionInput()); printf("</span>\n"); topLabelSpansEnd(); puts("<BR>"); } void printAssemblySection() /* Print assembly-selection stuff. * Redrawing the whole page when the assembly changes seems fine to me. */ { //#*** ---------- More copied verbatim, from hgTables/mainPage.c: ----------- /* Hidden form - for benefit of javascript. */ { static char *saveVars[] = { "clade", "org", "db", hgvaRange, hgvaRegionType }; jsCreateHiddenForm(cart, cgiScriptName(), saveVars, ArraySize(saveVars)); } /* Hidden form for jumping to custom tracks CGI. */ printf("<FORM ACTION='%s' NAME='customTrackForm' ID='customTrackForm'>", hgCustomName()); cartSaveSession(cart); printf("</FORM>\n"); /* Hidden form for jumping to track hub manager CGI. */ printf("<FORM ACTION='%s' NAME='trackHubForm'>", hgHubConnectName()); //#*** well, almost verbatim... lib version should use cgiScriptName. cartSaveSession(cart); printf("</FORM>\n"); printf("<FORM ACTION=\"%s\" NAME=\"mainForm\" ID=\"mainForm\" METHOD=%s>\n", cgiScriptName(), cartUsualString(cart, "formMethod", "POST")); cartSaveSession(cart); //#*** ------------------ end verbatim --------------- printf("<div class='sectionLiteHeader noReorderRemove'>" "Select Genome Assembly and Region</div>\n"); /* Print clade, genome and assembly line. */ hgGatewayCladeGenomeDb(); //#*** No longer ending form here... } void factorSourceListInputProperties(struct trackDb *tdb, struct slName **retFactorList, struct slName **retCellTypeList, struct slName **retTreatmentList) /* Get a list of factor names used in factorSource track. */ { char *inputsTable = trackDbSetting(tdb, "inputTrackTable"); if (isEmpty(inputsTable)) errAbort("track %s does not have an inputTrackTable setting", tdb->track); char query[2048]; struct sqlConnection *conn = hAllocConn(database); if (retFactorList && retCellTypeList && retTreatmentList) { sqlSafef(query, sizeof(query), "select distinct(factor) from %s order by factor", inputsTable); *retFactorList = sqlQuickList(conn, query); sqlSafef(query, sizeof(query), "select distinct(cellType) from %s order by cellType", inputsTable); *retCellTypeList = sqlQuickList(conn, query); sqlSafef(query, sizeof(query), "select distinct(treatment) from %s order by treatment", inputsTable); *retTreatmentList = sqlQuickList(conn, query); } else errAbort("factorSourceListInputProperties: ret args must be non-NULL."); hFreeConn(&conn); } void printMultiselect(char *track, char *label, char *var, struct slName *optionList) /* Print a label and multi-select, with hgva_track_var as cart var, with options in optionList, * marking as selected any options stored in cart. */ { printf("%s: ", label); char cartVar[1024]; safef(cartVar, sizeof(cartVar), "hgva_filter_%s_%s", track, var); struct hash *selHash = NULL; if (cartListVarExists(cart, cartVar)) selHash = hashFromSlNameList(cartOptionalSlNameList(cart, cartVar)); printf("<select id=\"%s\" name=\"%s\" style=\"display: none; font-size:.9em;\" " "class=\"filterBy\" multiple>\n", cartVar, cartVar); char *selected = ""; if (!selHash || hashLookup(selHash, "All")) selected = " selected"; printf("<option%s>All</option>", selected); struct slName *option; for (option = optionList; option != NULL; option = option->next) { selected = ""; if (selHash && hashLookup(selHash, option->name)) selected = " selected"; printf("<option%s>%s</option>", selected, option->name); } printf("</select>\n"); char shadowVar[1024]; safef(shadowVar, sizeof(shadowVar), "%s%s", cgiMultListShadowPrefix(), cartVar); cgiMakeHiddenVar(shadowVar, "1"); jsInlineF("$(document).ready(function(){ ddcl.setup($('#%s')[0]); });\n", cartVar); } void printFilterOptions(struct trackDb *tdb) /* Print a collapsible filter section for tdb, with controls depending on tdb->type. */ { char sectionName[512], cartVar[512]; safef(sectionName, sizeof(sectionName), "%s_filter", tdb->track); if (sameString(tdb->type, "factorSource")) { puts("<TABLE>"); startSmallCollapsibleSection(sectionName, "filter items", FALSE); struct slName *factorOptions = NULL, *cellTypeOptions = NULL, *treatmentOptions = NULL; factorSourceListInputProperties(tdb, &factorOptions, &cellTypeOptions, &treatmentOptions); printMultiselect(tdb->track, "factor", "name", factorOptions); printMultiselect(tdb->track, "cell type", "cellType", cellTypeOptions); printMultiselect(tdb->track, "treatment", "treatment", treatmentOptions); puts("<BR>"); puts("minimum peak score [0-1000]: "); safef(cartVar, sizeof(cartVar), "hgva_filter_%s_score", tdb->track); char *defaultScore = cartUsualString(cart, cartVar, "0"); printf("<input type=text name=\"%s\" size=12 value=\"%s\"><BR>", cartVar, defaultScore); // The dimensions of ui-dropdownchecklist multiselects are not correct when // the item is hidden. So, when this filter section is made visible, reinit them. jsInlineF( "$(function(){\n" "$('tr[id^=\"%s-\"]').bind('show',\n" " function(jqev) { \n" " var $multisels = $(jqev.target).find('.filterBy');\n" " var multiselElList = $multisels.each(function(ix, el){ return el; });\n" " ddcl.reinit(multiselElList);" " });\n" "});\n" , sectionName); puts("</TABLE>"); endCollapsibleSection(); } if (startsWith("bed 5", tdb->type)) //#*** TODO: detect bed# properly { puts("<TABLE>"); startSmallCollapsibleSection(sectionName, "filter items", FALSE); //#*** Also watch out for noScoreFilter or whatever it's called puts("minimum peak score [0-1000]: "); safef(cartVar, sizeof(cartVar), "hgva_filter_%s_score", tdb->track); char *defaultScore = cartUsualString(cart, cartVar, "0"); printf("<input type=text name=\"%s\" size=12 value=\"%s\"><BR>", cartVar, defaultScore); puts("</TABLE>"); endCollapsibleSection(); } } typedef boolean TdbFilterFunction(struct trackDb *tdb, void *filterData); /* Return TRUE if tdb passes filter criteria. */ void rFilterTrackList(struct trackDb *trackList, struct slRef **pPassingRefs, TdbFilterFunction *filterFunc, void *filterData) /* Recursively apply filterFunc and filterData to all tracks in trackList and * their subtracks. Add an slRef to pPassingRefs for each track/subtrack that passes. * Caller should slReverse(pPassingRefs) when recursion is all done. */ { struct trackDb *tdb; for (tdb = trackList; tdb != NULL; tdb = tdb->next) { if (tdb->subtracks != NULL) rFilterTrackList(tdb->subtracks, pPassingRefs, filterFunc, filterData); if (filterFunc(tdb, filterData)) slAddHead(pPassingRefs, slRefNew(tdb)); } } void tdbFilterGroupTrack(struct trackDb *fullTrackList, struct grp *fullGroupList, TdbFilterFunction *filterFunc, void *filterData, struct slRef **retGroupRefList, struct slRef **retTrackRefList) /* Apply filterFunc and filterData to each track in fullTrackList and all subtracks; * make an slRef list of tracks that pass the filter (retTrackRefList) and an slRef * list of groups to which the passing tracks belong (retGroupRefList). */ { struct slRef *trackRefList = NULL; rFilterTrackList(fullTrackList, &trackRefList, filterFunc, filterData); slReverse(&trackRefList); if (retTrackRefList != NULL) *retTrackRefList = trackRefList; if (retGroupRefList != NULL) { // Which groups have tracks that passed the filter? struct hash *hash = hashNew(0); struct slRef *trackRef; for (trackRef = trackRefList; trackRef != NULL; trackRef = trackRef->next) { struct trackDb *tdb = trackRef->val; hashAddInt(hash, tdb->grp, TRUE); } struct slRef *groupRefList = NULL; struct grp *grp; for (grp = fullGroupList; grp != NULL; grp = grp->next) { if (hashIntValDefault(hash, grp->name, FALSE)) slAddHead(&groupRefList, slRefNew(grp)); } hashFree(&hash); slReverse(&groupRefList); *retGroupRefList = groupRefList; } } boolean isVariantCustomTrack(struct trackDb *tdb, void *filterData) /* This is a TdbFilterFunction to get custom or hub tracks with type pgSnp or vcf(Tabix). */ { return ((sameString(tdb->grp, "user") || isHubTrack(tdb->track)) && (sameString(tdb->type, "pgSnp") || startsWith("vcf", tdb->type))); } void selectVariants() /* Offer selection of user's variant custom tracks, example variants, pasted input etc. */ { #define PGSNP_OR_VCF "<A HREF='../FAQ/FAQformat.html#format10' TARGET=_BLANK>pgSnp</A> or " \ "<A HREF='../goldenPath/help/vcf.html' TARGET=_BLANK>VCF</A>" printf("<div class='sectionLiteHeader'>Select Variants</div>\n"); /* Check for variant custom tracks. If there are none, tell the user that they should add one. */ struct slRef *varTrackList = NULL, *varGroupList = NULL; tdbFilterGroupTrack(fullTrackList, fullGroupList, isVariantCustomTrack, NULL, &varGroupList, &varTrackList); if (varTrackList == NULL) { printf("Your session doesn't have any custom tracks or hub tracks in " PGSNP_OR_VCF " format.\n"); hOnClickButton("selVar_AddPgpVcfCt", "return hgva.goToAddCustomTrack();", "add pgSnp or VCF custom track"); if (hubConnectTableExists()) { nbSpaces(2); hOnClickButton("selVar_AddTrkHub", "document.trackHubForm.submit(); return false;", "add track hub"); } puts("<BR>"); } else if (slCount(varTrackList) > 1) { printf("If you have more than one custom track or hub track in " PGSNP_OR_VCF " format, please select the one you wish to annotate:<BR>\n"); } printf("<B>variants: </B>"); printf("<SELECT ID='hgva_variantTrack' NAME='hgva_variantTrack'>\n"); jsOnEventById("change", "hgva_variantTrack", "hgva.changeVariantSource();"); char *selected = cartUsualString(cart, "hgva_variantTrack", ""); struct slRef *ref; for (ref = varTrackList; ref != NULL; ref = ref->next) { struct trackDb *tdb = ref->val; printOption(tdb->track, selected, tdb->longLabel); } printOption(hgvaSampleVariants, selected, hgvaSampleVariantsLabel); printOption(hgvaUseHgvs, selected, hgvaHgvsLabel); struct trackDb *snpTdb = hFindLatestSnpTrack(database, NULL, &fullTrackList); // We can convert rsIds from snpNNN tables (but not ancient "snp" - no refUCSC column) into VCF boolean hasSnps = (snpTdb != NULL && differentString(snpTdb->table, "snp")); if (hasSnps) printOption(hgvaUseVariantIds, selected, hgvaVariantIdsLabel); printf("</SELECT><BR>\n"); printf("<div id='"hgvaHgvsPasteContainer"'%s>\n", differentString(selected, hgvaUseHgvs) ? " style='display: none;'" : ""); printf("Enter HGVS terms: one term per line; blank lines and comment lines beginning with '#' " "are ignored.<BR>\n" HGVS_MUST_USE_ACC"<br>\n"); char *oldPasted = cartUsualString(cart, hgvaHgvs, ""); cgiMakeTextArea(hgvaHgvs, oldPasted, 10, 70); puts("</div>"); if (hasSnps) { printf("<div id='"hgvaVariantPasteContainer"'%s>\n", differentString(selected, hgvaUseVariantIds) ? " style='display: none;'" : ""); printf("Enter dbSNP rs# identifiers separated by whitespace or commas:<BR>\n"); char *oldPasted = cartUsualString(cart, hgvaVariantIds, ""); cgiMakeTextArea(hgvaVariantIds, oldPasted, 10, 70); puts("</div>"); } printf("<B>maximum number of variants to be processed:</B>\n"); char *curLimit = cartUsualString(cart, "hgva_variantLimit", "10000"); char *limitLabels[] = { "10", "100", "1,000", "10,000", "100,000" }; char *limitValues[] = { "10", "100", "1000", "10000", "100000" }; cgiMakeDropListWithVals("hgva_variantLimit", limitLabels, limitValues, ArraySize(limitLabels), curLimit); printCtAndHubButtons(); puts("<BR>"); } boolean isGeneTrack(struct trackDb *tdb, void *filterData) /* This is a TdbFilterFunction to get genePred tracks. */ { return (startsWith("genePred", tdb->type) || sameString("bigGenePred", tdb->type)); } char *selectGenes() /* Let user select a gene predictions track; return NULL if there are no genePred tracks. */ { struct slRef *trackRefList = NULL; tdbFilterGroupTrack(fullTrackList, fullGroupList, isGeneTrack, NULL, NULL, &trackRefList); boolean gotGP = (trackRefList != NULL); if (!gotGP) warn("This assembly (%s) has no gene prediction tracks, " "so the VAI will not be able to annotate it.", database); printf("<div class='sectionLiteHeader'>Select Genes</div>\n"); if (gotGP) printf("The gene predictions selected here will be used "); else printf("Gene predictions are required in order "); printf("to determine the effect of " "each variant on genes, for example intronic, missense, splice site, intergenic etc."); if (!gotGP) printf(" Since this assembly has no gene prediction tracks, " "the VAI can't provide functional annotations. " "Please select a different genome.<BR>"); printf("<BR>\n"); if (! gotGP) return NULL; char *firstTrack = ((struct trackDb *)(trackRefList->val))->track; char *cartGeneTrack = cartOptionalString(cart, "hgva_geneTrack"); if (isNotEmpty(cartGeneTrack)) { // Make sure it's actually in trackRefList (might have been carried over from other db) boolean exists = FALSE; struct slRef *ref; for (ref = trackRefList; ref != NULL; ref = ref->next) { struct trackDb *tdb = ref->val; if (sameString(cartGeneTrack, tdb->track)) { exists = TRUE; break; } } if (!exists) cartGeneTrack = NULL; } char *selected = isNotEmpty(cartGeneTrack) ? cartGeneTrack : firstTrack; //#*** should show more info about each track... button to pop up track desc? if (gotGP) { printf("<SELECT ID='hgva_geneTrack' NAME='hgva_geneTrack'>\n"); jsOnEventById("change", "hgva_geneTrack", "hgva.changeGeneSource();"); struct slRef *ref; for (ref = trackRefList; ref != NULL; ref = ref->next) { struct trackDb *tdb = ref->val; if (tdb->subtracks == NULL) printOption(tdb->track, selected, tdb->longLabel); } puts("</SELECT><BR>"); } return selected; } //#*** We really need a dbNsfp.[ch]: enum PolyPhen2Subset { noSubset, HDIV, HVAR }; char *formatDesc(char *url, char *name, char *details, boolean doHtml) /* Return a description with URL for name plus extra details. If doHtml, * wrap URL in <A ...>...</A>. */ { char desc[1024]; if (doHtml) safef(desc, sizeof(desc), "<A HREF='%s' TARGET=_BLANK>%s</A> %s", url, name, details); else safef(desc, sizeof(desc), "(%s) %s %s", url, name, details); return cloneString(desc); } char *dbNsfpDescFromTableName(char *tableName, enum PolyPhen2Subset subset, boolean doHtml) /* Return a description for table name... if these become tracks, use tdb of course. */ { if (sameString(tableName, "dbNsfpSift")) return formatDesc("http://sift.bii.a-star.edu.sg/", "SIFT", "(D = damaging, T = tolerated)", doHtml); else if (sameString(tableName, "dbNsfpPolyPhen2")) { if (subset == HDIV) return formatDesc("http://genetics.bwh.harvard.edu/pph2/", "PolyPhen-2", "with HumDiv training set " "(D = probably damaging, P = possibly damaging, B = benign)", doHtml); else if (subset == HVAR) return formatDesc("http://genetics.bwh.harvard.edu/pph2/", "PolyPhen-2", "with HumVar training set " "(D = probably damaging, P = possibly damaging, B = benign)", doHtml); else errAbort("dbNsfpDescFromTableName: invalid PolyPhen2 subset type (%d)", subset); } else if (sameString(tableName, "dbNsfpMutationTaster")) return formatDesc("http://www.mutationtaster.org/", "MutationTaster", "(A = disease causing automatic, D = disease causing, " "N = polymorphism, P = polymorphism automatic)", doHtml); else if (sameString(tableName, "dbNsfpMutationAssessor")) return formatDesc("http://mutationassessor.org/", "MutationAssessor", "(high or medium: predicted functional; " "low or neutral: predicted non-functional)", doHtml); else if (sameString(tableName, "dbNsfpLrt")) return formatDesc("http://www.genetics.wustl.edu/jflab/lrt_query.html", "Likelihood ratio test (LRT)", "(D = deleterious, N = Neutral, U = unknown)", doHtml); else if (sameString(tableName, "dbNsfpVest")) return formatDesc("https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3665549/", "Variant Effect Scoring Tool (VEST)", "(scores [0-1] predict confidence that a change is deleterious", doHtml); else if (sameString(tableName, "dbNsfpGerpNr")) return formatDesc("http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html", "GERP++", "Neutral Rate (NR)", doHtml); else if (sameString(tableName, "dbNsfpGerpRs")) return formatDesc("http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html", "GERP++", "Rejected Substitutions (RS)", doHtml); else if (sameString(tableName, "dbNsfpInterPro")) return formatDesc("http://www.ebi.ac.uk/interpro/", "InterPro", "protein domains", doHtml); return NULL; } struct slName *findDbNsfpTables() /* See if this database contains dbNSFP tables. */ { if (startsWith(hubTrackPrefix, database)) return NULL; struct sqlConnection *conn = hAllocConn(database); struct slName *dbNsfpTables = sqlListTablesLike(conn, "dbNsfp%"); hFreeConn(&conn); return dbNsfpTables; } void printDbNsfpSource(char *table, enum PolyPhen2Subset subset) /* If we know what to do with table, make a checkbox with descriptive label. */ { char *description = dbNsfpDescFromTableName(table, subset, TRUE); if (description != NULL) { char cartVar[512]; if (subset == HDIV) safef(cartVar, sizeof(cartVar), "hgva_track_%s_%s:HDIV", database, table); else if (subset == HVAR) safef(cartVar, sizeof(cartVar), "hgva_track_%s_%s:HVAR", database, table); else safef(cartVar, sizeof(cartVar), "hgva_track_%s_%s", database, table); boolean defaultChecked = (sameString("dbNsfpSift", table) || sameString("dbNsfpPolyPhen2", table)); cartMakeCheckBox(cart, cartVar, defaultChecked); printf("%s<BR>\n", description); } } void selectDbNsfp(struct slName *dbNsfpTables) /* Let user select scores/predicitions from various tools collected by dbNSFP. */ { if (dbNsfpTables == NULL) return; startCollapsibleSection("dbNsfp", "Database of Non-synonymous Functional Predictions (dbNSFP)", TRUE); //#*** hardcoded version info... we need metadata (#11462) char *dbNsfpVersion = "3.1a"; char *txVersion = "Gencode release 22 (Ensembl 79, Mar. 2015)"; char *refYear = "2015"; char *refUrl = "http://onlinelibrary.wiley.com/doi/10.1002/humu.22932/abstract"; // For the time being hg19 is still on version 2.0: if (sameString(database, "hg19")) { dbNsfpVersion = "2.0"; txVersion = "Gencode release 9 (Ensembl 64, Dec. 2011)"; refYear = "2013"; refUrl = "http://onlinelibrary.wiley.com/doi/10.1002/humu.22376/abstract"; } printf("<A HREF='https://sites.google.com/site/jpopgen/dbNSFP' TARGET=_BLANK>dbNSFP</A> " "(<A HREF='%s' " "TARGET=_BLANK>Liu <em>et al.</em> %s</A>) " "release %s " "provides pre-computed scores and predictions of functional significance " "from a variety of tools. Every possible coding change to transcripts in " "%s gene predictions " "has been evaluated. " "<em>Note: This may not encompass all transcripts in your " "selected gene set.</em><BR>\n", refUrl, refYear, dbNsfpVersion, txVersion); //#*** Another cheap hack: reverse alph order happens to be what we want (until VEST??), //#*** but priorities would be cleaner: slReverse(&dbNsfpTables); jsMakeSetClearContainer(); struct slName *table; for (table = dbNsfpTables; table != NULL; table = table->next) { if (sameString(table->name, "dbNsfpPolyPhen2")) { printDbNsfpSource(table->name, HDIV); printDbNsfpSource(table->name, HVAR); } else printDbNsfpSource(table->name, 0); } jsEndContainer(); puts("<BR>"); endCollapsibleSection(); } boolean findSnpBed4(char *suffix, char **retFileName, struct trackDb **retTdb) /* If we can find the latest snpNNNsuffix table, or better yet a bigBed file for it (faster), * set the appropriate ret* and return TRUE, otherwise return FALSE. */ { struct trackDb *tdb = hFindLatestSnpTrack(database, suffix, &fullTrackList); if (tdb == NULL) return FALSE; boolean foundIt = FALSE; // Do we happen to have a bigBed version? Better yet, bed4 only for current uses: char origFileName[HDB_MAX_PATH_STRING]; safef(origFileName, sizeof(origFileName), "/gbdb/%s/vai/%s.bed4.bb", database, tdb->table); char* fileName = hReplaceGbdb(origFileName); if (fileExists(fileName)) { if (retFileName != NULL) *retFileName = fileName; foundIt = TRUE; } else { // Not bed4; try just .bb: freez(&fileName); safef(origFileName, sizeof(origFileName), "/gbdb/%s/vai/%s.bb", database, tdb->table); fileName = hReplaceGbdb(origFileName); if (fileExists(fileName)) { if (retFileName != NULL) *retFileName = fileName; foundIt = TRUE; } } if (retTdb == NULL) return foundIt; else *retTdb = tdb; return TRUE; } void selectDbSnp(boolean gotSnp) /* Offer to include rsID (and other fields, or leave that for advanced output??) if available */ { if (!gotSnp) return; startCollapsibleSection("dbSnp", "Known variation", TRUE); cartMakeCheckBox(cart, "hgva_rsId", TRUE); printf("Include <A HREF='https://www.ncbi.nlm.nih.gov/projects/SNP/' TARGET=_BLANK>dbSNP</A> " "rs# ID if one exists<BR>\n"); puts("<BR>"); endCollapsibleSection(); } #define GENCODE_PREFIX "wgEncodeGencode" struct slName *getGencodeTagVersions() /* Return a list of version strings from the ends of wgEncodeGencodeTag% tables. */ { static struct slName *tagVersions = NULL; if (tagVersions == NULL && !startsWith(hubTrackPrefix, database)) { struct sqlConnection *conn = hAllocConn(database); - struct slName *tagTables = sqlQuickList(conn, - NOSQLINJ "show tables like '"GENCODE_PREFIX"Tag%'"); + char query[1024]; + sqlSafef(query, sizeof query, "show tables like '"GENCODE_PREFIX"Tag%%'"); + struct slName *tagTables = sqlQuickList(conn, query); int offset = strlen(GENCODE_PREFIX"Tag"); struct slName *tt; for (tt = tagTables; tt != NULL; tt = tt->next) slAddHead(&tagVersions, slNameNew(tt->name + offset)); hFreeConn(&conn); } return slNameCloneList(tagVersions); } boolean knownGeneHasGencodeTags() /* Return TRUE if this database has knownToTag for knownGene. */ { return hTableExists(database, "knownGene") && hTableExists(database, "knownToTag"); } boolean hasGencodeTags() /* Return TRUE if GENCODE tags can be associated with some gene track in database. */ { return knownGeneHasGencodeTags() || (getGencodeTagVersions() != NULL); } void genbankGetDbTable(char *db, char *gbTable, char **retGenbankDb, char **retTableName) /* Genbank tables (e.g. gbCdnaInfoTable) may now include a 'db.' prefix, e.g. * hgFixed.gbCdnaInfo. If so, separate out the database and table name. If gbTable * does not have a 'db.' prefix then use the configured genbankDb if given, else db. */ { char *genbankDb = cfgOptionEnv("GENBANKDB", "genbankDb"); char *tableName = gbTable; char tableCopy[strlen(gbTable)+1]; safecpy(tableCopy, sizeof(tableCopy), gbTable); char *words[2]; int wordCount = chopByChar(tableCopy, '.', words, ArraySize(words)); if (wordCount == 2) { genbankDb = words[0]; tableName = words[1]; } else if (isEmpty(genbankDb)) genbankDb = db; if (retGenbankDb) *retGenbankDb = cloneString(genbankDb); if (retTableName) *retTableName = cloneString(tableName); } boolean genbankTableExists(char *db, char *gbTable) /* Return TRUE if table (which may or may not be prefixed by genbankDb) exists in * genbankDb or db. */ { char *genbankDb=NULL, *tableName=NULL; genbankGetDbTable(db, gbTable, &genbankDb, &tableName); return hTableExists(genbankDb, tableName); } boolean hasTxStatus() /* Return TRUE if any gene track in database has some kind of transcript status info * like knownCanonical, GENCODE tags and/or RefSeq status. */ { if (hasGencodeTags()) return TRUE; if (hTableExists(database, "knownGene") && hTableExists(database, "knownCanonical")) return TRUE; if (hTableExists(database, "refGene") && genbankTableExists(database, refSeqStatusTable)) return TRUE; return FALSE; } char *getLatestGencodeVersion(struct slName *versionList) /* Return the numerically largest version found in versionList. */ { int maxVersionNum = -1; char *maxVersion = NULL; struct slName *version; for (version = versionList; version != NULL; version = version->next) { int versionNum = atoi(skipToNumeric(version->name)); if (versionNum > maxVersionNum) { maxVersionNum = versionNum; maxVersion = version->name; } } return cloneString(maxVersion); } INLINE char *gencodeTableName(char *suffix, char *version, char *buf, size_t bufSize) /* Write wgEncodeGencode<suffix><version> into buf. Return buf for convenience. */ { safef(buf, bufSize, GENCODE_PREFIX"%s%s", suffix, version); return buf; } boolean refGeneHasGencodeTags(struct slName *gencodeVersionList) /* Return TRUE if this database has a wgEncodeGencodeRefSeq table as well as a ...Tag table. */ { char *version = getLatestGencodeVersion(gencodeVersionList); char table[PATH_LEN]; return hTableExists(database, gencodeTableName("RefSeq", version, table, sizeof(table))); } boolean startsWithGencodeGene(char *geneTrack) /* Return TRUE if geneTrack starts with wgEncodeGencode{Basic,Comp,PseudoGene}. * (There are other GENCODE genepred tracks that don't have tags associated with them.) */ { return (startsWith(GENCODE_PREFIX"Basic", geneTrack) || startsWith(GENCODE_PREFIX"Comp", geneTrack) || startsWith(GENCODE_PREFIX"PseudoGene", geneTrack)); } boolean isGencodeWithVersion(char *geneTrack, struct slName *versionList) /* Return TRUE if geneTrack looks like a Gencode gene track for a supported version. */ { if (! startsWithGencodeGene(geneTrack)) return FALSE; struct slName *v; for (v = versionList; v != NULL; v = v->next) { if (endsWith(geneTrack, v->name)) return TRUE; } return FALSE; } void selectTxStatus(boolean hasTxStatus, char *geneTrack) /* Offer to include transcript status, e.g. whether it is in knownCanonical or has GENCODE tags. * This makes one div per category of txStatus info; each div is visible only if its info is * applicable to the selected gene track. If no divs are visible, display a message that * there's nothing for the currently selected gene track. */ { if (! hasTxStatus) return; startCollapsibleSection("txStatus", "Transcript status", FALSE); boolean somethingIsVisible = FALSE; if (hasGencodeTags()) { struct slName *versionList = getGencodeTagVersions(); char *maybeKnownGene = knownGeneHasGencodeTags() ? "knownGene" : ""; char *maybeRefGene = refGeneHasGencodeTags(versionList) ? "refGene" : ""; char *maybeEnsGene = ""; char *versions = ""; if (versionList != NULL) { if (hTableExists(database, "ensGene")) maybeEnsGene = "ensGene"; versions = slNameListToString(versionList, ' '); } boolean isVisible = (sameString(geneTrack, maybeKnownGene) || sameString(geneTrack, maybeEnsGene) || sameString(geneTrack, maybeRefGene) || isGencodeWithVersion(geneTrack, versionList)); somethingIsVisible |= isVisible; printf("<div class=\"txStatus %s %s %s %s\" style=\"display: %s;\">", maybeKnownGene, maybeRefGene, maybeEnsGene, versions, isVisible ? "block" : "none"); cartMakeCheckBox(cart, "hgva_txStatus_gencode", FALSE); puts("Include the <A HREF=" GENCODE_TAG_DOC_URL " TARGET=_BLANK>GENCODE tags</A> (if any) " "associated with each transcript.<BR>"); puts("</div>"); } if (hTableExists(database, "knownGene") && hTableExists(database, "knownCanonical")) { boolean isVisible = sameString(geneTrack, "knownGene"); somethingIsVisible |= isVisible; printf("<div class=\"txStatus knownGene\" style=\"display: %s;\">", isVisible ? "block" : "none"); cartMakeCheckBox(cart, "hgva_txStatus_knownCanonical", FALSE); char *desc = hTableExists(database, "knownToTag") ? "based on <A HREF=" APPRIS_DOC_URL " TARGET=_BLANK>" "APPRIS</A> status or inclusion in " "<A HREF=" GENCODE_BASIC_DOC_URL " TARGET=_BLANK>GENCODE Basic</A> subset: " "principal > alternative > basic > longest isoform" : "generally the longest isoform of a gene"; printf("Indicate whether each transcript is 'canonical' (%s).<BR>\n", desc); puts("</div>"); } if (hTableExists(database, "refGene") && genbankTableExists(database, refSeqStatusTable)) { boolean isVisible = sameString(geneTrack, "refGene"); somethingIsVisible |= isVisible; printf("<div class=\"txStatus refGene\" style=\"display: %s;\">", isVisible ? "block" : "none"); cartMakeCheckBox(cart, "hgva_txStatus_refSeqStatus", FALSE); puts("Include the " "<A HREF=" REFSEQ_STATUS_DOC_URL " " "TARGET=_BLANK>RefSeq status</A> of each transcript.<BR>"); puts("</div>"); } printf("<div class=\"noTxStatus\" style=\"display: %s;\">", somethingIsVisible ? "none" : "block"); puts("No transcript status data are available for the selected gene track."); puts("</div>"); puts("<BR>"); endCollapsibleSection(); } static boolean canDoHgvsOut(char *geneTrack) /* Return TRUE if we're able to make HGVS output terms for transcripts in geneTrack. */ { return (sameString(geneTrack, "refGene") || startsWith("ncbiRefSeq", geneTrack) || startsWith("wgEncodeGencodeBasic", geneTrack) || startsWith("wgEncodeGencodeComp", geneTrack)); } static void selectHgvsOut(char *geneTrack) /* Offer HGVS output choices if RefSeq Genes are selected */ { startCollapsibleSection("hgvsOut", "HGVS variant nomenclature", TRUE); printf("The <a href='http://www.hgvs.org/' target=_blank>Human Genome Variation Society (HGVS)</a> " "has established a " "<a href='http://varnomen.hgvs.org/' target=_blank>sequence variant nomenclature</a>, " "an international standard used to report variation in " "genomic, transcript and protein sequences.<br>\n"); boolean hgvsOk = canDoHgvsOut(geneTrack); printf("<div id=\"hgvsOptions\" style=\"display: %s;\">", hgvsOk ? "block" : "none"); cartMakeCheckBox(cart, "hgva_hgvsG", FALSE); printf("Include HGVS genomic (g.) terms in output<br>\n"); cartMakeCheckBox(cart, "hgva_hgvsCN", FALSE); printf("Include HGVS coding (c.) terms if applicable, otherwise noncoding (n.) terms, in output" "<br>\n"); cartMakeCheckBox(cart, "hgva_hgvsP", FALSE); printf("Include HGVS protein (p.) terms (if applicable) in output<br>\n"); cartMakeCheckBox(cart, "hgva_hgvsPAddParens", FALSE); printf("When including HGVS protein (p.) terms, add parentheses around changes to emphasize " "that they are predictions<br>\n"); cartMakeCheckBox(cart, "hgva_hgvsBreakDelIns", FALSE); printf("For variants that involve both a deletion and insertion, " "including multi-nucleotide variants, " "include the deleted sequence (e.g. show \"delAGinsTT\" instead of only \"delinsTT\")" "<br>\n"); puts("</div>"); printf("<div id=\"noHgvs\" style=\"display: %s;\">", hgvsOk ? "none" : "block"); printf("Select RefSeq Genes or an official GENCODE release " "(\"Basic Gene Annotation Set from GENCODE...\" " "or \"Comprehensive Gene Annotation Set...\") " "in the \"Select Genes\" section above in order to make options appear.\n"); puts("</div>"); puts("<br>"); endCollapsibleSection(); } boolean isHg19RegulatoryTrack(struct trackDb *tdb, void *filterData) /* For now, just look for a couple specific tracks by tableName. */ { //#*** NEED METADATA return (sameString("wgEncodeRegDnaseClusteredV3", tdb->table) || sameString("wgEncodeRegTfbsClusteredV3", tdb->table)); } boolean isHg38RegulatoryTrack(struct trackDb *tdb, void *filterData) /* For now, just look for a couple specific tracks by tableName. */ { //#*** NEED METADATA return (sameString("wgEncodeRegDnaseClustered", tdb->table) || sameString("wgEncodeRegTfbsClusteredV3", tdb->table)); } struct slRef *findRegulatoryTracks() /* Look for the very limited set of Regulation tracks that hgVai offers. */ { struct slRef *regTrackRefList = NULL; if (sameString(database, "hg19")) tdbFilterGroupTrack(fullTrackList, fullGroupList, isHg19RegulatoryTrack, NULL, NULL, ®TrackRefList); else if (sameString(database, "hg38")) tdbFilterGroupTrack(fullTrackList, fullGroupList, isHg38RegulatoryTrack, NULL, NULL, ®TrackRefList); return regTrackRefList; } void selectRegulatory() /* If trackRefList is non-NULL, make a section with a checkbox for each track, * labelled with longLabel, and optionally some filtering options. */ { struct slRef *trackRefList = findRegulatoryTracks(); if (trackRefList != NULL) { puts("<BR>"); printf("<div class='sectionLiteHeader'>Select Regulatory Annotations</div>\n"); printf("The annotations in this section provide predicted regulatory regions " "based on various experimental data. " "When a variant overlaps an annotation selected here, the consequence term " "<A HREF=\"http://sequenceontology.org/browser/current_release/term/SO:0001566\" " "TARGET=_BLANK>regulatory_region_variant</A> will be assigned. " "Follow the links to description pages that explain how each dataset was " "constructed. " "Some datasets cover a significant portion of the genome and it may be " "desirable to filter these annotations by cell type and/or score in order " "to avoid an overabundance of hits. "); // Use a table with checkboxes in one column and label/other stuff that depends on // checkbox in other column. puts("<TABLE>"); struct slRef *ref; for (ref = trackRefList; ref != NULL; ref = ref->next) { struct trackDb *tdb = ref->val; char cartVar[512]; safef(cartVar, sizeof(cartVar), "hgva_track_%s_%s", database, tdb->track); puts("<TR><TD valign=top>"); cartMakeCheckBox(cart, cartVar, FALSE); puts("</TD><TD>"); struct trackDb *topTdb = trackDbTopLevelSelfOrParent(tdb); printf("<A HREF=\"%s?%s&g=%s\">%s</A><BR>\n", hgTrackUiName(), cartSidUrlString(cart), topTdb->track, tdb->longLabel); printFilterOptions(tdb); puts("</TD></TR>"); } puts("</TABLE>"); } } boolean isConsElTrack(struct trackDb *tdb, void *filterData) /* This is a TdbFilterFunction to get "phastConsNwayElements" tracks. */ { return (startsWith("phastCons", tdb->table) && stringIn("Elements", tdb->table)); } boolean isConsScoreTrack(struct trackDb *tdb, void *filterData) /* This is a TdbFilterFunction to get tracks that start with "phastCons" (no Elements) * or "phyloP". */ { return ((startsWith("phastCons", tdb->table) && !stringIn("Elements", tdb->table)) || startsWith("phyloP", tdb->table)); } void findCons(struct slRef **retElTrackRefList, struct slRef **retScoreTrackRefList) /* See if this database has Conserved Elements and/or Conservation Scores */ { tdbFilterGroupTrack(fullTrackList, fullGroupList, isConsElTrack, NULL, NULL, retElTrackRefList); tdbFilterGroupTrack(fullTrackList, fullGroupList, isConsScoreTrack, NULL, NULL, retScoreTrackRefList); } boolean trackNameMatches(struct trackDb *tdb, void *filterData) /* This is a TdbFilterFunction to get track(s) whose tdb->track matches name (filterData). */ { char *name = filterData; return sameString(tdb->track, name); } struct slRef *findTrackRefByName(char *name) /* Return a reference to the named track, if found. */ { struct slRef *trackRefList = NULL; tdbFilterGroupTrack(fullTrackList, fullGroupList, trackNameMatches, name, NULL, &trackRefList); return trackRefList; } void trackCheckBoxSection(char *sectionSuffix, char *title, struct slRef *trackRefList) /* If trackRefList is non-NULL, make a collapsible section with a checkbox for each track, * labelled with longLabel. */ { if (trackRefList != NULL) { startCollapsibleSection(sectionSuffix, title, FALSE); struct slRef *ref; for (ref = trackRefList; ref != NULL; ref = ref->next) { struct trackDb *tdb = ref->val; char cartVar[512]; safef(cartVar, sizeof(cartVar), "hgva_track_%s_%s", database, tdb->track); cartMakeCheckBox(cart, cartVar, FALSE); struct trackDb *topTdb = trackDbTopLevelSelfOrParent(tdb); printf("<A HREF=\"%s?%s&g=%s\">%s</A><BR>\n", hgTrackUiName(), cartSidUrlString(cart), topTdb->track, tdb->longLabel); } puts("<BR>"); endCollapsibleSection(); } } void selectAnnotations(char *geneTrack) /* Beyond predictions of protein-coding effect, what other basic data can we integrate? */ { struct slName *dbNsfpTables = findDbNsfpTables(); boolean gotSnp = findSnpBed4("", NULL, NULL); struct slRef *elTrackRefList = NULL, *scoreTrackRefList = NULL; findCons(&elTrackRefList, &scoreTrackRefList); struct slRef *cosmicTrackRefList = findTrackRefByName("cosmic"); boolean hasTxStat = hasTxStatus(); if (dbNsfpTables == NULL && !gotSnp && elTrackRefList == NULL && scoreTrackRefList == NULL && cosmicTrackRefList == NULL && !hasTxStat) return; puts("<BR>"); printf("<div class='sectionLiteHeader'>Select More Annotations (optional)</div>\n"); // Make wrapper table for collapsible sections: puts("<TABLE border=0 cellspacing=5 cellpadding=0 style='padding-left: 10px;'>"); selectDbNsfp(dbNsfpTables); selectTxStatus(hasTxStat, geneTrack); selectHgvsOut(geneTrack); selectDbSnp(gotSnp); trackCheckBoxSection("Cosmic", "COSMIC", cosmicTrackRefList); trackCheckBoxSection("ConsEl", "Conserved elements", elTrackRefList); trackCheckBoxSection("ConsScore", "Conservation scores", scoreTrackRefList); puts("</TABLE>"); } void selectFiltersFunc() /* Options to restrict variants based on gene region/soTerm from gpFx */ { startCollapsibleSection("filtersFunc", "Functional role", FALSE); printf("Include variants annotated as<BR>\n"); jsMakeSetClearContainer(); cartMakeCheckBox(cart, "hgva_include_intergenic", TRUE); printf("intergenic<BR>\n"); cartMakeCheckBox(cart, "hgva_include_upDownstream", TRUE); printf("upstream/downstream of gene<BR>\n"); cartMakeCheckBox(cart, "hgva_include_nmdTranscript", TRUE); printf("in transcript already subject to nonsense-mediated decay (NMD)<BR>\n"); cartMakeCheckBox(cart, "hgva_include_exonLoss", TRUE); printf("exon loss caused by deletion<BR>\n"); cartMakeCheckBox(cart, "hgva_include_utr", TRUE); printf("5' or 3' UTR<BR>\n"); cartMakeCheckBox(cart, "hgva_include_cdsSyn", TRUE); printf("CDS - synonymous coding change<BR>\n"); cartMakeCheckBox(cart, "hgva_include_cdsNonSyn", TRUE); printf("CDS - non-synonymous (missense, stop gain/loss, frameshift etc)<BR>\n"); cartMakeCheckBox(cart, "hgva_include_intron", TRUE); printf("intron<BR>\n"); cartMakeCheckBox(cart, "hgva_include_splice", TRUE); printf("splice site or splice region<BR>\n"); cartMakeCheckBox(cart, "hgva_include_nonCodingExon", TRUE); printf("exon of non-coding gene<BR>\n"); cartMakeCheckBox(cart, "hgva_include_noVariation", TRUE); printf("\"variants\" for which no alternate allele has been observed<BR>\n"); struct slRef *regTrackRefList = findRegulatoryTracks(); if (regTrackRefList != NULL) { cartMakeCheckBox(cart, "hgva_include_regulatory", TRUE); printf("regulatory element (note: these are detected only if one or more tracks " "are selected in Regulatory regions above)<BR>\n"); } jsEndContainer(); puts("<BR>"); endCollapsibleSection(); } void selectFiltersKnownVar() /* Options to restrict output based on overlap with known variants. */ { boolean gotCommon = findSnpBed4("Common", NULL, NULL); boolean gotMult = findSnpBed4("Mult", NULL, NULL); if (!gotCommon && !gotMult) return; startCollapsibleSection("filtersVar", "Known variation", FALSE); if (gotMult) { cartMakeCheckBox(cart, "hgva_include_snpMult", TRUE); printf("Include variants even if they are co-located with variants that have been mapped to " "multiple genomic locations by dbSNP<BR>\n"); } if (gotCommon) { cartMakeCheckBox(cart, "hgva_include_snpCommon", TRUE); printf("Include variants even if they are co-located with \"common\" variants " "(uniquely mapped variants with global minor allele frequency (MAF) " "of at least 1%% according to dbSNP)<BR>\n"); } puts("<BR>"); endCollapsibleSection(); } void selectFiltersCons() /* Options to restrict output based on overlap with conserved elements. */ { struct slRef *elTrackRefList = NULL; tdbFilterGroupTrack(fullTrackList, fullGroupList, isConsElTrack, NULL, NULL, &elTrackRefList); if (elTrackRefList == NULL) return; startCollapsibleSection("filtersCons", "Conservation", FALSE); // Use a little table to indent radio buttons (if we do those) past checkbox: puts("<TABLE><TR><TD>"); cartMakeCheckBox(cart, "hgva_require_consEl", FALSE); printf("</TD><TD>Include only the variants that overlap:"); if (slCount(elTrackRefList) == 1) { struct trackDb *tdb = elTrackRefList->val; printf(" %s<BR>\n", tdb->longLabel); cgiMakeHiddenVar("hgva_require_consEl_track", tdb->track); puts("</TD></TR></TABLE>"); } else { puts("</TD></TR>"); char *selected = cartUsualString(cart, "hgva_require_consEl_track", ""); struct slRef *ref; for (ref = elTrackRefList; ref != NULL; ref = ref->next) { printf("<TR><TD></TD><TD>"); struct trackDb *tdb = ref->val; cgiMakeOnEventRadioButtonWithClass("hgva_require_consEl_track", tdb->track, sameString(tdb->track, selected), NULL, "click", "setCheckboxList('hgva_require_consEl', true);"); printf("%s</TD></TR>\n", tdb->longLabel); } puts("</TABLE>"); } endCollapsibleSection(); } void selectFilters() /* Options to restrict output to variants with specific properties */ { puts("<BR>"); printf("<div class='sectionLiteHeader'>Define Filters</div>\n"); puts("<TABLE border=0 cellspacing=5 cellpadding=0 style='padding-left: 10px;'>"); selectFiltersFunc(); selectFiltersKnownVar(); selectFiltersCons(); puts("</TABLE>"); } void selectOutput() /* Just VEP text for now... should also have VEP HTML with limited # lines too. * Next: custom track with same subset of info as we would stuff in VEP */ { puts("<BR>"); printf("<div class='sectionLiteHeader'>Configure Output</div>\n"); printf("<B>output format: </B>"); char *selected = cartUsualString(cart, "hgva_outFormat", "vepTab"); printf("<SELECT ID='hgva_outFormat' NAME='hgva_outFormat'>\n"); printOption("vepTab", selected, "Variant Effect Predictor (tab-separated text)"); printOption("vepHtml", selected, "Variant Effect Predictor (HTML)"); printf("</SELECT><BR>\n"); char *compressType = cartUsualString(cart, "hgva_compressType", textOutCompressNone); char *fileName = cartUsualString(cart, "hgva_outFile", ""); printf("<B>output file:</B> "); cgiMakeTextVar("hgva_outFile", fileName, 29); printf(" (leave blank to keep output in browser)<BR>\n"); printf("<B>file type returned: </B>"); cgiMakeRadioButton("hgva_compressType", textOutCompressNone, sameWord(textOutCompressNone, compressType)); printf(" plain text  "); cgiMakeRadioButton("hgva_compressType", textOutCompressGzip, sameWord(textOutCompressGzip, compressType)); printf(" gzip compressed (ignored if output file is blank)"); puts("<BR>"); } void submitAndDisclaimer() { puts("<div id=disclaimerDialog title='NOTE'>"); puts("This tool is for research use only. While this tool is open to the " "public, users seeking information about a personal medical or genetic " "condition are urged to consult with a qualified physician for " "diagnosis and for answers to personal questions."); puts("</div><BR>"); printf("<div><img id='loadingImg' src='../images/loading.gif' />\n"); printf("<span id='loadingMsg'></span></div>\n"); cgiMakeOnClickButton("subDisclmAgrd","hgva.submitQueryIfDisclaimerAgreed();", "Get results"); puts("<BR><BR>"); } /* * When user clicks submit, we need to roll a JSON querySpec from form selections, * and show data from a submission to hgAnnoGrator. redirect from this CGI? * or have javascript submit directly? * * primary: variants, from custom track * * if there are genes, those w/annoGratorGpVar * * if there are {dbSNP, dbNsfp, regulatory, cons} selections, grator for each of those * * vep output config * * If we get bold & offer 1000Genomes VCF, will def. need handling of split chroms. * Are we really going to offer genome-wide in hgVai? * Up-front limit on #rows of input ? * * Eventually, we might want a FormatVep that produces structs that are passed * forward to multiple output writers... I would want to send it lots of gratorData * like a formatter, but it would produce rows like an annoGrator. * Maybe annoGrators should accept a bunch of input rows like formatters? * or would this grator wrap all the input grators inside? */ void doMainPage() /* Print out initial HTML of control page. */ { jsInit(); webIncludeResourceFile("jquery-ui.css"); webIncludeResourceFile("ui.dropdownchecklist.css"); boolean alreadyAgreed = cartUsualBoolean(cart, "hgva_agreedToDisclaimer", FALSE); jsInlineF( "$(document).ready(function() { hgva.disclaimer.init(%s, hgva.userClickedAgree); });\n" , alreadyAgreed ? "true" : "false"); addSomeCss(); printAssemblySection(); puts("<BR>"); // Make wrapper table for collapsible sections: selectVariants(); char *geneTrack = selectGenes(); if (geneTrack != NULL) { selectRegulatory(); selectAnnotations(geneTrack); selectFilters(); selectOutput(); submitAndDisclaimer(); } printf("</FORM>"); jsReloadOnBackButton(cart); webNewSection("Using the Variant Annotation Integrator"); webIncludeHelpFileSubst("hgVaiHelpText", cart, FALSE); jsIncludeFile("jquery-ui.js", NULL); jsIncludeFile("hgVai.js", NULL); jsIncludeFile("ui.dropdownchecklist.js", NULL); jsIncludeFile("ddcl.js", NULL); } void doUi() /* Set up globals and make web page */ { cartWebStart(cart, database, "Variant Annotation Integrator"); doMainPage(); cartWebEnd(); /* Save variables. */ cartCheckout(&cart); } void checkVariantTrack(struct trackDb *tdb) /* variantTrack should be either pgSnp or VCF. */ { if (! sameString(tdb->type, "pgSnp") && ! startsWith("vcf", tdb->type)) { errAbort("Expected variant track '%s' to be type pgSnp, vcf or vcfTabix, but it's '%s'", tdb->track, tdb->type); } } char *fileNameFromTable(char *table) /* Get fileName from a bigData table (for when we don't have a trackDb, just table). */ { struct sqlConnection *conn = hAllocConn(database); char query[512]; sqlSafef(query, sizeof(query), "select fileName from %s", table); char *fileName = sqlQuickString(conn, query); hFreeConn(&conn); char *fileNameRewrite = hReplaceGbdb(fileName); freez(&fileName); return fileNameRewrite; } void textOpen() /* Start serving up plain text, possibly via a pipeline to gzip. */ { char *fileName = textOutSanitizeHttpFileName(cartUsualString(cart, "hgva_outFile", "")); char *compressType = cartUsualString(cart, "hgva_compressType", textOutCompressGzip); compressPipeline = textOutInit(fileName, compressType, NULL); } void setGpVarFuncFilter(struct annoGrator *gpVarGrator) /* Use cart variables to configure gpVarGrator's filtering by functional category. */ { struct annoGratorGpVarFuncFilter aggvFuncFilter; ZeroVar(&aggvFuncFilter); aggvFuncFilter.intergenic = cartUsualBoolean(cart, "hgva_include_intergenic", TRUE); aggvFuncFilter.upDownstream = cartUsualBoolean(cart, "hgva_include_upDownstream", TRUE); aggvFuncFilter.nmdTranscript = cartUsualBoolean(cart, "hgva_include_nmdTranscript", TRUE); aggvFuncFilter.exonLoss = cartUsualBoolean(cart, "hgva_include_exonLoss", TRUE); aggvFuncFilter.utr = cartUsualBoolean(cart, "hgva_include_utr", TRUE); aggvFuncFilter.cdsSyn = cartUsualBoolean(cart, "hgva_include_cdsSyn", TRUE); aggvFuncFilter.cdsNonSyn = cartUsualBoolean(cart, "hgva_include_cdsNonSyn", TRUE); aggvFuncFilter.intron = cartUsualBoolean(cart, "hgva_include_intron", TRUE); aggvFuncFilter.splice = cartUsualBoolean(cart, "hgva_include_splice", TRUE); aggvFuncFilter.nonCodingExon = cartUsualBoolean(cart, "hgva_include_nonCodingExon", TRUE); aggvFuncFilter.noVariation = cartUsualBoolean(cart, "hgva_include_noVariation", TRUE); annoGratorGpVarSetFuncFilter(gpVarGrator, &aggvFuncFilter); } static void setHgvsOutOptions(struct annoGrator *gpVarGrator, char *geneTrack, struct annoFormatter *vepOut) /* Use cart variables to configure gpVarGrator's HGVS output. */ { uint hgvsOutOptions = 0; if (canDoHgvsOut(geneTrack)) { if (cartUsualBoolean(cart, "hgva_hgvsG", FALSE)) hgvsOutOptions |= HGVS_OUT_G; if (cartUsualBoolean(cart, "hgva_hgvsCN", FALSE)) hgvsOutOptions |= HGVS_OUT_CN; if (cartUsualBoolean(cart, "hgva_hgvsP", FALSE)) hgvsOutOptions |= HGVS_OUT_P; if (cartUsualBoolean(cart, "hgva_hgvsPAddParens", FALSE)) hgvsOutOptions |= HGVS_OUT_P_ADD_PARENS; if (cartUsualBoolean(cart, "hgva_hgvsBreakDelIns", FALSE)) hgvsOutOptions |= HGVS_OUT_BREAK_DELINS; } annoGratorGpVarSetHgvsOutOptions(gpVarGrator, hgvsOutOptions); annoFormatVepSetHgvsOutOptions(vepOut, hgvsOutOptions); } struct annoGrator *gratorForSnpBed4(struct hash *gratorsByName, char *suffix, struct annoAssembly *assembly, char *chrom, enum annoGratorOverlap overlapRule, char **retDescription) /* Look up snpNNNsuffix; if we find it, return a grator (possibly for a bigBed 4 file), * otherwise return NULL. */ { char *fileName = NULL; struct trackDb *tdb = NULL; if (! findSnpBed4(suffix, &fileName, &tdb)) return NULL; struct annoGrator *grator = NULL; // First look in gratorsByName to see if this grator has already been constructed: if (tdb != NULL) { grator = hashFindVal(gratorsByName, tdb->table); if (retDescription) *retDescription = cloneString(tdb->longLabel); } if (grator != NULL) { // Set its overlap behavior and we're done. grator->setOverlapRule(grator, overlapRule); return grator; } // If not in gratorsByName, then attempt to construct it here: if (fileName != NULL) grator = hAnnoGratorFromBigFileUrl(fileName, NULL, assembly, ANNO_NO_LIMIT, overlapRule); else grator = hAnnoGratorFromTrackDb(assembly, tdb->table, tdb, chrom, ANNO_NO_LIMIT, NULL, overlapRule, NULL); if (grator != NULL) hashAdd(gratorsByName, tdb->table, grator); return grator; } char *tableNameFromSourceName(char *sourceName) /* Strip sourceName (file path or db.table) to just the basename or table name. */ { char *tableName = cloneString(sourceName); char *p = strrchr(tableName, '/'); if (p != NULL) { // file path; strip to basename char dir[PATH_LEN], name[FILENAME_LEN], extension[FILEEXT_LEN]; splitPath(tableName, dir, name, extension); safecpy(tableName, strlen(tableName)+1, name); } else { // database.table -- skip database. p = strchr(tableName, '.'); if (p != NULL) tableName = p+1; } return tableName; } char *tagFromTableName(char *tableName, char *suffix) /* Generate a tag for VEP's extras column or VCF's info column. */ { char *p = strstr(tableName, "dbNsfp"); if (p != NULL) tableName = p + strlen("dbNsfp"); int suffixLen = (suffix == NULL) ? 0 : strlen(suffix); int tagSize = strlen(tableName) + suffixLen + 1; char *tag = cloneStringZ(tableName, tagSize); if (isNotEmpty(suffix)) safecat(tag, tagSize, suffix); touppers(tag); // Some custom shortenings, to avoid very long tag names: (void)strSwapStrs(tag, tagSize, "POLYPHEN", "PP"); (void)strSwapStrs(tag, tagSize, "MUTATION", "MUT"); (void)strSwapStrs(tag, tagSize, "PHYLOP", "PHP"); (void)strSwapStrs(tag, tagSize, "PHASTCONS", "PHC"); (void)strSwapStrs(tag, tagSize, "ELEMENTS", "EL"); (void)strSwapStrs(tag, tagSize, "PRIMATES", "PRIM"); (void)strSwapStrs(tag, tagSize, "PLACENTAL", "PLAC"); if (regexMatch(tag, "^PH.*[0-9]WAY")) (void)strSwapStrs(tag, tagSize, "WAY", "W"); (void)strSwapStrs(tag, tagSize, "WGENCODEREGDNASECLUSTERED", "DNASE"); (void)strSwapStrs(tag, tagSize, "WGENCODEREGTFBSCLUSTERED", "TFBS"); return tag; } enum PolyPhen2Subset stripSubsetFromTrackName(char *trackName) /* trackName may have a _suffix for a subset of PolyPhen2; convert that to enum * and zero out the suffix so we have the real trackName. */ { enum PolyPhen2Subset subset = noSubset; char *p = strchr(trackName, ':'); if (p != NULL) { if (sameString(p+1, "HDIV")) subset = HDIV; else if (sameString(p+1, "HVAR")) subset = HVAR; else errAbort("unrecognized suffix in track_suffix '%s'", trackName); *p = '\0'; } return subset; } void updateGratorListAndVepExtra(struct annoGrator *grator, struct annoGrator **pGratorList, struct annoFormatter *vepOut, enum PolyPhen2Subset subset, char *column, char *description, boolean isReg) /* If grator is non-NULL, add it to gratorList and vepOut's list of items for EXTRAs column. */ { if (grator == NULL) return; slAddHead(pGratorList, grator); if (vepOut != NULL) { char *tableName = tableNameFromSourceName(grator->streamer.name); char *suffix = NULL; if (subset == HDIV) suffix = "HDIV"; else if (subset == HVAR) suffix = "HVAR"; char *tag = tagFromTableName(tableName, suffix); if (isEmpty(description)) description = grator->streamer.name; if (isReg) annoFormatVepAddRegulatory(vepOut, (struct annoStreamer *)grator, tag, description, column); else annoFormatVepAddExtraItem(vepOut, (struct annoStreamer *)grator, tag, description, column, FALSE); } } INLINE void updateGratorList(struct annoGrator *grator, struct annoGrator **pGratorList) /* If grator is non-NULL, add it to gratorList. */ { updateGratorListAndVepExtra(grator, pGratorList, NULL, 0, NULL, NULL, FALSE); } void addDbNsfpSeqChange(char *trackName, struct annoAssembly *assembly, struct hash *gratorsByName, struct annoGrator **pGratorList) // If the user has selected dbNsfp* data, we also need the underlying dbNsfpSeqChange // data, so annoFormatVep can tell whether the variant and gpFx are consistent with the // variant and transcript that dbNsfp used to calculate scores. { //#*** Yet another place where we need metadata: char *seqChangeTable = "dbNsfpSeqChange"; if (hashFindVal(gratorsByName, seqChangeTable) == NULL) { char *fileName = fileNameFromTable(seqChangeTable); if (fileName == NULL) errAbort("'%s' requested, but I can't find fileName for %s", trackName, seqChangeTable); struct annoGrator *grator = hAnnoGratorFromBigFileUrl(fileName, NULL, assembly, ANNO_NO_LIMIT, agoNoConstraint); updateGratorList(grator, pGratorList); hashAdd(gratorsByName, seqChangeTable, grator); } } static struct dyString *dyInfo = NULL; struct hash *getTrackFilterVars(char *track) /* Return a hash of filter variable names (cart variable suffixes) to slName lists of values. */ { char filterPrefix[512]; safef(filterPrefix, sizeof(filterPrefix), "hgva_filter_%s_", track); struct slPair *filterVars = cartVarsWithPrefix(cart, filterPrefix), *var; int prefixLen = strlen(filterPrefix); struct hash *varHash = hashNew(0); for (var = filterVars; var != NULL; var = var->next) { char *varName = var->name+prefixLen; char *val = var->val; struct hashEl *hel = hashLookup(varHash, varName); if (hel != NULL) slNameAddHead((struct slName **)(&hel->val), val); else hashAdd(varHash, varName, slNameNew(val)); } return varHash; } INLINE boolean isNotAll(struct slName *valList) /* Return TRUE unless valList has one element with name "All" (for multiselects). */ { if (slCount(valList) == 1 && sameString(valList->name, "All")) return FALSE; return TRUE; } void factorSourceGratorAddFilter(struct annoGrator *grator, char *name, struct slName *valList) /* Add filter to factorSource grator. */ //#*** Do these smarts belong here in hgVai? Probably not -- should be an hg/lib module with //#*** UI/metadata smarts. { struct annoStreamer *gStreamer = (struct annoStreamer *)grator; struct annoFilter *filter = NULL; if (sameString(name, "name") || sameString(name, "cellType") || sameString(name, "treatment")) { if (valList && isNotAll(valList)) filter = annoFilterFromAsColumn(gStreamer->asObj, name, afMatch, valList); } else if (sameString(name, "score")) filter = annoFilterFromAsColumn(gStreamer->asObj, name, afGTE, valList); else errAbort("Unrecognized filter name '%s' for %s, type=factorSource", name, gStreamer->name); if (filter) gStreamer->addFilters(gStreamer, filter); } void bed5AddFilter(struct annoGrator *grator, char *name, struct slName *valList) /* Add filter to bed 5 grator. */ { struct annoStreamer *gStreamer = (struct annoStreamer *)grator; struct annoFilter *filter = NULL; if (sameString(name, "name")) { if (valList && isNotAll(valList)) filter = annoFilterFromAsColumn(gStreamer->asObj, name, afMatch, valList); } else if (sameString(name, "score")) filter = annoFilterFromAsColumn(gStreamer->asObj, name, afGTE, valList); else errAbort("Unrecognized filter name '%s' for %s, type=bed 5", name, gStreamer->name); if (filter) gStreamer->addFilters(gStreamer, filter); } void addFiltersToGrator(struct annoGrator *grator, struct trackDb *tdb) /* Look for filter variables in the cart and add filters to grator accordingly. */ { struct hash *varHash = getTrackFilterVars(tdb->track); struct hashEl *hel, *helList = hashElListHash(varHash); for (hel = helList; hel != NULL; hel = hel->next) { char *filterName = hel->name; struct slName *valList = hel->val; //#*** Need a much better way to dispatch... if (sameString("factorSource", tdb->type)) factorSourceGratorAddFilter(grator, filterName, valList); else if (startsWith("bed 5", tdb->type)) bed5AddFilter(grator, filterName, valList); else dyStringPrintf(dyInfo, "Ignoring %s filter %s\n", tdb->track, filterName); } hashFree(&varHash); } void addOutputTracks(struct annoGrator **pGratorList, struct hash *gratorsByName, struct annoFormatter *vepOut, struct annoAssembly *assembly, char *chrom, boolean doHtml, boolean *retHasRegulatory) // Construct grators for tracks selected to appear in EXTRAS column { boolean includeReg = cartUsualBoolean(cart, "hgva_include_regulatory", TRUE); boolean haveReg = FALSE; char trackPrefix[128]; safef(trackPrefix, sizeof(trackPrefix), "hgva_track_%s_", database); int trackPrefixLen = strlen(trackPrefix); struct slPair *trackVar, *trackVars = cartVarsWithPrefix(cart, trackPrefix); for (trackVar = trackVars; trackVar != NULL; trackVar = trackVar->next) { char *val = trackVar->val; if (! (sameWord(val, "on") || atoi(val) > 0)) continue; char *trackName = trackVar->name + trackPrefixLen; if (sameString(trackName, "dbNsfpPolyPhen2")) // PolyPhen2 must have a suffix now -- skip obsolete cartVar from existing carts continue; struct annoGrator *grator = hashFindVal(gratorsByName, trackName); if (grator != NULL) // We already have this as a grator: continue; enum PolyPhen2Subset subset = noSubset; char *description = NULL; char *column = NULL; boolean isReg = FALSE; if (startsWith("dbNsfp", trackName)) { // trackName for PolyPhen2 has a suffix for subset -- strip it if we find it: subset = stripSubsetFromTrackName(trackName); description = dbNsfpDescFromTableName(trackName, subset, doHtml); addDbNsfpSeqChange(trackName, assembly, gratorsByName, pGratorList); char *fileName = fileNameFromTable(trackName); if (fileName != NULL) grator = hAnnoGratorFromBigFileUrl(fileName, NULL, assembly, ANNO_NO_LIMIT, agoNoConstraint); } else { struct trackDb *tdb = tdbForTrack(database, trackName, &fullTrackList); if (tdb != NULL) { grator = hAnnoGratorFromTrackDb(assembly, tdb->table, tdb, chrom, ANNO_NO_LIMIT, NULL, agoNoConstraint, NULL); if (grator != NULL) { //#*** Need something more sophisticated but this works for our //#*** limited selection of extra tracks: if (asColumnFind(grator->streamer.asObj, "name") != NULL) column = "name"; addFiltersToGrator(grator, tdb); } description = tdb->longLabel; isReg = (includeReg && ((sameString(database, "hg19") && isHg19RegulatoryTrack(tdb, NULL)) || (sameString(database, "hg38") && isHg38RegulatoryTrack(tdb, NULL)))); } } haveReg |= isReg; updateGratorListAndVepExtra(grator, pGratorList, vepOut, subset, column, description, isReg); if (grator != NULL) hashAdd(gratorsByName, trackName, grator); } if (retHasRegulatory) *retHasRegulatory = haveReg; } void addFilterTracks(struct annoGrator **pGratorList, struct hash *gratorsByName, struct annoAssembly *assembly, char *chrom) // Add grators for filters (not added to vepOut): { if (!cartUsualBoolean(cart, "hgva_include_snpCommon", TRUE)) { struct annoGrator *grator = gratorForSnpBed4(gratorsByName, "Common", assembly, chrom, agoMustNotOverlap, NULL); updateGratorList(grator, pGratorList); } if (!cartUsualBoolean(cart, "hgva_include_snpMult", TRUE)) { struct annoGrator *grator = gratorForSnpBed4(gratorsByName, "Mult", assembly, chrom, agoMustNotOverlap, NULL); updateGratorList(grator, pGratorList); } if (cartUsualBoolean(cart, "hgva_require_consEl", FALSE)) { char *consElTrack = cartString(cart, "hgva_require_consEl_track"); struct annoGrator *grator = hashFindVal(gratorsByName, consElTrack); if (grator == NULL) { struct trackDb *tdb = tdbForTrack(database, consElTrack, &fullTrackList); if (tdb != NULL) grator = hAnnoGratorFromTrackDb(assembly, tdb->table, tdb, chrom, ANNO_NO_LIMIT, NULL, agoMustOverlap, NULL); updateGratorList(grator, pGratorList); } else grator->setOverlapRule(grator, agoMustOverlap); } } static void getCartPosOrDie(char **retChrom, uint *retStart, uint *retEnd) /* Get chrom:start-end from cart, errAbort if any problems. */ { char *position = windowsToAscii(cartString(cart, hgvaRange)); if (! parsePosition(position, retChrom, retStart, retEnd)) errAbort("Expected position to be chrom:start-end but got '%s'", position); } static char *sampleVariantsPath(struct annoAssembly *assembly, char *geneTrack) /* Construct a path for a trash file of contrived example variants for this assembly * and gene track. */ { char *chrom = NULL; uint start = 0, end = 0; getCartPosOrDie(&chrom, &start, &end); mkdirTrashDirectory(hgvsTrashSubDir); struct dyString *dy = dyStringCreate("%s/%s/%s_%s_%s_%u-%u.vcf", trashDir(), hgvsTrashSubDir, assembly->name, geneTrack, chrom, start, end); return dyStringCannibalize(&dy); } static char *makeMinimalVcfHeader(char *db, char *headerDefs) /* Return a string containing a simple no-genotypes VCF header. db must be non-NULL. * headerDefs can be NULL or empty or line(s) starting with '##' and ending w/'\n' */ { struct dyString *dy = dyStringCreate("##fileformat=VCFv4.2\n" "##reference=%s\n" "%s" "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n", db, (headerDefs ? headerDefs : "")); return dyStringCannibalize(&dy); } static void writeMinimalVcfHeader(FILE *f, char *db, char *headerDefs) /* Write header for VCF with no genotype data. db must be non-NULL. * headerDefs can be NULL or empty or line(s) starting with '##' and ending w/'\n' */ { char *headerString = makeMinimalVcfHeader(db, headerDefs); fputs(headerString, f); freeMem(headerString); } static void mutateBase(char *pBase) /* Change *pBase into a different base. */ { static char *bases = "ACGT"; char c; // In real life the probabilities aren't even, but this is easier. while ((c = bases[(uint)(floor(drand48() * 4.0))]) == *pBase) ; *pBase = c; } static void writeMinimalVcfRow(FILE *f, struct vcfRecord *rec) /* Write a minimalist VCF row (coords, name, alleles and placeholders). */ { // VCF columns: #CHROM POS ID REF ALT QUAL FILTER INFO fprintf(f, "%s\t%u\t%s\t%s\t", rec->chrom, rec->chromStart+1, rec->name, rec->alleles[0]); // Comma-separated alt alleles: int i; for (i = 1; i < rec->alleleCount; i++) fprintf(f, "%s%s", (i > 1 ? "," : ""), rec->alleles[i]); // Placeholder qual, filter, info. fprintf(f, "\t.\t.\t.\n"); } static void writeMinimalVcfRowFromBed(FILE *f, struct bed4 *bed, struct dnaSeq *seq, uint seqOffset) /* Write a minimalist VCF row with a mutation of the reference sequence at the given * position. */ { uint indelBase = 0, start = bed->chromStart, end = bed->chromEnd; if (end != start+1) { // VCF treats indels in a special way: pos is the base to the left of the indel, // and both ref and alt alleles include the base to the left of the indel. indelBase = 1; } // Get reference allele sequence: uint refAlLen = end - start + indelBase; char ref[refAlLen+1]; uint seqStart = start - indelBase - seqOffset; safencpy(ref, sizeof(ref), seq->dna+seqStart, refAlLen); touppers(ref); // Alternate allele seq needs extra room in case of single-base insertion: char alt[refAlLen+2]; alt[0] = ref[0]; alt[1] = '\0'; if (start == end) // insertion: alt gets extra base safecpy(alt+1, sizeof(alt)-1, "A"); else if (end == start+1) // single nucleotide variant mutateBase(alt); // else deletion: leave alt truncated after the shared base to left of indel // VCF columns: #CHROM POS ID REF ALT QUAL FILTER INFO fprintf(f, "%s\t%u\t%s\t%s\t%s\t.\t.\t.\n", bed->chrom, start+1-indelBase, bed->name, ref, alt); } static void writeArbitraryVariants(FILE *f, struct annoAssembly *assembly) /* Concoct some variants at an arbitrary position on some assembly sequence. * We shouldn't ever get here unless the selected geneTrack is empty. */ { char *chrom = "NULL"; uint start = 0, end = 0; getCartPosOrDie(&chrom, &start, &end); struct bed4 *bed = bed4New(chrom, start, start, "ex_ins"); struct dnaSeq *seq = twoBitReadSeqFragLower(assembly->tbf, chrom, start-1, start+100); writeMinimalVcfRowFromBed(f, bed, seq, start-1); dnaSeqFree(&seq); } static void addGpFromRow(struct genePred **pGpList, struct annoRow *row, boolean *pNeedCoding, boolean *pNeedNonCoding, boolean isBig) /* If row is coding and we need a coding gp, add it to pGpList and update pNeedCoding; * likewise for noncoding. */ { struct genePred *gp = isBig ? (struct genePred *)genePredFromBigGenePredRow(row->data) : genePredLoad(row->data); if (gp->cdsStart != gp->cdsEnd && *pNeedCoding) { slAddHead(pGpList, gp); *pNeedCoding = FALSE; } else if (gp->cdsStart == gp->cdsEnd && *pNeedNonCoding) { slAddHead(pGpList, gp); *pNeedNonCoding = FALSE; } } static void addGpFromPos(struct annoStreamer *geneStream, char *chrom, uint start, uint end, struct genePred **pGpList, boolean *pNeedCoding, boolean *pNeedNonCoding, struct lm *lm, boolean isBig) /* Get up to 10 rows from position; if we find one coding and one non-coding gene, * add them to pGpList and update pNeed* accordingly. */ { geneStream->setRegion(geneStream, chrom, start, end); int rowCount = 0; struct annoRow *row = NULL; while (rowCount < 10 && (*pNeedCoding || *pNeedNonCoding) && (row = geneStream->nextRow(geneStream, NULL, 0, lm)) != NULL) addGpFromRow(pGpList, row, pNeedCoding, pNeedNonCoding, isBig); } static struct genePred *genesFromPosition(struct annoStreamer *geneStream, boolean isBig, boolean *retGotCoding, boolean *retGotNonCoding) /* Try to get a coding and non-coding gene from geneStream at cart position. * If there are none, try whole chrom; if none there, first in assembly. */ { struct genePred *gpList = NULL; struct lm *lm = lmInit(0); char *chrom = NULL; uint start = 0, end = 0; getCartPosOrDie(&chrom, &start, &end); boolean needCoding = TRUE, needNonCoding = TRUE; // First, look for both coding and noncoding genes at cart position: addGpFromPos(geneStream, chrom, start, end, &gpList, &needCoding, &needNonCoding, lm, isBig); // If that didn't do it, try querying whole chrom if (needCoding || needNonCoding) addGpFromPos(geneStream, chrom, 0, 0, &gpList, &needCoding, &needNonCoding, lm, isBig); // If we still haven't found either coding or non-coding on the cart's current chrom, // try whole genome: if (needCoding && needNonCoding) addGpFromPos(geneStream, NULL, 0, 0, &gpList, &needCoding, &needNonCoding, lm, isBig); slSort(&gpList, genePredCmp); lmCleanup(&lm); if (retGotCoding) *retGotCoding = !needCoding; if (retGotNonCoding) *retGotNonCoding = !needNonCoding; return gpList; } static void addSnv(struct bed4 **pBedList, char *chrom, uint start, char *name) /* Add a single-nucleotide bed4 item. */ { slAddHead(pBedList, bed4New(chrom, start, start+1, name)); } // Stuff within this range (see gpFx.h) is considered upstream/downstream of a transcript: #define UPSTREAMFUDGE GPRANGE static struct bed4 *sampleVarBedFromGenePred(struct genePred *gp, struct annoAssembly *assembly, uint txSeqStart, uint txSeqEnd, boolean exonOnly) /* Construct imaginary variants that hit various parts of the transcript * described in gp, using reference base values from assembly. */ { uint basesLeft = gp->txStart - txSeqStart, basesRight = txSeqEnd - gp->txEnd; struct bed4 *bedList = NULL; if (!exonOnly && basesLeft > UPSTREAMFUDGE) // SNV, intergenic to the left addSnv(&bedList, gp->chrom, txSeqStart, "ex_igLeft"); if (!exonOnly && basesLeft > 0) // SNV, up/downstream to the left addSnv(&bedList, gp->chrom, gp->txStart - 1, "ex_updownLeft"); if (!exonOnly && gp->exonCount > 1) { // SNV, intron uint start = (gp->exonEnds[0] + gp->exonStarts[1]) / 2; addSnv(&bedList, gp->chrom, start, "ex_intron"); // SNV, splice addSnv(&bedList, gp->chrom, gp->exonStarts[1] - 2, "ex_splice"); } boolean isCoding = (gp->cdsStart != gp->cdsEnd); if (isCoding) { if (gp->txStart < gp->cdsStart) // SNV, UTR to the left addSnv(&bedList, gp->chrom, gp->txStart + 1, "ex_utrLeft"); int cdsExon = 0; while (gp->cdsStart > gp->exonEnds[cdsExon] && cdsExon < gp->exonCount) cdsExon++; uint start = gp->cdsStart + 2; // single-base insertion, leftmost codon slAddHead(&bedList, bed4New(gp->chrom, start, start, "ex_codonLeftIns")); // 3-base deletion leftmost codon slAddHead(&bedList, bed4New(gp->chrom, start, start+3, "ex_codonLeftDel")); // SNV, leftmost codon addSnv(&bedList, gp->chrom, start+1, "ex_codonLeft"); if (gp->txEnd > gp->cdsEnd) // SNV, UTR to the right addSnv(&bedList, gp->chrom, gp->txEnd - 1, "ex_utrRight"); } else { // noncoding exon variant uint start = (gp->exonStarts[0] + gp->exonEnds[0]) / 2; addSnv(&bedList, gp->chrom, start, "ex_nce"); } if (!exonOnly && basesRight > 0) // SNV, up/downstream to the left addSnv(&bedList, gp->chrom, gp->txEnd + 1, "ex_updownRight"); return bedList; } // margin for intergenic variants around transcript: #define IGFUDGE 5 static struct annoStreamer *makeSampleVariantsStreamer(struct annoAssembly *assembly, struct trackDb *geneTdb, int maxOutRows) /* Construct a VCF file of example variants for db (unless it already exists) * and return an annoStreamer for that file. If possible, make the variants hit a gene. */ { char *sampleFile = sampleVariantsPath(assembly, geneTdb->track); boolean forceRebuild = cartUsualBoolean(cart, "hgva_rebuildSampleVariants", FALSE); if (! fileExists(sampleFile) || forceRebuild) { struct annoStreamer *geneStream = hAnnoStreamerFromTrackDb(assembly, geneTdb->table, geneTdb, NULL, ANNO_NO_LIMIT, NULL); boolean isBig = sameString(geneTdb->type, "bigGenePred"); boolean gotCoding = FALSE, gotNonCoding = FALSE; struct genePred *gpList = genesFromPosition(geneStream, isBig, &gotCoding, &gotNonCoding); FILE *f = mustOpen(sampleFile, "w"); writeMinimalVcfHeader(f, assembly->name, NULL); if (gpList == NULL) { warn("Unable to find any gene transcripts in '%s' (%s)", geneTdb->shortLabel, geneTdb->track); writeArbitraryVariants(f, assembly); } else { struct bed4 *bedList = NULL; uint chromSize = annoAssemblySeqSize(assembly, gpList->chrom); uint minSeqStart = chromSize, maxSeqEnd = 0; struct genePred *gp; for (gp = gpList; gp != NULL; gp = gp->next) { int txSeqStart = gp->txStart - (UPSTREAMFUDGE + IGFUDGE); uint txSeqEnd = gp->txEnd + UPSTREAMFUDGE + IGFUDGE; if (txSeqStart < 0) txSeqStart = 0; if (txSeqEnd > chromSize) txSeqEnd = chromSize; if (txSeqStart < minSeqStart) minSeqStart = txSeqStart; if (txSeqEnd > maxSeqEnd) maxSeqEnd = txSeqEnd; // If we got a coding gp, but this gp is non-coding, just do its exon variant: boolean exonOnly = gotCoding && (gp->cdsStart == gp->cdsEnd); slCat(&bedList, sampleVarBedFromGenePred(gp, assembly, txSeqStart, txSeqEnd, exonOnly)); } slSort(&bedList, bedCmp); struct dnaSeq *txSeq = twoBitReadSeqFragLower(assembly->tbf, gpList->chrom, minSeqStart, maxSeqEnd); struct bed4 *bed; for (bed = bedList; bed != NULL; bed = bed->next) { writeMinimalVcfRowFromBed(f, bed, txSeq, minSeqStart); } dnaSeqFree(&txSeq); bed4FreeList(&bedList); } geneStream->close(&geneStream); carefulClose(&f); } return annoStreamVcfNew(sampleFile, NULL, FALSE, assembly, maxOutRows); } static char *md5TrashPath(struct annoAssembly *assembly, char *string) /* Use the md5sum of a string to make a hopefully unique trash filename. */ { char *md5sum = md5HexForString(string); mkdirTrashDirectory(hgvsTrashSubDir); struct dyString *dy = dyStringCreate("%s/%s/%s_%s.vcf", trashDir(), hgvsTrashSubDir, assembly->name, md5sum); return dyStringCannibalize(&dy); } static struct slName *hashListNames(struct hash *hash) /* Return a list of all element names in the hash (if any). */ { struct slName *list = NULL; struct hashCookie cookie = hashFirst(hash); struct hashEl *hel; while ((hel = hashNext(&cookie)) != NULL) slAddHead(&list, slNameNew(hel->name)); return list; } static char *encloseInAngleBracketsDbSnp(char *stringIn) /* Return a string that has <dbSNP: stringIn>, with spaces replaced by '_'s. */ { int stringInLen = strlen(stringIn); int stringOutLen = stringInLen + strlen("<dbSNP:>") + 1; char *stringOut = needMem(stringOutLen); safef(stringOut, stringOutLen, "<dbSNP:%s>", stringIn); subChar(stringOut, ' ', '_'); return stringOut; } // dbSNP named alleles have many ways to describe a deletion from the reference, // for example "LARGEDELETION", "LARGE DELETION", "... DELETED", "... DEL": static const char *dbSnpDelRegex = "^\\(.*(DELET.*| DEL)\\)$"; static char **altAlsFromObserved(char *refAl, int obsCount, char **obsWords, boolean minusStrand, int *retAltAlCount, boolean *retNeedLeftBase) /* Given a non-symbolic reference allele and an array of observed alleles, * return an array of +-strand alleles that are not the same as the reference. * If any allele is "-" (deleted, zero-length), then set retNeedLeftBase to TRUE * because in this case VCF requires that the reference base to the left of the indel * must be added to all alleles, and the start coord also moves one base to the left. * Also, if any alt allele is symbolic, padding is required. * Note: this trashes obsAls. Resulting array can be freed but not its contents. */ { boolean obsHasDeletion = FALSE; int i; for (i = 0; i < obsCount; i++) if (sameString(obsWords[i], "-")) { obsHasDeletion = TRUE; break; } char **altAls; AllocArray(altAls, obsCount); int altCount = 0; boolean needLeftBase = isEmpty(refAl) || sameString(refAl, "-"); for (i = 0; i < obsCount; i++) { char *altAl = obsWords[i]; if (sameString(altAl, "-")) altAl[0] = '\0'; int altAlLen = strlen(altAl); if (minusStrand && isAllNt(altAl, altAlLen)) reverseComplement(altAl, altAlLen); if (differentString(altAl, refAl)) { if (isEmpty(altAl)) { needLeftBase = TRUE; } else { // It would be nice to expand the "(CA)11/12/14/15/16/17/18/19/20" syntax of // some dbSNP observed's. What are these?: "(D1S243)", "(D1S2870)" // Unfortunately for observed="lengthTooLong" we just can't get the correct allele // sequence. (76,130 of those in snp138) // Hmmm, I guess we could at least stick in the right number of N's if we can // parse "(245 BP INSERTION)". (2403 rows rlike "[0-9]+ BP ?INSERTION" in snp138) if (!isAllNt(altAl, altAlLen)) { // Symbolic allele: left base required, and enclose it in <dbSNP:>'s. // But if it's one of dbSNP's LARGEDELETION kind of alleles, that is redundant // with the reference allele, so if we know there is already a "-" allele, // skip it. if (obsHasDeletion && regexMatch(altAl, dbSnpDelRegex)) continue; needLeftBase = TRUE; altAl = encloseInAngleBracketsDbSnp(altAl); } altAls[altCount] = altAl; } altAls[altCount] = altAl; altCount++; } } *retAltAlCount = altCount; *retNeedLeftBase = needLeftBase; return altAls; } static char **parseDbSnpAltAlleles(char *refAl, char *obsAls, int alleleFreqCount, char *alleles, boolean minusStrand, int *retAltAlCount, boolean *retNeedLeftBase) /* Given a non-symbolic reference allele and slash-sep observed alleles from dbSNP, * return an array of +-strand alleles that are not the same as the reference. * If obsAls is "lengthTooLong" but alleleFreqCount is at least 2 and alleles is a comma-sep * list of nucleotide bases, use alleles instead. * If any allele is "-" (deleted, zero-length), then set retNeedLeftBase to TRUE * because in this case VCF requires that the reference base to the left of the indel * must be added to all alleles, and the start coord also moves one base to the left. * Also, if any alt allele is symbolic, padding is required. * Note: this trashes obsAls. Resulting array can be freed but not its contents. */ { int obsCount = 0; int maxWords = 1000; char *obsWords[maxWords]; if (sameString(obsAls, "lengthTooLong") && alleleFreqCount > 1) { obsCount = countChars(alleles, ','); if (obsCount > maxWords) errAbort("parseDbSnpAltAlleles: too many alleles (%d) in observed column", obsCount); chopByChar(alleles, ',', obsWords, obsCount); } else { obsCount = countChars(obsAls, '/') + 1; if (obsCount > maxWords) errAbort("parseDbSnpAltAlleles: too many alleles (%d) in observed column", obsCount); chopByChar(obsAls, '/', obsWords, obsCount); } return altAlsFromObserved(refAl, obsCount, obsWords, minusStrand, retAltAlCount, retNeedLeftBase); } char *firstNCommaSep(struct slName *nameList, int n) /* Return a comma-separated string with the first n names in nameList. */ { struct dyString *dy = dyStringNew(0); int i; struct slName *el; for (i=0, el=nameList; i < 5 && el != NULL; i++, el = el->next) { if (i > 0) dyStringAppend(dy, ", "); dyStringPrintf(dy, "'%s'", el->name); } return dyStringCannibalize(&dy); } //#*** Variant ID-matching should be metadata-driven too. termRegex -> data source. static const char *rsIdRegex = "^rs[0-9]+$"; static void rsIdsToVcfRecords(struct annoAssembly *assembly, struct slName *rsIds, struct vcfFile *vcff, struct vcfRecord **pRecList, FILE *warnF) /* If possible, look up coordinates and alleles of dbSNP rs IDs. */ { if (rsIds == NULL) return; struct trackDb *tdb = hFindLatestSnpTrack(database, NULL, &fullTrackList); if (tdb == NULL) return; struct sqlConnection *conn = hAllocConn(assembly->name); // Build a 'name in (...)' query, and build a hash of IDs so we can test whether all were found struct dyString *dq = sqlDyStringCreate("select chrom, chromStart, chromEnd, name, strand, " "refUCSC, observed, alleleFreqCount, alleles " "from %s where name in (", tdb->table); struct hash *idHash = hashNew(0); struct slName *id; for (id = rsIds; id != NULL; id = id->next) { tolowers(id->name); - dyStringPrintf(dq, "%s'%s'", (id != rsIds ? "," : ""), id->name); + if (id != rsIds) + sqlDyStringPrintf(dq, ","); + sqlDyStringPrintf(dq, "'%s'", id->name); hashStoreName(idHash, id->name); } -dyStringAppend(dq, ");"); +sqlDyStringPrintf(dq, ");"); struct sqlResult *sr = sqlGetResult(conn, dq->string); // Construct a minimal VCF row to make a vcfRecord for each variant. char *vcfRow[9]; vcfRow[5] = vcfRow[6] = vcfRow[7] = "."; // placeholder for qual, filter, info // It would be cool to someday add snpNNN's exceptions column to the filter or info. struct dyString *dyAltAlStr = dyStringNew(0); int i; char **row; while ((row = sqlNextRow(sr)) != NULL) { char *chrom = row[0]; uint chromStart = atoll(row[1]); uint chromEnd = atoll(row[2]); char *name = row[3]; char *strand = row[4]; char refAl[max(strlen(row[5]), chromEnd-chromStart) + 2]; safecpy(refAl, sizeof(refAl), row[5]); if (sameString(refAl, "-")) refAl[0] = '\0'; else if (! isAllNt(refAl, strlen(refAl))) { // refUCSC is symbolic like "( 2548bp insertion )" -- just use the actual reference // sequence for now, to keep things simple downstream. //#*** Need something more efficient, like a sequence cache inside assembly! struct dnaSeq *seq = twoBitReadSeqFragLower(assembly->tbf, chrom, chromStart, chromEnd); safecpy(refAl, sizeof(refAl), seq->dna); toUpperN(refAl, seq->size); dnaSeqFree(&seq); } char *obsAls = row[6]; int alFCount = atoi(row[7]); char *alFAls = row[8]; int altAlCount = 0; boolean needLeftBase = FALSE; char **altAls = parseDbSnpAltAlleles(refAl, obsAls, alFCount, alFAls, sameString(strand, "-"), &altAlCount, &needLeftBase); needLeftBase |= (chromStart == chromEnd); // should be redundant, but just in case. // variantProjector doesn't accept symbolic alleles (aside from VCF <DEL>), // so filter those out and warn: int j = 0; struct dyString *noSym = NULL; char *filteredAltAls[altAlCount]; for (i = 0; i < altAlCount; i++) { if (!isAllNt(altAls[i], strlen(altAls[i]))) { if (noSym == NULL) noSym = dyStringCreate("%s has symbolic allele(s) e.g. '%s', ignoring. ", name, altAls[i]); } else filteredAltAls[j++] = altAls[i]; } if (noSym != NULL) { fputs(noSym->string, warnF); dyStringFree(&noSym); } altAlCount = j; altAls = filteredAltAls; hashRemove(idHash, name); uint vcfStart = chromStart + 1; dyStringClear(dyAltAlStr); // If this is a non-symbolic indel, we'll have to move start one base to the left // and add the reference base to the left of the actual alleles if (needLeftBase) { vcfStart--; //#*** Need something more efficient, like a sequence cache inside assembly! struct dnaSeq *seq = twoBitReadSeqFragLower(assembly->tbf, chrom, chromStart-1, chromStart); toUpperN(seq->dna, 1); char leftBase = seq->dna[0]; dnaSeqFree(&seq); char refAlPlus[strlen(refAl)+2]; safef(refAlPlus, sizeof(refAlPlus), "%c%s", leftBase, refAl); safecpy(refAl, sizeof(refAl), refAlPlus); for (i = 0; i < altAlCount; i++) { if (i > 0) - dyStringAppendC(dyAltAlStr, ','); - dyStringPrintf(dyAltAlStr, "%c%s", leftBase, altAls[i]); + sqlDyStringPrintf(dyAltAlStr, ","); + sqlDyStringPrintf(dyAltAlStr, "%c%s", leftBase, altAls[i]); } } else { // Not an indel, just make comma-sep string of alt alleles. for (i = 0; i < altAlCount; i++) { if (i > 0) - dyStringAppendC(dyAltAlStr, ','); - dyStringAppend(dyAltAlStr, altAls[i]); + sqlDyStringPrintf(dyAltAlStr, ","); + sqlDyStringPrintf(dyAltAlStr, "%s", altAls[i]); } } if (altAlCount == 0) - dyStringAppendC(dyAltAlStr, '.'); + sqlDyStringPrintf(dyAltAlStr, "."); char vcfStartStr[64]; safef(vcfStartStr, sizeof(vcfStartStr), "%d", vcfStart); vcfRow[0] = chrom; vcfRow[1] = vcfStartStr; vcfRow[2] = name; vcfRow[3] = refAl; vcfRow[4] = dyAltAlStr->string; struct vcfRecord *rec = vcfRecordFromRow(vcff, vcfRow); slAddHead(pRecList, rec); } dyStringFree(&dyAltAlStr); // Check for IDs not found struct slName *notFoundIds = hashListNames(idHash); if (notFoundIds != NULL) { char *namesNotFound = firstNCommaSep(notFoundIds, 5); fprintf(warnF, "%d rs# IDs not found, e.g. %s", slCount(notFoundIds), namesNotFound); freeMem(namesNotFound); } slNameFreeList(¬FoundIds); } static struct vcfRecord *parseVariantIds(struct annoAssembly *assembly, char *variantIdText, FILE *warnF) /* Return a sorted list of minimal vcfRecords (coords, name, ref and alt alleles) * corresponding to each pasted variant. */ { struct vcfRecord *recList = NULL; char *p = cloneString(variantIdText), *id; struct slName *rsIds = NULL, *unknownIds = NULL; subChar(p, ',', ' '); struct lineFile *lf = lineFileOnString("rs# IDs", TRUE, p); char *line = NULL; while (lineFileNextReal(lf, &line)) { while ((id = nextWord(&line)) != NULL) { if (regexMatchNoCase(id, rsIdRegex)) slNameAddHead(&rsIds, id); else slNameAddHead(&unknownIds, id); } } lineFileClose(&lf); if (unknownIds != NULL) { slReverse(&unknownIds); char *firstUnknownIds = firstNCommaSep(unknownIds, 5); fprintf(warnF, "%d variant identifiers are unrecognized, e.g. %s. ", slCount(unknownIds), firstUnknownIds); freeMem(firstUnknownIds); } struct vcfFile *vcff = vcfFileNew(); rsIdsToVcfRecords(assembly, rsIds, vcff, &recList, warnF); slSort(&recList, vcfRecordCmp); slNameFreeList(&rsIds); slNameFreeList(&unknownIds); return recList; } static void adjustRangeForVariants(struct vcfRecord *recList, char **pChrom, uint *pStart, uint *pEnd) /* Given a *sorted* list of VCF records, look at the first and last records and if * they fall outside of the range, expand the range to include them. */ { if (pChrom != NULL && *pChrom != NULL && recList != NULL) { // We have a non-empty sorted list of records, and search is not genome-wide; // look at first and last variants to see if they're not already included in search range. struct vcfRecord *first = recList, *last = recList; while (last->next) last = last->next; if (differentString(*pChrom, first->chrom) || differentString(*pChrom, last->chrom)) { // Variants span multiple chroms; force genome-wide search. *pChrom = NULL; *pStart = *pEnd = 0; } else { // Variant(s) are on the same chrom as search range; check starts and ends. if (*pStart > first->chromEnd) *pStart = first->chromEnd; if (*pEnd < last->chromStart) *pEnd = last->chromStart; } } } static void slAddFileContentsTail(struct slName **pSlNameList, char *filename) /* If filename exists and is nonempty, add its contents as a new slName at end of list. */ { char *contents = NULL; size_t contentSize = 0; if (fileExists(filename)) readInGulp(filename, &contents, &contentSize); if (isNotEmpty(contents)) slAddTail(pSlNameList, slNameNew(contents)); freeMem(contents); } static struct annoStreamer *makeVariantIdStreamer(struct annoAssembly *assembly, int maxOutRows, char **pChrom, uint *pStart, uint *pEnd, struct slName **pCommentList) /* Translate user's pasted/uploaded variant IDs into minimal VCF if possible. * Return a VCF streamer for those, and if the current search position is too narrow * to include all of the variants, widen it as necessary. */ { // Hash variant text to get trash filename. Use if exists, otherwise build it. char *variantIds = cartString(cart, hgvaVariantIds); char *varFile = md5TrashPath(assembly, variantIds); char warnFile[strlen(varFile)+6]; safef(warnFile, sizeof(warnFile), "%s.warn", varFile); boolean forceRebuild = cartUsualBoolean(cart, "hgva_rebuildVariantIds", FALSE); if (! fileExists(varFile) || forceRebuild) { FILE *warnF = mustOpen(warnFile, "w"); struct vcfRecord *varList = parseVariantIds(assembly, variantIds, warnF); //#*** If no variants were recognized, we should probably show main page with a warning. adjustRangeForVariants(varList, pChrom, pStart, pEnd); FILE *f = mustOpen(varFile, "w"); writeMinimalVcfHeader(f, assembly->name, NULL); struct vcfRecord *var; for (var = varList; var != NULL; var = var->next) writeMinimalVcfRow(f, var); carefulClose(&f); carefulClose(&warnF); } slAddFileContentsTail(pCommentList, warnFile); return annoStreamVcfNew(varFile, NULL, FALSE, assembly, maxOutRows); } static struct vcfRecord *parseHgvs(struct vcfFile *vcff, struct annoAssembly *assembly, char *hgvsTerms, FILE *warnF) /* Return a sorted list of vcfRecords . */ { struct vcfRecord *recList = NULL; struct slName *failedTerms = NULL; struct dyString *dyError = dyStringNew(0); struct lineFile *lf = lineFileOnString("user-provided HGVS terms", TRUE, cloneString(hgvsTerms)); char *term = NULL; boolean notVersionedAcc = FALSE; while (lineFileNextReal(lf, &term)) { eraseTrailingSpaces(term); dyStringClear(dyError); struct vcfRow *vcfRow = hgvsToVcfRow(assembly->name, term, FALSE, dyError); if (vcfRow) { char *row[8]; row[0] = vcfRow->chrom; char posStr[64]; safef(posStr, sizeof(posStr), "%d", vcfRow->pos); row[1] = posStr; row[2] = vcfRow->id; row[3] = vcfRow->ref; row[4] = vcfRow->alt; row[5] = "."; row[6] = vcfRow->filter; row[7] = vcfRow->info; slAddHead(&recList, vcfRecordFromRow(vcff, row)); } else { if (!regexMatch(term, "^[A-Z_0-9]+\\.[0-9]+[: ]")) notVersionedAcc = TRUE; slNameAddHead(&failedTerms, dyStringContents(dyError)); } } if (failedTerms != NULL) { slReverse(&failedTerms); char *firstUnknownIds = firstNCommaSep(failedTerms, 5); fprintf(warnF, "%d HGVS terms could not be parsed and/or mapped to %s, e.g. %s. ", slCount(failedTerms), assembly->name, firstUnknownIds); if (notVersionedAcc) fputs(HGVS_MUST_USE_ACC, warnF); freeMem(firstUnknownIds); } slSort(&recList, vcfRecordCmp); slNameFreeList(&failedTerms); dyStringFree(&dyError); lineFileClose(&lf); return recList; } static struct annoStreamer *makeHgvsStreamer(struct annoAssembly *assembly, int maxOutRows, char **pChrom, uint *pStart, uint *pEnd, struct slName **pCommentList) /* Translate user's pasted/uploaded variant IDs into minimal VCF if possible. * Return a VCF streamer for those., and if the current search position is too narrow * to include all of the variants, widen it as necessary. */ { // Hash HGVS input to get trash filename. Use if exists, otherwise build it. char *hgvsTerms = cartString(cart, hgvaHgvs); char *varFile = md5TrashPath(assembly, hgvsTerms); char warnFile[strlen(varFile)+6]; safef(warnFile, sizeof(warnFile), "%s.warn", varFile); boolean forceRebuild = cartUsualBoolean(cart, "hgva_rebuildHgvs", FALSE); if (! fileExists(varFile) || forceRebuild) { char *headerString = makeMinimalVcfHeader(assembly->name, HGVS_VCF_HEADER_DEFS); struct vcfFile *vcff = vcfFileFromHeader("hgVaiHgvs", headerString, VCF_IGNORE_ERRS); FILE *warnF = mustOpen(warnFile, "w"); struct vcfRecord *varList = parseHgvs(vcff, assembly, hgvsTerms, warnF); //#*** If no HGVS terms could be mapped, we should probably show main page with a warning. adjustRangeForVariants(varList, pChrom, pStart, pEnd); FILE *f = mustOpen(varFile, "w"); writeMinimalVcfHeader(f, assembly->name, HGVS_VCF_HEADER_DEFS); struct vcfRecord *var; for (var = varList; var != NULL; var = var->next) writeMinimalVcfRow(f, var); carefulClose(&f); carefulClose(&warnF); vcfFileFree(&vcff); } slAddFileContentsTail(pCommentList, warnFile); return annoStreamVcfNew(varFile, NULL, FALSE, assembly, maxOutRows); } static struct trackDb *getVariantTrackDb(char *variantTrack) /* Get a tdb for the user's selected variant track, or warn and return NULL * (main page will be displayed) */ { struct trackDb *varTdb = tdbForTrack(database, variantTrack, &fullTrackList); if (varTdb == NULL) { if (isHubTrack(variantTrack)) warn("Can't find hub track '%s'", variantTrack); else warn("Can't find tdb for variant track '%s'", variantTrack); } else checkVariantTrack(varTdb); return varTdb; } static char *gencodeVersionFromTrack(char *track) /* If track is a GENCODE table, find and return a pointer to the version at the end; * otherwise return NULL. */ { if (startsWithGencodeGene(track)) { char *v = strrchr(track, 'V'); return v; } return NULL; } static char *gencodeTagTableForTrack(char *db, char *track) /* If there is a wgEncodeGencodeTag<version> table that can be associated with track, * return it; otherwise return NULL. */ { struct slName *versionList = getGencodeTagVersions(); if (startsWithGencodeGene(track)) { char *version = gencodeVersionFromTrack(track); if (version != NULL) { char table[PATH_LEN]; return cloneString(gencodeTableName("Tag", version, table, sizeof(table))); } } else if (sameString(track, "refGene") && refGeneHasGencodeTags(versionList)) { char *version = getLatestGencodeVersion(versionList); char table[PATH_LEN]; if (hTableExists(db, gencodeTableName("RefSeq", version, table, sizeof(table)))) return cloneString(gencodeTableName("Tag", version, table, sizeof(table))); } else if (sameString(track, "knownGene") && knownGeneHasGencodeTags()) { if (hTableExists(db, "knownToTag")) return cloneString("knownToTag"); } return NULL; } static struct joinerDtf *getTxStatusExtras(char *db, char *track) // Several kinds of transcript status may be enabled in the cart; if any are enabled, // and if they apply to track, return the tables & fields to be joined with the track. { struct joinerDtf *txStatusExtras = NULL; if (cartUsualBoolean(cart, "hgva_txStatus_gencode", FALSE)) { char *gencodeTagTable = gencodeTagTableForTrack(db, track); if (gencodeTagTable != NULL) { char *field = "tag"; if (sameString("knownToTag", gencodeTagTable)) field = "value"; slAddHead(&txStatusExtras, joinerDtfNew(db, gencodeTagTable, field)); } } if (cartUsualBoolean(cart, "hgva_txStatus_knownCanonical", FALSE) && sameString(track, "knownGene") && hTableExists(db, "knownCanonical")) { slAddHead(&txStatusExtras, joinerDtfNew(db, "knownCanonical", "transcript")); } if (cartUsualBoolean(cart, "hgva_txStatus_refSeqStatus", FALSE) && sameString(track, "refGene") && genbankTableExists(db, refSeqStatusTable)) { char *genbankDb=NULL, *tableName=NULL; genbankGetDbTable(db, refSeqStatusTable, &genbankDb, &tableName); slAddHead(&txStatusExtras, joinerDtfNew(genbankDb, tableName, "status")); } return txStatusExtras; } static void configAddDtf(struct dyString *dy, struct joinerDtf *dtf, boolean *pIsFirst) /* Add a JSON object with [db].table and (list of one) field. */ { if (! *pIsFirst) dyStringAppend(dy, ", "); // The convention for related tables in same db as track is to prepend "." instead of "<db>.": char *db = (sameString(dtf->database, database) ? "" : dtf->database); dyStringPrintf(dy, "{ \"table\": \"%s.%s\", \"fields\": [\"%s\"] }", db, dtf->table, dtf->field); *pIsFirst = FALSE; } static void configAddTableField(struct dyString *dy, char *table, char *field, boolean *pIsFirst) /* Add a JSON object with current database, table and (list of one) field. */ { struct joinerDtf dtf; dtf.database = database; dtf.table = table; dtf.field = field; configAddDtf(dy, &dtf, pIsFirst); } static struct jsonElement *configForStreamer(char *db, struct trackDb *tdb, struct joinerDtf *txStatusExtras) /* Add VAI-specific config options, if applicable. */ { struct jsonElement *config = NULL; char *track = tdb->track; struct dyString *dyConfig = dyStringCreate("{ \"naForMissing\": false," " \"relatedTables\": [ "); boolean isFirst = TRUE; // If track is sql-based knownGene and we have kgXref, then add kgXref.geneSymbol after // the columns of knownGene. if (sameString(track, "knownGene") && !isCustomTrack(track) && !isHubTrack(track) && !trackDbSetting(tdb, "bigDataUrl") && hTableExists(db, "kgXref")) { configAddTableField(dyConfig, "kgXref", "geneSymbol", &isFirst); } struct joinerDtf *txStatDtf; for (txStatDtf = txStatusExtras; txStatDtf != NULL; txStatDtf = txStatDtf->next) configAddDtf(dyConfig, txStatDtf, &isFirst); // If any of the above apply, close the relatedTables list and config object // and parse into jsonElements. if (! isFirst) { dyStringAppend(dyConfig, " ] }"); config = jsonParse(dyConfig->string); dyStringFree(&dyConfig); } return config; } static void adjustGpVarOverlapRule(struct annoGrator *gpVarGrator, boolean haveRegulatory) /* If we're able to detect regulatory elements, and want to keep those annotations, loosen up * gpVarGrator's overlap rule from the default (must overlap). */ { if (haveRegulatory && cartUsualBoolean(cart, "hgva_include_regulatory", TRUE)) gpVarGrator->setOverlapRule(gpVarGrator, agoNoConstraint); } static void addTxStatusExtras(struct annoFormatter *vepOut, char *geneTrack, struct annoGrator *gpVarGrator, struct joinerDtf *txStatusExtras) /* Given a list of tables and fields that will be joined with geneTrack to provide transcript * status info, configure vepOut to put them in the EXTRAs column. */ { struct joinerDtf *txStatDtf; for (txStatDtf = txStatusExtras; txStatDtf != NULL; txStatDtf = txStatDtf->next) { char *tag = NULL, *description = NULL; boolean isBoolean = FALSE; // Double-check that we're not joining in some table from a different assembly database: if (differentString(txStatDtf->database, database) && differentString(txStatDtf->database, "hgFixed")) errAbort("addTxStatusExtras: Expected db=%s or hgFixed in txStatDtf but got %s", database, txStatDtf->database); if ((startsWith(GENCODE_PREFIX"Tag", txStatDtf->table) && sameString(txStatDtf->field, "tag")) || (sameString(txStatDtf->table, "knownToTag") && sameString(txStatDtf->field, "value"))) { tag = "GENCODE_TAG"; description = "<A HREF=" GENCODE_TAG_DOC_URL " " "TARGET=_BLANK>GENCODE tags</A> for the transcript"; } else if (sameString(txStatDtf->table, "knownCanonical") && sameString(txStatDtf->field, "transcript")) { tag = "CANONICAL"; description = "If present, the transcript is the 'canonical' transcript of the gene " "(generally the longest isoform of the gene)"; isBoolean = TRUE; } else if (sameString(txStatDtf->table, "refSeqStatus") && sameString(txStatDtf->field, "status")) { tag = "REFSEQ_STATUS"; description = "<A HREF=" REFSEQ_STATUS_DOC_URL " " "TARGET=_BLANK>RefSeq status</A> of the transcript"; } else { errAbort("addTxStatusExtras: Unrecognized {table,field}: {%s,%s}", txStatDtf->table, txStatDtf->field); } char *column = annoStreamDbColumnNameFromDtf(database, geneTrack, txStatDtf); annoFormatVepAddExtraItem(vepOut, (struct annoStreamer *)gpVarGrator, tag, description, column, isBoolean); } } void doQuery() /* Translate simple form inputs into anno* components and execute query. */ { dyInfo = dyStringNew(0); char *chrom = NULL; uint start = 0, end = 0; if (sameString(regionType, hgvaRegionTypeRange)) getCartPosOrDie(&chrom, &start, &end); struct annoAssembly *assembly = hAnnoGetAssembly(database); boolean isCommandLine = (cgiOptionalString("cgiSpoof") != NULL); char *geneTrack = cartString(cart, "hgva_geneTrack"); struct trackDb *geneTdb = tdbForTrack(database, geneTrack, &fullTrackList); if (geneTdb == NULL) { warn("Can't find tdb for gene track %s", geneTrack); if (! isCommandLine) doUi(); return; } int maxVarRows = cartUsualInt(cart, "hgva_variantLimit", 10); struct annoStreamer *primary = NULL; char *primaryLongLabel = NULL; char *variantTrack = cartString(cart, "hgva_variantTrack"); struct slName *commentList = NULL; if (sameString(variantTrack, hgvaSampleVariants)) { primary = makeSampleVariantsStreamer(assembly, geneTdb, maxVarRows); primaryLongLabel = hgvaSampleVariantsLabel; // Sample variants can't always be made within the currently selected position range, // so just for these, force search to be genome-wide. chrom = NULL; start = 0; end = 0; } else if (sameString(variantTrack, hgvaUseVariantIds)) { // Override search position if cart position doesn't include all variants: primary = makeVariantIdStreamer(assembly, maxVarRows, &chrom, &start, &end, &commentList); primaryLongLabel = hgvaVariantIdsLabel; } else if (sameString(variantTrack, hgvaUseHgvs)) { primary = makeHgvsStreamer(assembly, maxVarRows, &chrom, &start, &end, &commentList); primaryLongLabel = hgvaHgvsLabel; } else if (sameString(variantTrack, hgvaUseVariantFileOrUrl)) { char *fileOrUrl = cartString(cart, hgvaVariantFileOrUrl); char *type = cartOptionalString(cart, hgvaVariantFileOrUrlType); primary = hAnnoStreamerFromBigFileUrl(fileOrUrl, NULL, assembly, maxVarRows, type); primaryLongLabel = hgvaVariantFileOrUrlLabel; } else { struct trackDb *varTdb = getVariantTrackDb(variantTrack); if (varTdb == NULL) { if (! isCommandLine) doUi(); return; } primary = hAnnoStreamerFromTrackDb(assembly, varTdb->table, varTdb, chrom, maxVarRows, NULL); primaryLongLabel = varTdb->longLabel; } enum annoGratorOverlap geneOverlapRule = agoMustOverlap; struct joinerDtf *txStatusExtras = getTxStatusExtras(database, geneTrack); struct jsonElement *gpConfig = configForStreamer(database, geneTdb, txStatusExtras); struct annoGrator *gpVarGrator = hAnnoGratorFromTrackDb(assembly, geneTdb->table, geneTdb, chrom, ANNO_NO_LIMIT, primary->asObj, geneOverlapRule, gpConfig); setGpVarFuncFilter(gpVarGrator); // Some grators may be used as both filters and output values. To avoid making // multiple grators for the same source, hash them by trackName: struct hash *gratorsByName = hashNew(8); struct annoGrator *snpGrator = NULL; char *snpDesc = NULL; if (cartUsualBoolean(cart, "hgva_rsId", TRUE)) snpGrator = gratorForSnpBed4(gratorsByName, "", assembly, chrom, agoNoConstraint, &snpDesc); // Now construct gratorList in the order in which annoFormatVep wants to see them, // i.e. first the gpVar, then the snpNNN, then whatever else: struct annoGrator *gratorList = NULL; slAddHead(&gratorList, gpVarGrator); if (snpGrator != NULL) slAddHead(&gratorList, snpGrator); // Text or HTML output? char *outFormat = cartUsualString(cart, "hgva_outFormat", "vepTab"); boolean doHtml = sameString(outFormat, "vepHtml"); // Initialize VEP formatter: struct annoFormatter *vepOut = annoFormatVepNew("stdout", doHtml, primary, primaryLongLabel, (struct annoStreamer *)gpVarGrator, geneTdb->longLabel, (struct annoStreamer *)snpGrator, snpDesc, assembly); addTxStatusExtras(vepOut, geneTrack, gpVarGrator, txStatusExtras); boolean haveRegulatory = FALSE; addOutputTracks(&gratorList, gratorsByName, vepOut, assembly, chrom, doHtml, &haveRegulatory); adjustGpVarOverlapRule(gpVarGrator, haveRegulatory); setHgvsOutOptions(gpVarGrator, geneTdb->track, vepOut); addFilterTracks(&gratorList, gratorsByName, assembly, chrom); slReverse(&gratorList); if (doHtml) { webStart(cart, database, "Annotated Variants in VEP/HTML format"); } else if (! isCommandLine) { // Undo the htmlPushEarlyHandlers() because after this point they make ugly text: popWarnHandler(); popAbortHandler(); textOpen(); webStartText(); } if (isNotEmpty(dyInfo->string)) puts(dyInfo->string); struct annoGratorQuery *query = annoGratorQueryNew(assembly, primary, gratorList, vepOut); struct slName *comment; for (comment = commentList; comment != NULL; comment = comment->next) vepOut->comment(vepOut, comment->name); if (chrom != NULL) annoGratorQuerySetRegion(query, chrom, start, end); annoGratorQueryExecute(query); annoGratorQueryFree(&query); if (doHtml) webEnd(); else if (! isCommandLine) textOutClose(&compressPipeline, NULL); } struct hgPositions *lookupPosition(struct dyString *dyWarn) /* Look up position (aka range) if need be. Return a container of matching tables and positions. * Warnings/errors are appended to dyWarn. */ { char *range = windowsToAscii(cloneString(cartUsualString(cart, hgvaRange, ""))); range = trimSpaces(range); if (isEmpty(range)) range = hDefaultPos(database); struct hgPositions *hgp = hgFindSearch(cart, &range, NULL, NULL, NULL, cgiScriptName(), dyWarn); cartSetString(cart, hgvaRange, range); return hgp; } int main(int argc, char *argv[]) /* Process command line. */ { long enteredMainTime = clock1000(); if (hIsPrivateHost()) pushCarefulMemHandler(LIMIT_2or6GB); cgiSpoof(&argc, argv); boolean isCommandLine = (cgiOptionalString("cgiSpoof") != NULL); if (!isCommandLine) htmlPushEarlyHandlers(); /* Make errors legible during initialization. */ oldVars = hashNew(10); boolean startQuery = (cgiUsualString("hgva_startQuery", NULL) != NULL); if (startQuery) { if (isCommandLine) // No HTTP header for command-line use. cart = cartForSession(hUserCookie(), excludeVars, oldVars); else cart = cartAndCookieNoContent(hUserCookie(), excludeVars, oldVars); } else cart = cartAndCookie(hUserCookie(), excludeVars, oldVars); // Try to deal with virt chrom position used by hgTracks. if (startsWith( MULTI_REGION_CHROM, cartUsualString(cart, "position", "")) || startsWith(OLD_MULTI_REGION_CHROM, cartUsualString(cart, "position", ""))) cartSetString(cart, "position", cartUsualString(cart, "nonVirtPosition", "")); /* Set up global variables. */ getDbAndGenome(cart, &database, &genome, oldVars); chromAliasSetup(database); initGenbankTableNames(database); regionType = cartUsualString(cart, hgvaRegionType, hgvaRegionTypeGenome); if (isEmpty(cartOptionalString(cart, hgvaRange))) cartSetString(cart, hgvaRange, hDefaultPos(database)); int timeout = cartUsualInt(cart, "udcTimeout", 300); if (udcCacheTimeout() < timeout) udcSetCacheTimeout(timeout); knetUdcInstall(); char *range = trimSpaces(windowsToAscii(cartUsualString(cart, hgvaRange, ""))); cartSetLastPosition(cart, range, oldVars); cartTrackDbInit(cart, &fullTrackList, &fullGroupList, TRUE); struct dyString *dyWarn = dyStringNew(0); boolean noShort = (cartOptionalString(cart, "noShort") != NULL); // is this the second page of results struct hgPositions *hgp = lookupPosition(dyWarn); if (hgp->singlePos && isEmpty(dyWarn->string) && !noShort) { if (startQuery) doQuery(); else if (! isCommandLine) doUi(); } else { if (startQuery) // Need Content-type (html) cartWebStartHeader(cart, database, "Variant Annotation Integrator"); else cartWebStart(cart, database, "Variant Annotation Integrator"); if (isNotEmpty(dyWarn->string)) { if (noShort) // we're on the second page of results hgp->posCount = 0; // hgFindSearch gives us a bogus hgp if the warn string is set else warn("%s", dyWarn->string); } if ((hgp->posCount > 1) || noShort) // if we're on the second page we want to put out HTML even if there are no results. { hgPositionsHtml(database, hgp, hgVaiName(), cart); cartSetString(cart, hgvaRange, range); // we need to reset the position because lookupPosition above trashes it } else doMainPage(); cartWebEnd(); } cartCheckout(&cart); if (! isCommandLine) cgiExitTime("hgVai", enteredMainTime); return 0; }