e46073f856770bdfef4f7637eea8f9f9297aa139 chmalee Tue Nov 19 15:57:08 2019 -0800 Initial commit of new track type vcfPhased trio. A line with ticks, one per haplotype per sample in the VCF, as specified by trackDb variables. diff --git src/hg/lib/vcfUi.c src/hg/lib/vcfUi.c index 045f92c..4847cfa 100644 --- src/hg/lib/vcfUi.c +++ src/hg/lib/vcfUi.c @@ -1,425 +1,536 @@ /* vcfUi - Variant Call Format user interface controls that are shared * between more than one CGI. */ /* Copyright (C) 2014 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "cheapcgi.h" #include "errCatch.h" #include "hCommon.h" #include "hui.h" #include "jsHelper.h" #include "vcf.h" #include "vcfUi.h" #include "knetUdc.h" #include "udc.h" INLINE char *nameOrDefault(char *thisName, char *defaultVal) /* If thisName is not a placeholder value, return it; otherwise return default. */ { if (isNotEmpty(thisName) && !sameString(thisName, ".")) return thisName; return defaultVal; } #define VCF_HAPLOSORT_DEFAULT_DESC "middle variant in viewing window" static void vcfCfgHaplotypeCenterHiddens(char *track, char *ctrName, char *ctrChrom, int ctrPos) /* Make hidden form inputs and button for setting the center variant for haplotype * clustering/sorting in hgTracks. */ { char cartVar[1024]; safef(cartVar, sizeof(cartVar), "%s.centerVariantChrom", track); cgiMakeHiddenVar(cartVar, ctrChrom); safef(cartVar, sizeof(cartVar), "%s.centerVariantPos", track); char ctrPosStr[16]; safef(ctrPosStr, sizeof(ctrPosStr), "%d", ctrPos); cgiMakeHiddenVar(cartVar, ctrPosStr); safef(cartVar, sizeof(cartVar), "%s.centerVariantName", track); cgiMakeHiddenVar(cartVar, ctrName); } char *vcfHaplotypeOrSample(struct cart *cart) /* Return "Sample" if the current organism is uniploid (like SARS-CoV-2), "Haplotype" otherwise. 
*/ { // We should make a better way of determining whether the organism is diploid, // but for now this will prevent David from being bothered by diploid terminology // when viewing SARS-CoV-2 variants: return sameOk(cartOptionalString(cart, "db"), "wuhCor1") ? "Sample" : "Haplotype"; } void vcfCfgHaplotypeCenter(struct cart *cart, struct trackDb *tdb, char *track, boolean parentLevel, struct vcfFile *vcff, char *thisName, char *thisChrom, int thisPos, char *formName) /* If vcff has genotype data, show status and controls for choosing the center variant * for haplotype clustering/sorting in hgTracks. */ { if (vcff != NULL && vcff->genotypeCount > 1) { printf("using "); char *centerChrom = cartOptionalStringClosestToHome(cart, tdb, parentLevel, "centerVariantChrom"); if (isEmpty(centerChrom)) { // Unspecified in cart -- describe the default action printf(VCF_HAPLOSORT_DEFAULT_DESC " as anchor.\n"); if (isNotEmpty(thisChrom)) { // but we do have a candidate, so offer to make it the center: puts(""); vcfCfgHaplotypeCenterHiddens(track, thisName, thisChrom, thisPos); char label[256]; safef(label, sizeof(label), "Use %s", nameOrDefault(thisName, "this variant")); cgiMakeButton("setCenterSubmit", label); printf(" as anchor\n"); } else { printf(""); char *hapOrSample = vcfHaplotypeOrSample(cart); if (sameString(hapOrSample, "Sample")) { puts("Samples are clustered by similarity around a central variant. " "Samples are reordered for display using the clustering tree, which is " "drawn in the left label area."); } else { puts("If this mode is selected and genotypes are phased or homozygous, " "then each genotype is split into two independent haplotypes. " "These local haplotypes are clustered by similarity around a central variant. " "Haplotypes are reordered for display using the clustering tree, which is " "drawn in the left label area. " "Local haplotype blocks can often be identified using this display."); } printf("
To anchor the sorting to a particular variant, " "click on the variant in the genome browser, " "and then click on the 'Use this variant' button on the next page." "\n"); } } else { // Describe the one specified in cart. int centerPos = cartUsualIntClosestToHome(cart, tdb, parentLevel, "centerVariantPos", -1); char *centerName = cartStringClosestToHome(cart, tdb, parentLevel, "centerVariantName"); if (isNotEmpty(thisChrom)) { // These form inputs are for either "use me" or clear: vcfCfgHaplotypeCenterHiddens(track, thisName, thisChrom, thisPos); // Is this variant the same as the center variant specified in cart? if (sameString(thisChrom, centerChrom) && sameString(thisName, centerName) && thisPos == centerPos) printf("this variant as anchor.\n"); else { // make a "use me" button printf("%s at %s:%d as anchor.\n\n", nameOrDefault(centerName, "variant"), centerChrom, centerPos+1); char label[256]; safef(label, sizeof(label), "Use %s", nameOrDefault(thisName, "this variant")); cgiMakeButton("replaceCenterSubmit", label); printf(" as anchor\n"); } } else { // Form inputs (in case the clear button is clicked) vcfCfgHaplotypeCenterHiddens(track, centerName, centerChrom, centerPos); printf("%s at %s:%d as anchor.\n", nameOrDefault(centerName, "variant"), centerChrom, centerPos+1); } // Make a clear button that modifies the hiddens using onClick puts(""); struct dyString *onClick = dyStringNew(0); dyStringPrintf(onClick, "updateOrMakeNamedVariable(%s, '%s.centerVariantChrom', ''); ", formName, track); dyStringPrintf(onClick, "updateOrMakeNamedVariable(%s, '%s.centerVariantName', ''); ", formName, track); dyStringPrintf(onClick, "updateOrMakeNamedVariable(%s, '%s.centerVariantPos', 0);", formName, track); dyStringPrintf(onClick, "document.%s.submit(); return false;", formName); cgiMakeButtonWithOnClick("clearCenterSubmit", "Clear selection", NULL, onClick->string); printf(" (use " VCF_HAPLOSORT_DEFAULT_DESC ")\n"); } } } static void vcfCfgHaplotypeMethod(struct cart *cart, 
struct trackDb *tdb, char *track, boolean parentLevel, struct vcfFile *vcff) /* If vcff has genotype data, offer the option of whether to cluster or just use the order * of genotypes in the VCF file. For clustering, show status and controls for choosing the * center variant for haplotype clustering/sorting in hgTracks. */ { if (vcff != NULL && vcff->genotypeCount > 1) { printf("\n", vcfHaplotypeOrSample(cart)); // If trackDb specifies a treeFile, offer that as an option char *hapMethod = cartOrTdbString(cart, tdb, VCF_HAP_METHOD_VAR, VCF_DEFAULT_HAP_METHOD); char *hapMethodTdb = trackDbSetting(tdb, VCF_HAP_METHOD_VAR); char varName[1024]; safef(varName, sizeof(varName), "%s." VCF_HAP_METHOD_VAR, track); if (hapMethodTdb && startsWithWord("treeFile", hapMethodTdb)) { puts(""); } printf(""); puts("
" "%s sorting order:
"); cgiMakeRadioButton(varName, VCF_HAP_METHOD_TREE_FILE, startsWithWord(VCF_HAP_METHOD_TREE_FILE, hapMethod)); printf("using the tree specified in file associated with track
"); cgiMakeRadioButton(varName, VCF_HAP_METHOD_CENTER_WEIGHTED, sameString(hapMethod, VCF_HAP_METHOD_CENTER_WEIGHTED)); printf(""); vcfCfgHaplotypeCenter(cart, tdb, track, parentLevel, vcff, NULL, NULL, 0, "mainForm"); puts("
"); cgiMakeRadioButton(varName, VCF_HAP_METHOD_FILE_ORDER, sameString(hapMethod, VCF_HAP_METHOD_FILE_ORDER)); puts("using the order in which samples appear in the underlying VCF file
"); jsInlineF("$('input[type=radio][name=\"%s\"]').change(function() { " "if (this.value == '"VCF_HAP_METHOD_CENTER_WEIGHTED"') {" " $('#leafShapeContainer').show();" "} else {" " $('#leafShapeContainer').hide();" "}});\n", varName); } } //TODO: share this code w/hgTracks, hgc in hg/lib/vcfFile.c static struct vcfFile *vcfHopefullyOpenHeader(struct cart *cart, struct trackDb *tdb) /* Defend against network errors and return the vcfFile object with header data, or NULL. */ { knetUdcInstall(); if (udcCacheTimeout() < 300) udcSetCacheTimeout(300); char *fileOrUrl = trackDbSetting(tdb, "bigDataUrl"); if (isEmpty(fileOrUrl)) { char *db = cartString(cart, "db"); char *table = tdb->table; char *dbTableName = trackDbSetting(tdb, "dbTableName"); struct sqlConnection *conn; if (isCustomTrack(tdb->track) && isNotEmpty(dbTableName)) { conn = hAllocConn(CUSTOM_TRASH); table = dbTableName; } else conn = hAllocConnTrack(db, tdb); char *chrom = cartOptionalString(cart, "c"); if (chrom != NULL) fileOrUrl = bbiNameFromSettingOrTableChrom(tdb, conn, table, chrom); if (fileOrUrl == NULL) fileOrUrl = bbiNameFromSettingOrTableChrom(tdb, conn, table, hDefaultChrom(db)); hFreeConn(&conn); } if (fileOrUrl == NULL) return NULL; int vcfMaxErr = 100; struct vcfFile *vcff = NULL; /* protect against temporary network error */ struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) { /* NOTE(review): every type that does not start with "vcfTabix" -- including "vcfPhasedTrio", which vcfCfgUi tests for -- is opened with vcfFileMayOpen(); confirm that is intended. */ if (startsWithWord("vcfTabix", tdb->type)) vcff = vcfTabixFileMayOpen(fileOrUrl, NULL, 0, 0, vcfMaxErr, -1); else vcff = vcfFileMayOpen(fileOrUrl, NULL, 0, 0, vcfMaxErr, -1, FALSE); } errCatchEnd(errCatch); if (errCatch->gotError) { if (isNotEmpty(errCatch->message->string)) warn("unable to open %s: %s", fileOrUrl, errCatch->message->string); } errCatchFree(&errCatch); return vcff; } static void vcfCfgHapClusterEnable(struct cart *cart, struct trackDb *tdb, char *name, boolean parentLevel) /* Let the user enable/disable haplotype sorting display. 
*/ { boolean hapClustEnabled = cartOrTdbBoolean(cart, tdb, VCF_HAP_ENABLED_VAR, TRUE); char cartVar[1024]; safef(cartVar, sizeof(cartVar), "%s." VCF_HAP_ENABLED_VAR, name); cgiMakeCheckBox(cartVar, hapClustEnabled); printf("Enable %s sorting display
\n", vcfHaplotypeOrSample(cart)); } static void vcfCfgHapClusterColor(struct cart *cart, struct trackDb *tdb, char *name, boolean parentLevel) /* Let the user choose how to color the sorted haplotypes. */ { printf("Allele coloring scheme:
\n"); char *colorBy = cartOrTdbString(cart, tdb, VCF_HAP_COLORBY_VAR, VCF_DEFAULT_HAP_COLORBY); char varName[1024]; safef(varName, sizeof(varName), "%s." VCF_HAP_COLORBY_VAR, name); cgiMakeRadioButton(varName, VCF_HAP_COLORBY_ALTONLY, sameString(colorBy, VCF_HAP_COLORBY_ALTONLY)); printf("reference alleles invisible, alternate alleles in black
\n"); cgiMakeRadioButton(varName, VCF_HAP_COLORBY_REFALT, sameString(colorBy, VCF_HAP_COLORBY_REFALT)); printf("reference alleles in blue, alternate alleles in red
\n"); cgiMakeRadioButton(varName, VCF_HAP_COLORBY_BASE, sameString(colorBy, VCF_HAP_COLORBY_BASE)); printf("first base of allele (A = red, C = blue, G = green, T = magenta)
\n"); } static void vcfCfgHapClusterTreeAngle(struct cart *cart, struct trackDb *tdb, char *name, boolean parentLevel) /* Let the user choose branch shape. */ { // This option applies only to center-weighted clustering; don't show option when some other // method is selected. char *hapMethod = cartOrTdbString(cart, tdb, VCF_HAP_METHOD_VAR, VCF_DEFAULT_HAP_METHOD); printf("
\n", differentString(hapMethod, VCF_HAP_METHOD_CENTER_WEIGHTED) ? " style='display: none;'" : ""); printf("%s clustering tree leaf shape:
\n", vcfHaplotypeOrSample(cart)); char *treeAngle = cartOrTdbString(cart, tdb, VCF_HAP_TREEANGLE_VAR, VCF_DEFAULT_HAP_TREEANGLE); char varName[1024]; safef(varName, sizeof(varName), "%s." VCF_HAP_TREEANGLE_VAR, name); cgiMakeRadioButton(varName, VCF_HAP_TREEANGLE_TRIANGLE, sameString(treeAngle, VCF_HAP_TREEANGLE_TRIANGLE)); printf("draw branches whose samples are all identical as <
\n"); cgiMakeRadioButton(varName, VCF_HAP_TREEANGLE_RECTANGLE, sameString(treeAngle, VCF_HAP_TREEANGLE_RECTANGLE)); printf("draw branches whose samples are all identical as [
\n"); puts("
"); } static void vcfCfgHapClusterHeight(struct cart *cart, struct trackDb *tdb, struct vcfFile *vcff, char *name, boolean parentLevel) /* Let the user specify a height for the track. */ { if (vcff != NULL && vcff->genotypeCount > 1) { printf("%s sorting display height: \n", vcfHaplotypeOrSample(cart)); int cartHeight = cartOrTdbInt(cart, tdb, VCF_HAP_HEIGHT_VAR, VCF_DEFAULT_HAP_HEIGHT); char varName[1024]; safef(varName, sizeof(varName), "%s." VCF_HAP_HEIGHT_VAR, name); cgiMakeIntVarInRange(varName, cartHeight, "Height (in pixels) of track", 5, "4", "2500"); puts("
"); } } static void vcfCfgHapCluster(struct cart *cart, struct trackDb *tdb, struct vcfFile *vcff, char *name, boolean parentLevel) /* Show controls for haplotype-sorting display, which only makes sense to do when * the VCF file describes multiple genotypes. */ { char *hapOrSample = vcfHaplotypeOrSample(cart); printf("

%s sorting display

\n", hapOrSample); vcfCfgHapClusterEnable(cart, tdb, name, parentLevel); vcfCfgHaplotypeMethod(cart, tdb, name, parentLevel, vcff); vcfCfgHapClusterTreeAngle(cart, tdb, name, parentLevel); vcfCfgHapClusterColor(cart, tdb, name, parentLevel); vcfCfgHapClusterHeight(cart, tdb, vcff, name, parentLevel); } static void vcfCfgMinQual(struct cart *cart, struct trackDb *tdb, struct vcfFile *vcff, char *name, boolean parentLevel) /* If checkbox is checked, apply minimum value filter to QUAL column. */ { char cartVar[1024]; safef(cartVar, sizeof(cartVar), "%s." VCF_APPLY_MIN_QUAL_VAR, name); boolean applyFilter = cartOrTdbBoolean(cart, tdb, VCF_APPLY_MIN_QUAL_VAR, VCF_DEFAULT_APPLY_MIN_QUAL); cgiMakeCheckBox(cartVar, applyFilter); printf("Exclude variants with Quality/confidence score (QUAL) score less than\n"); double minQual = cartOrTdbDouble(cart, tdb, VCF_MIN_QUAL_VAR, VCF_DEFAULT_MIN_QUAL); safef(cartVar, sizeof(cartVar), "%s." VCF_MIN_QUAL_VAR, name); cgiMakeDoubleVar(cartVar, minQual, 10); printf("
\n"); } static void vcfCfgFilterColumn(struct cart *cart, struct trackDb *tdb, struct vcfFile *vcff, char *name, boolean parentLevel) /* Show controls for filtering by value of VCF's FILTER column, which uses values defined * in the header. */ { int filterCount = slCount(vcff->filterDefs); if (filterCount < 1) return; printf("Exclude variants with these FILTER values:
\n"); char cartVar[1024]; safef(cartVar, sizeof(cartVar), "%s."VCF_EXCLUDE_FILTER_VAR, name); if (slCount(vcff->filterDefs) > 1) { jsMakeCheckboxGroupSetClearButton(cartVar, TRUE); puts(" "); jsMakeCheckboxGroupSetClearButton(cartVar, FALSE); } char *values[filterCount]; char *labels[filterCount]; int i; struct vcfInfoDef *filt; for (i=0, filt=vcff->filterDefs; filt != NULL; i++, filt = filt->next) { values[i] = filt->key; struct dyString *dy = dyStringNew(0); dyStringAppend(dy, filt->key); if (isNotEmpty(filt->description)) dyStringPrintf(dy, " (%s)", filt->description); labels[i] = dyStringCannibalize(&dy); } struct slName *selectedValues = NULL; if (cartListVarExistsAnyLevel(cart, tdb, FALSE, VCF_EXCLUDE_FILTER_VAR)) selectedValues = cartOptionalSlNameListClosestToHome(cart, tdb, FALSE, VCF_EXCLUDE_FILTER_VAR); cgiMakeCheckboxGroupWithVals(cartVar, labels, values, filterCount, selectedValues, 1); } static void vcfCfgMinAlleleFreq(struct cart *cart, struct trackDb *tdb, struct vcfFile *vcff, char *name, boolean parentLevel) /* Show input for minimum allele frequency, if we can extract it from the VCF INFO column. */ { printf("Minimum minor allele frequency (if INFO column includes AF or AC+AN):\n"); double cartMinFreq = cartOrTdbDouble(cart, tdb, VCF_MIN_ALLELE_FREQ_VAR, VCF_DEFAULT_MIN_ALLELE_FREQ); char varName[1024]; safef(varName, sizeof(varName), "%s." VCF_MIN_ALLELE_FREQ_VAR, name); cgiMakeDoubleVarInRange(varName, cartMinFreq, "minor allele frequency between 0.0 and 0.5", 5, "0.0", "0.5"); puts("
"); }
+static struct slPair *vcfPhasedGetSamplesFromTdb(struct trackDb *tdb, boolean hideOtherSamples)
+/* Get the different VCF Phased Trio settings out of trackDb onto a list.
+ * Each returned pair's name is a sample name and its val is the alias that followed '|'
+ * in the setting (NULL when the sample has no alias).  When parent samples are configured
+ * and not hidden, the child is placed after the first parent and before the second;
+ * otherwise the list holds only the child.
+ * errAborts if the child sample setting is missing, or if some samples have aliases
+ * and others do not. */
+{
+// cloneString here because we will be munging the result if there are alternate labels
+char *childSampleMaybeAlias = cloneString(trackDbLocalSetting(tdb, VCF_PHASED_CHILD_SAMPLE_SETTING));
+char *parentSamplesMaybeAlias = cloneString(trackDbLocalSetting(tdb, VCF_PHASED_PARENTS_SAMPLE_SETTING));
+// Without a child sample there is nothing to list; fail with a message rather than
+// handing NULL to strchr() below.
+if (childSampleMaybeAlias == NULL)
+    errAbort("track %s is missing the required %s setting", tdb->track,
+             VCF_PHASED_CHILD_SAMPLE_SETTING);
+// Zero-filled so that no slot is ever read while indeterminate.
+char *samples[VCF_PHASED_MAX_OTHER_SAMPLES+1] = {NULL}; // for now only allow at most two parents
+int numOthers = 0;
+if (parentSamplesMaybeAlias && !hideOtherSamples)
+    {
+    // parentSamplesMaybeAlias is already a private copy, so it can be chopped in place.
+    numOthers = chopCommas(parentSamplesMaybeAlias, samples);
+    if (numOthers > VCF_PHASED_MAX_OTHER_SAMPLES)
+        {
+        warn("More than %d other samples specified for phased trio", VCF_PHASED_MAX_OTHER_SAMPLES);
+        numOthers = VCF_PHASED_MAX_OTHER_SAMPLES;
+        }
+    }
+if (numOthers > 0)
+    {
+    // shove child into middle of array, and if there are two parents, scoot the second one to the end
+    int lastParentIx = VCF_PHASED_MAX_OTHER_SAMPLES - 1;
+    if (numOthers > lastParentIx)
+        samples[VCF_PHASED_MAX_OTHER_SAMPLES] = samples[lastParentIx];
+    samples[lastParentIx] = childSampleMaybeAlias;
+    }
+else
+    // No usable parents (setting absent, empty, or hidden): the child stands alone.
+    samples[0] = childSampleMaybeAlias;
+
+boolean gotAlias = strchr(samples[0], '|') != NULL; // default to whatever is first
+struct slPair *ret = NULL;
+int i;
+for (i = 0; i < numOthers+1; i++)
+    {
+    char *val = strchr(samples[i], '|');
+    boolean foundAlias = val != NULL;
+    // Checked for every sample, so a later sample that lacks an alias is caught too.
+    if (foundAlias != gotAlias)
+        errAbort("Either all samples have aliases or none.");
+    if (val != NULL)
+        *val++ = 0;   // terminate the name, leave val pointing at the alias
+    slAddHead(&ret, slPairNew(samples[i], cloneString(val)));
+    }
+slReverse(&ret);
+return ret;
+}
+
+struct slPair *vcfPhasedGetSampleOrder(struct cart *cart, struct trackDb *tdb)
+/* Parse out a trio sample order from either trackDb or the cart.
+ * When the cart holds a sample order and parents are not hidden, return the trackDb
+ * samples arranged in the cart's order; cart names that match no trackDb sample are
+ * skipped.  Otherwise, or when no cart name matches, return the trackDb order. */
+{
+char sampleOrderVar[1028],hideParentsVar[1028];
+safef(sampleOrderVar, sizeof(sampleOrderVar), "%s.%s", tdb->track, VCF_PHASED_SAMPLE_ORDER_VAR);
+safef(hideParentsVar, sizeof(hideParentsVar), "%s.%s", tdb->track, VCF_PHASED_HIDE_OTHER_VAR);
+boolean hideOtherSamples = cartUsualBoolean(cart, hideParentsVar, FALSE);
+char *cartOrder = cartOptionalString(cart, sampleOrderVar);
+struct slPair *tdbOrder = vcfPhasedGetSamplesFromTdb(tdb,hideOtherSamples);
+if (cartOrder == NULL || hideOtherSamples)
+    return tdbOrder;
+
+struct slName *name;
+struct slName *fromCart = slNameListFromComma(cartOrder);
+struct slPair *ret = NULL;
+for (name = fromCart; name != NULL; name = name->next)
+    {
+    struct slPair *found = slPairFind(tdbOrder, name->name);
+    // Add a fresh pair rather than re-linking the node found: moving it would rewrite
+    // its next pointer while tdbOrder is still being searched.  The val pointer is shared
+    // with the tdbOrder node.
+    if (found != NULL)
+        slAddHead(&ret, slPairNew(found->name, found->val));
+    }
+if (ret == NULL)
+    return tdbOrder;
+slReverse(&ret);
+return ret;
+}
+
+static boolean hasSampleAliases(struct trackDb *tdb)
+/* Check whether trackDb has aliases for the sample names */
+{
+struct slPair *nameVals = vcfPhasedGetSamplesFromTdb(tdb,FALSE);
+return nameVals != NULL && nameVals->val != NULL;
+}
+
+static void vcfCfgPhasedTrioUi(struct cart *cart, struct trackDb *tdb, struct vcfFile *vcff, char *name)
+/* Put up the phased trio specific config settings */
+{
+if (hasSampleAliases(tdb))
+    {
+    printf("Label samples by:");
+    char defaultLabel[1024], aliasLabel[1024];
+    safef(defaultLabel, sizeof(defaultLabel), "%s.%s", tdb->track, VCF_PHASED_DEFAULT_LABEL_VAR);
+    safef(aliasLabel, sizeof(aliasLabel), "%s.%s", tdb->track, VCF_PHASED_ALIAS_LABEL_VAR);
+    boolean isDefaultChecked = cartUsualBoolean(cart, defaultLabel, TRUE);
+    boolean isAliasChecked = cartUsualBoolean(cart, aliasLabel, FALSE);
+    cgiMakeCheckBox(defaultLabel, isDefaultChecked);
+    printf("VCF file sample names  ");
+    cgiMakeCheckBox(aliasLabel, isAliasChecked);
+    printf("aliases");
+    printf("
"); + } +if (trackDbSetting(tdb,VCF_PHASED_PARENTS_SAMPLE_SETTING)) + { + printf("Hide parent/other sample(s)"); + char hideVarName[1024]; + safef(hideVarName, sizeof(hideVarName), "%s.%s", tdb->track, VCF_PHASED_HIDE_OTHER_VAR); + boolean hidingOtherSamples = cartUsualBoolean(cart, hideVarName, FALSE); + cgiMakeCheckBox(hideVarName, hidingOtherSamples); + } +} + void vcfCfgUi(struct cart *cart, struct trackDb *tdb, char *name, char *title, boolean boxed) /* VCF: Variant Call Format. redmine #3710 */ { boxed = cfgBeginBoxAndTitle(tdb, boxed, title); printf("", boxed ? " width='100%'" : ""); struct vcfFile *vcff = vcfHopefullyOpenHeader(cart, tdb); if (vcff != NULL) { boolean parentLevel = isNameAtParentLevel(tdb, name); /* NOTE(review): the type tests below are exact matches on tdb->type, while vcfHopefullyOpenHeader uses startsWithWord(); confirm tdb->type never carries trailing words here. */ - if (vcff->genotypeCount > 1) + if (vcff->genotypeCount > 1 && !sameString(tdb->type, "vcfPhasedTrio")) { vcfCfgHapCluster(cart, tdb, vcff, name, parentLevel); } + if (sameString(tdb->type, "vcfPhasedTrio")) + { + vcfCfgPhasedTrioUi(cart, tdb, vcff, name); + } if (differentString(tdb->track,"evsEsp6500")) { puts("

Filters

"); vcfCfgMinQual(cart, tdb, vcff, name, parentLevel); vcfCfgFilterColumn(cart, tdb, vcff, name, parentLevel); } vcfCfgMinAlleleFreq(cart, tdb, vcff, name, parentLevel); } else { printf("Sorry, couldn't access VCF file.
\n"); } puts(""); if (boxed && fileExists(hHelpFile("hgVcfTrackHelp"))) printf("VCF configuration help"); printf(""); if (!boxed && fileExists(hHelpFile("hgVcfTrackHelp"))) printf("

VCF " "configuration help

"); cfgEndBox(boxed); }