be693595e590bce1d7d9b5e0d34451047c3170f2 chmalee Thu Jul 30 15:52:52 2020 -0700 Add 2 new color options for trios, color by predicted functional affect and de novo child variants, refs #25983 diff --git src/hg/hgTracks/vcfTrack.c src/hg/hgTracks/vcfTrack.c index 425adcf..836efc5 100644 --- src/hg/hgTracks/vcfTrack.c +++ src/hg/hgTracks/vcfTrack.c @@ -2469,43 +2469,107 @@ c->next = c->next->next; else c = c->next; } } } // now fill out a distance matrix based on the cwaDistance between all the pairs in hapArray struct hapDistanceMatrix *hapDistMatrix = fillOutDistanceMatrix(hapArray, vcff, sample, &helper, gtCount); unsigned int hapCount = 2 * gtCount; unsigned int *hapOrder = needMem(sizeof(unsigned int) * hapCount); setHapOrderFromMatrix(hapOrder, hapCount, hapDistMatrix, hapArray, vcff, sample, sampleDrawOrder); return hapOrder; } -#define VCFPHASED_UNPHASED_COLOR MG_RED +static void setTransmitOrder(unsigned int *gtHapOrder, char **transmitOrder, int gtHapCount, struct vcfRecord *rec, char **sampleOrder, char *childSample) +/* Fill out an array of "transmitted", "untransmitted" strings for mouseover and color lookup */ +{ +int i; +int childSampleIx = stringArrayIx(childSample, sampleOrder, gtHapCount / 2); +for (i = 0; i < gtHapCount; i++) + { + int gtIx = gtHapOrder[i]; + struct vcfGenotype *gt = &(rec->genotypes[gtIx >> 1]); + int alleleSampleIx = stringArrayIx(gt->id, sampleOrder, gtHapCount / 2); + if (alleleSampleIx != childSampleIx) + if (alleleSampleIx > childSampleIx) + { + transmitOrder[i] = i % 2 == 0 ? "transmitted" : "untransmitted"; + } + else + { + transmitOrder[i] = i % 2 == 0 ? "untransmitted" : "transmitted"; + } + else + transmitOrder[i] = "child"; + } +for (i = 0; i < gtHapCount; i++) + fprintf(stderr, "%s, ", transmitOrder[i]); +fprintf(stderr, "\n"); +} + +enum phasedColorMode + { + noColorMode, + mendelDiffMode, + deNovoOnlyMode, + phasedFunctionMode + }; +static enum phasedColorMode getPhasedColorMode(struct trackDb *tdb) +/* Get the coloring mode for phased trio tracks */ +{ +enum phasedColorMode colorMode = noColorMode; +char *colorBy = cartOrTdbString(cart, tdb, VCF_PHASED_COLORBY_VAR, VCF_PHASED_COLORBY_VAR); +if (sameString(colorBy, VCF_PHASED_COLORBY_MENDEL_DIFF)) + colorMode = mendelDiffMode; +else if (sameString(colorBy, VCF_PHASED_COLORBY_DE_NOVO)) + colorMode = deNovoOnlyMode; +else + { + char *geneTrack = cartOrTdbString(cart, tdb, "geneTrack", NULL); + if (isNotEmpty(geneTrack) && sameString(colorBy, VCF_PHASED_COLORBY_FUNCTION)) + colorMode = phasedFunctionMode; + } +return colorMode; +} -static int getChildAlleleColor(struct track *track, struct vcfRecord *rec, int childHapIx, - int alleleIx, int nameIx, int gtHapCount, unsigned int *gtHapOrder, - char *sampleOrder[]) -/* Return the color we should use for this variant, depending on whether the allele - * of the child matches what the parent transmitted or if there was some kind of - * 'mendelian inconsistency' */ +static int getTickColor(struct track *track, struct vcfRecord *rec, int alleleIx, int nameIx, int gtHapCount, unsigned int *gtHapOrder, char *childSampleName, char *sampleOrder[], enum phasedColorMode colorMode, enum soTerm funcTerm) +/* Return the color we should use for this variant */ { // if there are no parents to draw then no concept of transmitted vs unstransmitted +Color col = MG_BLACK; +if (colorMode == noColorMode) + col = MG_BLACK; +else if (colorMode == phasedFunctionMode) + { + col = colorFromSoTerm(funcTerm); + } +else + { if (gtHapCount <= 2) - return MG_BLACK; + col = MG_BLACK; + else + { + char *sampleName = sampleOrder[nameIx]; + if (!sameString(sampleName, childSampleName)) // parents only have shading in funcMode + col = MG_BLACK; + else // maybe draw child ticks red + { + struct vcfGenotype *childGt = &(rec->genotypes[gtHapOrder[alleleIx] >> 1]); + int childHapIx = gtHapOrder[alleleIx] & 1 ? childGt->hapIxB : childGt->hapIxA; int parIx = alleleIx; int numSamples = gtHapCount / 2; boolean isEven = (alleleIx % 2) == 0; const int adjacentLineSet = 1; // how many haplotype "lines" away is the second parent if we are drawing in either // parent1,parent2,child or child,parent1,parent2 order: const int otherLineSet = 4; if (nameIx == 0) { if (!isEven) parIx = alleleIx + adjacentLineSet; else if (numSamples > 2) parIx = alleleIx + otherLineSet; } else if (nameIx == 1) @@ -2513,85 +2577,103 @@ if (isEven) parIx = alleleIx - adjacentLineSet; else if (numSamples > 2) // don't compare the allele the child to a missing parent parIx = alleleIx + adjacentLineSet; } else { if (isEven) parIx = alleleIx - adjacentLineSet; else parIx = alleleIx - otherLineSet; } struct vcfGenotype *parentGt = &(rec->genotypes[gtHapOrder[parIx] >> 1]); if (parentGt->isPhased || (parentGt->hapIxA == 1 && parentGt->hapIxB == 1)) { - int parentAlleleIx = gtHapOrder[parIx] & 1 ? parentGt->hapIxB : parentGt->hapIxA; - if (rec->alleles[childHapIx] != rec->alleles[parentAlleleIx]) // if they disagree mark as red - return MG_RED; + int transmittedIx = gtHapOrder[parIx] & 1 ? parentGt->hapIxB : parentGt->hapIxA; + int untransmittedIx = gtHapOrder[parIx] & 1 ? parentGt->hapIxA : parentGt->hapIxB; + col = MG_BLACK; + if (colorMode == mendelDiffMode) + { + if (rec->alleles[childHapIx] != rec->alleles[transmittedIx] && + rec->alleles[childHapIx] == rec->alleles[untransmittedIx]) + col = MG_RED; + } else - return MG_BLACK; + { + if (rec->alleles[childHapIx] != rec->alleles[transmittedIx] && + rec->alleles[childHapIx] != rec->alleles[untransmittedIx]) + col = MG_RED; + } + } } -return MG_BLACK; + } + } +return col; } -void vcfPhasedDrawOneRecord(struct track *track, struct hvGfx *hvg, struct vcfRecord *rec, void *item, - unsigned int *gtHapOrder, int gtHapCount, int xOff, int yOffsets[], - char *sampleOrder[], char *childSample, boolean highlightChildDiffs, - double scale) +void vcfPhasedDrawOneRecord(struct track *track, struct hvGfx *hvg, struct vcfRecord *rec, + void *item, unsigned int *gtHapOrder, int gtHapCount, int xOff, + int yOffsets[], char *sampleOrder[], char *childSample, double scale, + char *transmitOrder[], enum phasedColorMode colorMode, + enum soTerm funcTerm) // Draw a record's haplotypes on the appropriate lines { int i; int x1 = round((double)(rec->chromStart-winStart)*scale) + xOff; int x2 = round((double)(rec->chromEnd-winStart)*scale) + xOff; struct pgSnpVcfStartEnd *psvs = (struct pgSnpVcfStartEnd *)item; int w = x2-x1; if (w < 1) w = 1; int tickHeight = track->itemHeight(track, track->items); for (i = 0; i < gtHapCount ; i++) { struct vcfGenotype *gt = &(rec->genotypes[gtHapOrder[i] >> 1]); int nameIx = stringArrayIx(gt->id, sampleOrder, track->customInt); struct dyString *mouseover = dyStringNew(0); + int tickColor = getTickColor(track, rec, i, nameIx, gtHapCount, gtHapOrder, childSample, sampleOrder, colorMode, funcTerm); if (gt->isPhased || (gt->hapIxA == 1 && gt->hapIxB == 1)) // if phased or homozygous alt { int alIx = gtHapOrder[i] & 1 ? gt->hapIxB : gt->hapIxA; - int tickColor = MG_BLACK; - if (sameString(childSample, gt->id) && highlightChildDiffs) - tickColor = getChildAlleleColor(track, rec, alIx, i, nameIx, gtHapCount, gtHapOrder, sampleOrder); dyStringPrintf(mouseover, "%s ", gt->id); if (alIx != 0) // non-reference allele { if (sameString(childSample, gt->id)) // we're drawing child ticks { dyStringPrintf(mouseover, "allele: %s", rec->alleles[alIx]); } else { - dyStringPrintf(mouseover, "%s allele: %s", i == 0 || i == 5 ? "untransmitted" : "transmitted", rec->alleles[alIx]); + if (gt->isPhased) + dyStringPrintf(mouseover, "likely %s allele: %s", transmitOrder[i], rec->alleles[alIx]); + else + { + int otherIx = (alIx == gt->hapIxB ? gt->hapIxA : gt->hapIxB); + dyStringPrintf(mouseover, "unphased alleles: %s/%s", rec->alleles[alIx], rec->alleles[otherIx]); + } } int y = yOffsets[i] - (tickHeight/2); hvGfxBox(hvg, x1, y, w, tickHeight, tickColor); vcfPhasedAddMapBox(hvg, rec, psvs, mouseover->string, x1, y, w, tickHeight, track); } } else if (gt->hapIxA != 0 || gt->hapIxB != 0)// draw the tick between the two haplotype lines { int yMid = ((yOffsets[2*nameIx] + yOffsets[(2*nameIx)+1]) / 2); // midpoint of two haplotype lines - hvGfxBox(hvg, x1, yMid - (tickHeight / 2), w, tickHeight, VCFPHASED_UNPHASED_COLOR); - dyStringPrintf(mouseover, "%s Unphased genotype: %s/%s", gt->id, rec->alleles[0], rec->alleles[1]); + hvGfxBox(hvg, x1, yMid - (tickHeight / 2), w, tickHeight, tickColor); + dyStringPrintf(mouseover, "%s unphased genotype: %s/%s", gt->id, rec->alleles[0], rec->alleles[1]); vcfPhasedAddMapBox(hvg, rec, psvs, mouseover->string, x1, yMid, w, tickHeight, track); } } } static void vcfPhasedAddLabel(struct track *track, struct hvGfx *hvg, char *label, int trackY, int labelY, MgFont *font, Color color) // Add a VCF Sample name to the left label of the haplotype representation { int clipXBak, clipYBak, clipWidthBak, clipHeightBak; struct hvGfx *hvgLL = (hvgSide != NULL) ? hvgSide : hvg; hvGfxGetClip(hvgLL, &clipXBak, &clipYBak, &clipWidthBak, &clipHeightBak); hvGfxUnclip(hvgLL); hvGfxSetClip(hvgLL, leftLabelX, trackY, leftLabelWidth, track->height); hvGfxTextRight(hvgLL, leftLabelX, labelY, leftLabelWidth, track->lineHeight, color, @@ -2645,61 +2727,74 @@ useAliasLabel && hasAlias ? (char *)name->val : ""); vcfPhasedAddLabel(track, hvg, label->string, yOff, round(((y1 + y2) / 2) - (track->lineHeight / 2)), font, MG_BLACK); } } static void vcfPhasedDrawItems(struct track *track, int seqStart, int seqEnd, struct hvGfx *hvg, int xOff, int yOff, int width, MgFont *font, Color color, enum trackVisibility vis) /* Split samples' chromosomes (haplotypes), cluster them by parents, and * draw them all along a line representing each chromosome*/ { struct vcfFile *vcff = track->extraUiData; if (vcff->records == NULL) return; +struct txInfo *txiList = NULL; +struct seqWindow *gSeqWin = chromSeqWindowNew(database, chromName, seqStart, seqEnd); +enum phasedColorMode colorMode = getPhasedColorMode(track->tdb); +if (colorMode == phasedFunctionMode) + txiList = txInfoLoad(gSeqWin, track->tdb); const double scale = scaleForPixels(width); boolean hideOtherSamples = cartUsualBooleanClosestToHome(cart, track->tdb, FALSE, VCF_PHASED_HIDE_OTHER_VAR, FALSE); struct slPair *pair, *sampleNames = vcfPhasedGetSampleOrder(cart, track->tdb, FALSE, hideOtherSamples); int gtCount = slCount(sampleNames); -int yOffsets[gtCount * 2]; // y offsets of each haplotype line +int gtHapCount = gtCount * 2; +int yOffsets[gtHapCount]; // y offsets of each haplotype line char *sampleOrder[gtCount]; // order of sampleName lines int i; for (pair = sampleNames, i = 0; pair != NULL && i < gtCount; pair = pair->next, i++) sampleOrder[i] = pair->name; char *childSample = cloneString(trackDbSetting(track->tdb, VCF_PHASED_CHILD_SAMPLE_SETTING)); char *pt = strchr(childSample, '|'); if (pt != NULL) *pt = '\0'; // set up the "haplotype" lines and the transparent yellow box to delineate samples vcfPhasedSetupHaplotypesLines(track, hvg, xOff, yOff, width, yOffsets, sampleNames, childSample, font); // maybe sort the variants by haplotype then draw ticks -unsigned int *hapOrder = needMem(sizeof(short) * gtCount * 2); +unsigned int *hapOrder = needMem(sizeof(short) * gtHapCount); int nRecords = slCount(vcff->records); int centerIx = getCenterVariantIx(track, seqStart, seqEnd, vcff->records); int startIx = 0; int endIx = nRecords; hapOrder = computeHapDist(vcff, centerIx, startIx, endIx, childSample, gtCount, sampleOrder); -boolean highlightChildDiffs = cartUsualBooleanClosestToHome(cart, track->tdb, FALSE, VCF_PHASED_HIGHLIGHT_INCONSISTENT, FALSE); struct vcfRecord *rec = NULL; struct slList *item = NULL; + +// array of "trans", "untrans", etc for mouseovers +char *transmitOrder[gtHapCount]; +setTransmitOrder(hapOrder, transmitOrder, gtHapCount, vcff->records, sampleOrder, childSample); + for (rec = vcff->records, item = track->items; rec != NULL && item != NULL; rec = rec->next, item = item->next) { - vcfPhasedDrawOneRecord(track, hvg, rec, item, hapOrder, gtCount * 2, xOff, yOffsets, sampleOrder, childSample, highlightChildDiffs, scale); + enum soTerm funcTerm = soUnknown; + if (colorMode == phasedFunctionMode) + funcTerm = functionForRecord(rec, gSeqWin, txiList); + vcfPhasedDrawOneRecord(track, hvg, rec, item, hapOrder, gtHapCount, xOff, yOffsets, sampleOrder, childSample, scale, transmitOrder, colorMode, funcTerm); } } static int vcfPhasedItemHeight(struct track *tg, void *item) { return tg->heightPer; } int vcfPhasedTrackHeight(struct track *tg, enum trackVisibility vis) { const struct vcfFile *vcff = tg->extraUiData; // when doing composite track height, vcfPhasedLoadItems won't have been called yet! if (vis == tvDense) return pgSnpHeight(tg, vis); if (!vcff || vcff->records == NULL)