7f3ea18de77e114f05f283db03f117151e1e199b angie Fri May 13 16:06:16 2011 -0700 Feature #3711 (VCF center-weighted haplotype clustering): David request:make the variant used as the center really stand out visually. diff --git src/hg/hgTracks/vcfTrack.c src/hg/hgTracks/vcfTrack.c index 2c6994d..f3c697f 100644 --- src/hg/hgTracks/vcfTrack.c +++ src/hg/hgTracks/vcfTrack.c @@ -183,47 +183,45 @@ struct hapCluster *cL = (struct hapCluster *)ht->left->itemOrCluster; struct hapCluster *cR = (struct hapCluster *)ht->right->itemOrCluster; if (cL->leafCount >= cR->leafCount) { rSetGtHapOrder(ht->left, gtHapOrder, retGtHapEnd); rSetGtHapOrder(ht->right, gtHapOrder, retGtHapEnd); } else { rSetGtHapOrder(ht->right, gtHapOrder, retGtHapEnd); rSetGtHapOrder(ht->left, gtHapOrder, retGtHapEnd); } } } -static unsigned short *clusterChroms(const struct vcfFile *vcff, unsigned short *retGtHapEnd) +static unsigned short *clusterChroms(const struct vcfFile *vcff, int centerIx, + unsigned short *retGtHapEnd) /* Given a bunch of VCF records with phased genotypes, build up one haplotype string * per chromosome that is the sequence of alleles in all variants (simplified to one base * per variant). Each individual/sample will have two haplotype strings (unless haploid * like Y or male X). Independently cluster the haplotype strings using hacTree with the * center-weighted alpha functions above. Return an array of genotype+haplotype indices * in the order determined by the hacTree, and set retGtHapEnd to its length/end. */ { int len = slCount(vcff->records); -// Use the median variant in the window as the center; would be even nicer to allow -// the user to choose a variant (or position) to use as center: -int center = len / 2; // Should alpha depend on len? Should the penalty drop off with distance? Seems like // straight-up exponential will cause the signal to drop to nothing pretty quickly... double alpha = 0.5; struct lm *lm = lmInit(0); -struct cwaExtraData helper = { center, len, alpha, lm }; +struct cwaExtraData helper = { centerIx, len, alpha, lm }; int ploidy = 2; // Assuming diploid genomes here, no XXY, tetraploid etc. int gtCount = vcff->genotypeCount; // Make an slList of hapClusters, but allocate in a big block so I can use // array indexing. struct hapCluster **hapArray = lmAlloc(lm, sizeof(struct hapCluster *) * gtCount * ploidy); int i; for (i=0; i < ploidy * gtCount; i++) { hapArray[i] = lmHapCluster(&helper); if (i > 0) hapArray[i-1]->next = hapArray[i]; } boolean haveHaploid = FALSE; int varIx; struct vcfRecord *rec; @@ -300,47 +298,57 @@ return shadesOfGray[5]; // Copying pgSnp color scheme here, using first base of allele which is not ideal for multibase // but allows us to simplify it to 5 colors: else if (allele[0] == 'A') return MG_RED; else if (allele[0] == 'C') return MG_BLUE; else if (allele[0] == 'G') return darkGreenColor; else if (allele[0] == 'T') return MG_MAGENTA; else return shadesOfGray[5]; } -INLINE Color colorFromRefAlt(struct vcfGenotype *gt, int hapIx, boolean grayUnphasedHet) -/* Color allele red for alternate allele, blue for reference allele. */ +INLINE Color colorFromRefAlt(struct vcfGenotype *gt, int hapIx, boolean grayUnphasedHet, + boolean isCenter) +/* Color allele red for alternate allele, blue for reference allele -- + * except for special center variant, make it yellow/green for contrast. */ { if (grayUnphasedHet && !gt->isPhased && gt->hapIxA != gt->hapIxB) return shadesOfGray[5]; int alIx = hapIx ? gt->hapIxB : gt->hapIxA; +if (isCenter) + return alIx ? MG_YELLOW : MG_GREEN; return alIx ? MG_RED : MG_BLUE; } INLINE int drawOneHap(struct vcfGenotype *gt, int hapIx, char *ref, char *altAlleles[], int altCount, - struct hvGfx *hvg, int x1, int y, int w, int itemHeight, int lineHeight) + struct hvGfx *hvg, int x1, int y, int w, int itemHeight, int lineHeight, + boolean isCenter) /* Draw a base-colored box for genotype[hapIx]. Return the new y offset. */ { -Color color = colorHapByRefAlt ? colorFromRefAlt(gt, hapIx, TRUE) : +Color color = colorHapByRefAlt ? colorFromRefAlt(gt, hapIx, TRUE, isCenter) : colorFromGt(gt, hapIx, ref, altAlleles, altCount, TRUE); +if (w == 1) + { + x1--; + w = 3; + } hvGfxBox(hvg, x1, y, w, itemHeight+1, color); y += itemHeight+1; return y; } INLINE char *gtSummaryString(struct vcfRecord *rec, char **altAlleles, int altCount) // Make pgSnp-like mouseover text, but with genotype counts instead of allele counts. // NOTE 1: Returned string is statically allocated, don't free it! // NOTE 2: if revCmplDisp is set, this reverse-complements rec->ref and altAlleles! { static struct dyString *dy = NULL; if (dy == NULL) dy = dyStringNew(0); dyStringClear(dy); const struct vcfFile *vcff = rec->file; @@ -361,73 +369,99 @@ if (revCmplDisp) { reverseComplement(rec->ref, strlen(rec->ref)); for (i=0; i < altCount; i++) reverseComplement(altAlleles[i], strlen(altAlleles[i])); } dyStringPrintf(dy, "%s/%s:%d %s/%s:%d %s/%s:%d", rec->ref, rec->ref, gtRefRefCount, rec->ref, altAlleles[0], gtRefAltCount, altAlleles[0], altAlleles[0], gtAltAltCount); if (gtOtherCount > 0) dyStringPrintf(dy, " other:%d", gtOtherCount); return dy->string; } -static void vcfHapClusterDraw(struct track *tg, int seqStart, int seqEnd, - struct hvGfx *hvg, int xOff, int yOff, int width, - MgFont *font, Color color, enum trackVisibility vis) -/* Split samples' chromosomes (haplotypes), cluster them by center-weighted - * alpha similarity, and draw in the order determined by clustering. */ +static void drawOneRec(struct vcfRecord *rec, boolean isCenter, + unsigned short *gtHapOrder, int gtHapEnd, + struct track *tg, struct hvGfx *hvg, int xOff, int yOff, int width) +/* Draw a stack of genotype bars for this record */ { -const struct vcfFile *vcff = tg->extraUiData; -if (vcff->records == NULL) - return; -unsigned short gtHapEnd = 0; -unsigned short *gtHapOrder = clusterChroms(vcff, >HapEnd); -struct dyString *tmp = dyStringNew(0); -struct vcfRecord *rec; +static struct dyString *tmp = NULL; +if (tmp == NULL) + tmp = dyStringNew(0); +char *altAlleles[256]; +int altCount; const int lineHeight = tg->lineHeight; const int itemHeight = tg->heightPer; const double scale = scaleForPixels(width); -for (rec = vcff->records; rec != NULL; rec = rec->next) - { - dyStringClear(tmp); - dyStringAppend(tmp, rec->alt); - char *altAlleles[256]; - int altCount = chopCommas(tmp->string, altAlleles); int x1 = round((double)(rec->chromStart-winStart)*scale) + xOff; int x2 = round((double)(rec->chromEnd-winStart)*scale) + xOff; int w = x2-x1; if (w < 1) w = 1; int y = yOff; +dyStringClear(tmp); +dyStringAppend(tmp, rec->alt); +altCount = chopCommas(tmp->string, altAlleles); int gtHapOrderIx; for (gtHapOrderIx = 0; gtHapOrderIx < gtHapEnd; gtHapOrderIx++) { int gtHapIx = gtHapOrder[gtHapOrderIx]; int hapIx = gtHapIx & 1; int gtIx = gtHapIx >>1; struct vcfGenotype *gt = &(rec->genotypes[gtIx]); y = drawOneHap(gt, hapIx, rec->ref, altAlleles, altCount, - hvg, x1, y, w, itemHeight, lineHeight); + hvg, x1, y, w, itemHeight, lineHeight, isCenter); } mapBoxHgcOrHgGene(hvg, rec->chromStart, rec->chromEnd, x1, yOff, w, tg->height, tg->track, rec->name, gtSummaryString(rec, altAlleles, altCount), NULL, TRUE, NULL); } -// left labels? + +static void vcfHapClusterDraw(struct track *tg, int seqStart, int seqEnd, + struct hvGfx *hvg, int xOff, int yOff, int width, + MgFont *font, Color color, enum trackVisibility vis) +/* Split samples' chromosomes (haplotypes), cluster them by center-weighted + * alpha similarity, and draw in the order determined by clustering. */ +{ +const struct vcfFile *vcff = tg->extraUiData; +if (vcff->records == NULL) + return; +unsigned short gtHapEnd = 0; +// Use the median variant in the window as the center; would be even nicer to allow +// the user to choose a variant (or position) to use as center: +int ix, centerIx = (slCount(vcff->records)-1) / 2; +unsigned short *gtHapOrder = clusterChroms(vcff, centerIx, >HapEnd); +struct vcfRecord *rec, *centerRec = NULL; +for (rec = vcff->records, ix=0; rec != NULL; rec = rec->next, ix++) + { + boolean isCenter = (ix == centerIx); + drawOneRec(rec, isCenter, gtHapOrder, gtHapEnd, tg, hvg, xOff, yOff, width); + if (isCenter) + centerRec = rec; + } +// Draw the center rec on top, outlined with black lines, to make sure it is very visible: +drawOneRec(centerRec, TRUE, gtHapOrder, gtHapEnd, tg, hvg, xOff, yOff, width); +const double scale = scaleForPixels(width); +int x1 = round((double)(centerRec->chromStart-winStart)*scale) + xOff; +int x2 = round((double)(centerRec->chromEnd-winStart)*scale) + xOff; +int yBot = yOff + tg->height - 2; +hvGfxLine(hvg, x1-2, yOff, x1-2, yBot, MG_BLACK); +hvGfxLine(hvg, x1-2, yOff, x2+2, yOff, MG_BLACK); +hvGfxLine(hvg, x2+2, yOff, x2+2, yBot, MG_BLACK); +hvGfxLine(hvg, x1-2, yBot, x2+2, yBot, MG_BLACK); } static int vcfHapClusterTotalHeight(struct track *tg, enum trackVisibility vis) /* Return height of haplotype graph (2 * #samples * lineHeight); * 2 because we're assuming diploid genomes here, no XXY, tetraploid etc. */ { // Should we make it single-height when on chrY? const struct vcfFile *vcff = tg->extraUiData; if (vcff->records == NULL) return 0; int ploidy = 2; tg->height = ploidy * vcff->genotypeCount * tg->lineHeight; return tg->height; }