019c763e1dbf97788976ddb5582d4f73679bdd9e angie Fri Nov 16 19:33:23 2012 -0800 VCF with mixed haploid status, for example on chrX where females are diploid andmales are haploid, was having gtHapIx assigned in two different ways, not good. Same formula for gtHapIx now applies to both diploid and haploid genotypes. We no longer have the invariant that the number of clustered haplotypes is the same as the max gtHapIx number, but it all works, trust me. :) diff --git src/hg/hgTracks/vcfTrack.c src/hg/hgTracks/vcfTrack.c index f49c5ae..15f50ad 100644 --- src/hg/hgTracks/vcfTrack.c +++ src/hg/hgTracks/vcfTrack.c @@ -396,80 +396,77 @@ } boolean haveHaploid = FALSE; int varIx; struct vcfRecord *rec; for (varIx = 0, rec = vcff->records; rec != NULL && varIx < endIx; varIx++, rec = rec->next) { if (varIx < startIx) continue; int countIx = varIx - startIx; int gtIx; for (gtIx=0; gtIx < gtCount; gtIx++) { struct vcfGenotype *gt = &(rec->genotypes[gtIx]); struct hapCluster *c1 = hapArray[gtIx]; struct hapCluster *c2 = hapArray[gtCount + gtIx]; // hardwired ploidy=2 + c1->gtHapIx = gtIx << 1; + c1->leafCount = 1; if (gt->isPhased || gt->isHaploid || (gt->hapIxA == gt->hapIxB)) { - // first chromosome: - c1->leafCount = 1; + // first haplotype's counts: if (gt->hapIxA < 0) c1->unkCounts[countIx] = 1; else if (gt->hapIxA == 0) c1->refCounts[countIx] = 1; if (gt->isHaploid) - { haveHaploid = TRUE; - c1->gtHapIx = gtIx; - } else { - c1->gtHapIx = gtIx << 1; + // got second haplotype, fill in its counts: c2->gtHapIx = (gtIx << 1) | 1; c2->leafCount = 1; if (gt->hapIxB < 0) c2->unkCounts[countIx] = 1; else if (gt->hapIxB == 0) c2->refCounts[countIx] = 1; } } else { // Missing data or unphased heterozygote, don't use haplotype info for clustering - c1->leafCount = c2->leafCount = 1; - c1->gtHapIx = gtIx << 1; c2->gtHapIx = (gtIx << 1) | 1; + c2->leafCount = 1; c1->unkCounts[countIx] = c2->unkCounts[countIx] = 1; } } if (haveHaploid) { // Some array items will have an empty cluster for missing hap2 -- // trim those from the linked list. struct hapCluster *c = hapArray[0]; while (c != NULL && c->next != NULL) { if (c->next->leafCount == 0) c->next = c->next->next; else c = c->next; } } } struct hacTree *ht = hacTreeFromItems((struct slList *)(hapArray[0]), lm, cwaDistance, cwaMerge, cwaCmp, &helper); -unsigned short *gtHapOrder = needMem(vcff->genotypeCount * 2 * sizeof(unsigned short)); +unsigned short *gtHapOrder = needMem(vcff->genotypeCount * ploidy * sizeof(unsigned short)); rSetGtHapOrder(ht, gtHapOrder, retGtHapEnd); *retTree = ht; return gtHapOrder; } INLINE Color pgSnpColor(char *allele) /* Color allele by first base according to pgSnp palette. */ { if (allele[0] == 'A') return revCmplDisp ? MG_MAGENTA : MG_RED; else if (allele[0] == 'C') return revCmplDisp ? darkGreenColor : MG_BLUE; else if (allele[0] == 'G') return revCmplDisp ? MG_BLUE : darkGreenColor; else if (allele[0] == 'T') @@ -650,58 +647,45 @@ hapHeight -= extraPixel*2; } double hapsPerPix = (double)gtHapCount / hapHeight; int pixIx; for (pixIx = 0; pixIx < hapHeight; pixIx++) { int gtHapOrderIxStart = (int)(hapsPerPix * pixIx); int gtHapOrderIxEnd = round(hapsPerPix * (pixIx + 1)); if (gtHapOrderIxEnd == gtHapOrderIxStart) gtHapOrderIxEnd++; int unks = 0, refs = 0, alts = 0; int gtHapOrderIx; for (gtHapOrderIx = gtHapOrderIxStart; gtHapOrderIx < gtHapOrderIxEnd; gtHapOrderIx++) { int gtHapIx = gtHapOrder[gtHapOrderIx]; - int ploidy = (sameString(chromName, "chrY")) ? 1 : 2; - int hapIx = (ploidy == 1) ? 0 : gtHapIx & 1; - int gtIx = (ploidy == 1) ? gtHapIx : gtHapIx >>1; + int hapIx = gtHapIx & 1; + int gtIx = gtHapIx >>1; struct vcfGenotype *gt = &(rec->genotypes[gtIx]); - if (ploidy == 2) - { - if (!gt->isPhased && gt->hapIxA != gt->hapIxB) - unks++; - else + if (gt->isPhased || gt->isHaploid || (gt->hapIxA == gt->hapIxB)) { int alIx = hapIx ? gt->hapIxB : gt->hapIxA; if (alIx < 0) unks++; else if (alIx > 0) alts++; else refs++; } - } else - { - if (gt->hapIxA < 0) unks++; - else if (gt->hapIxA > 0) - alts++; - else - refs++; - } } int y = yOff + extraPixel + pixIx; Color col; if (colorMode == baseMode) col = colorByBase(refs, alts, unks, rec->alleles[0], rec->alleles[1]); else if (colorMode == refAltMode) col = colorByRefAlt(refs, alts, unks); else col = colorByAltOnly(refs, alts, unks); if (col != MG_WHITE) hvGfxLine(hvg, x1, y, x2, y, col); } int yBot = yOff + tg->height - CLIP_PAD - 1; if (isCenter) { @@ -906,59 +890,58 @@ hvGfxLine(hvg, x, y, labelEnd, y, purple); addClusterMapItem(ht, x, y, labelEnd, y+1, th); } return y; } struct yFromNodeHelper /* Pre-computed mapping from cluster nodes' gtHapIx to pixel heights. */ { unsigned short gtHapCount; unsigned short *gtHapIxToPxStart; unsigned short *gtHapIxToPxEnd; }; void initYFromNodeHelper(struct yFromNodeHelper *helper, int yOff, int height, - unsigned short gtHapCount, unsigned short *gtHapOrder) + unsigned short gtHapCount, unsigned short *gtHapOrder, + int genotypeCount) /* Build a mapping of genotype and haplotype to pixel y coords. */ { helper->gtHapCount = gtHapCount; -helper->gtHapIxToPxStart = needMem(gtHapCount * sizeof(unsigned short)); -helper->gtHapIxToPxEnd = needMem(gtHapCount * sizeof(unsigned short)); +helper->gtHapIxToPxStart = needMem(genotypeCount * 2 * sizeof(unsigned short)); +helper->gtHapIxToPxEnd = needMem(genotypeCount * 2 * sizeof(unsigned short)); double pxPerHap = (double)height / gtHapCount; int i; for (i = 0; i < gtHapCount; i++) { int yStart = round(i * pxPerHap); int yEnd = round((i+1) * pxPerHap); if (yEnd == yStart) yEnd++; int gtHapIx = gtHapOrder[i]; helper->gtHapIxToPxStart[gtHapIx] = yOff + yStart; helper->gtHapIxToPxEnd[gtHapIx] = yOff + yEnd; } } static int yFromHapNode(const struct slList *itemOrCluster, void *extraData, enum yRetType yType) /* Extract the gtHapIx from hapCluster (hacTree node item), find out its relative order * and translate that to a pixel height. */ { unsigned short gtHapIx = ((const struct hapCluster *)itemOrCluster)->gtHapIx; struct yFromNodeHelper *helper = extraData; -if (gtHapIx >= helper->gtHapCount) - errAbort("vcfTrack.c: gtHapIx %d out of range [0,%d).", gtHapIx, helper->gtHapCount); int y; if (yType == yrtStart) y = helper->gtHapIxToPxStart[gtHapIx]; else if (yType == yrtEnd) y = helper->gtHapIxToPxEnd[gtHapIx]; else y = (helper->gtHapIxToPxStart[gtHapIx] + helper->gtHapIxToPxEnd[gtHapIx]) / 2; return y; } void initTitleHelper(struct titleHelper *th, char *track, int startIx, int centerIx, int endIx, int nRecords, struct vcfFile *vcff) /* Set up info including arrays of ref & alt alleles for cluster mouseover. */ { th->track = track; @@ -1069,31 +1052,32 @@ } for (rec = vcff->records, ix=0; rec != NULL; rec = rec->next, ix++) { boolean isClustered = (ix >= startIx && ix < endIx); if (ix != centerIx) drawOneRec(rec, gtHapOrder, gtHapCount, tg, hvg, xOff, yOff, width, isClustered, FALSE, colorMode); } // Draw the center rec on top, outlined with black lines, to make sure it is very visible: drawOneRec(centerRec, gtHapOrder, gtHapCount, tg, hvg, xOff, yOff, width, TRUE, TRUE, colorMode); // Draw as much of the tree as can fit in the left label area: int extraPixel = (colorMode == altOnlyMode) ? 1 : 0; int hapHeight = tg->height- CLIP_PAD - 2*extraPixel; struct yFromNodeHelper yHelper = {0, NULL, NULL}; -initYFromNodeHelper(&yHelper, yOff+extraPixel, hapHeight, gtHapCount, gtHapOrder); +initYFromNodeHelper(&yHelper, yOff+extraPixel, hapHeight, gtHapCount, gtHapOrder, + vcff->genotypeCount); struct titleHelper titleHelper = { NULL, 0, 0, 0, 0, NULL, NULL }; initTitleHelper(&titleHelper, tg->track, startIx, centerIx, endIx, nRecords, vcff); char *treeAngle = cartUsualStringClosestToHome(cart, tg->tdb, FALSE, VCF_HAP_TREEANGLE_VAR, VCF_DEFAULT_HAP_TREEANGLE); boolean drawRectangle = sameString(treeAngle, VCF_HAP_TREEANGLE_RECTANGLE); drawTreeInLabelArea(ht, hvg, yOff+extraPixel, hapHeight+CLIP_PAD, &yHelper, &titleHelper, drawRectangle); } static int vcfHapClusterTotalHeight(struct track *tg, enum trackVisibility vis) /* Return height of haplotype graph (2 * #samples * lineHeight); * 2 because we're assuming diploid genomes here, no XXY, tetraploid etc. */ { const struct vcfFile *vcff = tg->extraUiData; if (vcff->records == NULL)