519e0946826199d1d9792fa8df5972843fce021c angie Tue Aug 9 14:39:38 2011 -0700 Feature #2821 (VCF parser): improved representation of alleles: parse ref and comma-sep'd alt allele string into count and array inside record, so callers don't all have to parse the comma-sep'd alternate allele string. diff --git src/hg/hgTracks/vcfTrack.c src/hg/hgTracks/vcfTrack.c index 9b1d521..f9aa9df 100644 --- src/hg/hgTracks/vcfTrack.c +++ src/hg/hgTracks/vcfTrack.c @@ -293,133 +293,118 @@ while (c != NULL && c->next != NULL) { if (c->next->leafCount == 0) c->next = c->next->next; c = c->next; } } } struct hacTree *ht = hacTreeFromItems((struct slList *)(hapArray[0]), lm, cwaDistance, cwaMerge, cwaCmp, &helper); unsigned short *gtHapOrder = needMem(vcff->genotypeCount * 2 * sizeof(unsigned short)); rSetGtHapOrder(ht, gtHapOrder, retGtHapEnd); return gtHapOrder; } -INLINE char *hapIxToAllele(int hapIx, char *refAllele, char *altAlleles[]) -/* Look up allele by index into reference allele and alternate allele(s). */ -{ -return (hapIx == 0) ? refAllele : altAlleles[hapIx-1]; -} - //#*** unused... add UI option... -INLINE Color colorFromGt(struct vcfGenotype *gt, int ploidIx, char *refAllele, - char *altAlleles[], int altCount, boolean grayUnphasedHet) +INLINE Color colorFromGt(struct vcfGenotype *gt, int ploidIx, char **alleles, + boolean grayUnphasedHet) /* Color allele by base. */ { int hapIx = ploidIx ? 
gt->hapIxB : gt->hapIxA; -char *allele = hapIxToAllele(hapIx, refAllele, altAlleles); +char *allele = alleles[hapIx]; if (gt->isHaploid && hapIx > 0) return shadesOfGray[5]; if (grayUnphasedHet && !gt->isPhased && gt->hapIxA != gt->hapIxB) return shadesOfGray[5]; // Copying pgSnp color scheme here, using first base of allele which is not ideal for multibase // but allows us to simplify it to 5 colors: else if (allele[0] == 'A') return MG_RED; else if (allele[0] == 'C') return MG_BLUE; else if (allele[0] == 'G') return darkGreenColor; else if (allele[0] == 'T') return MG_MAGENTA; else return shadesOfGray[5]; } -INLINE char *gtSummaryString(struct vcfRecord *rec, char **altAlleles, int altCount) +INLINE char *gtSummaryString(struct vcfRecord *rec) // Make pgSnp-like mouseover text, but with genotype counts instead of allele counts. -// NOTE 1: Returned string is statically allocated, don't free it! -// NOTE 2: if revCmplDisp is set, this reverse-complements rec->ref and altAlleles! +// NOTE: Returned string is statically allocated, don't free it! { static struct dyString *dy = NULL; if (dy == NULL) dy = dyStringNew(0); +else dyStringClear(dy); +if (rec->alleleCount < 2) + return ""; const struct vcfFile *vcff = rec->file; int gtRefRefCount = 0, gtRefAltCount = 0, gtAltAltCount = 0, gtOtherCount = 0; int i; for (i=0; i < vcff->genotypeCount; i++) { struct vcfGenotype *gt = &(rec->genotypes[i]); if (gt->hapIxA == 0 && gt->hapIxB == 0) gtRefRefCount++; else if (gt->hapIxA == 1 && gt->hapIxB == 1) gtAltAltCount++; else if ((gt->hapIxA == 0 && gt->hapIxB == 1) || (gt->hapIxA == 1 && gt->hapIxB == 0)) gtRefAltCount++; else gtOtherCount++; } // These are pooled strings! Restore when done. 
if (revCmplDisp) { - reverseComplement(rec->ref, strlen(rec->ref)); - for (i=0; i < altCount; i++) - reverseComplement(altAlleles[i], strlen(altAlleles[i])); + for (i=0; i < rec->alleleCount; i++) + reverseComplement(rec->alleles[i], strlen(rec->alleles[i])); } - -dyStringPrintf(dy, "%s/%s:%d %s/%s:%d %s/%s:%d", rec->ref, rec->ref, gtRefRefCount, - rec->ref, altAlleles[0], gtRefAltCount, - altAlleles[0], altAlleles[0], gtAltAltCount); +dyStringPrintf(dy, "%s/%s:%d %s/%s:%d %s/%s:%d", rec->alleles[0], rec->alleles[0], gtRefRefCount, + rec->alleles[0], rec->alleles[1], gtRefAltCount, + rec->alleles[1], rec->alleles[1], gtAltAltCount); if (gtOtherCount > 0) dyStringPrintf(dy, " other:%d", gtOtherCount); // Restore original values of pooled strings. if (revCmplDisp) { - reverseComplement(rec->ref, strlen(rec->ref)); - for (i=0; i < altCount; i++) - reverseComplement(altAlleles[i], strlen(altAlleles[i])); + for (i=0; i < rec->alleleCount; i++) + reverseComplement(rec->alleles[i], strlen(rec->alleles[i])); } return dy->string; } // This is initialized when we start drawing: static Color purple = 0; static void drawOneRec(struct vcfRecord *rec, unsigned short *gtHapOrder, int gtHapEnd, struct track *tg, struct hvGfx *hvg, int xOff, int yOff, int width, boolean isCenter) /* Draw a stack of genotype bars for this record */ { -static struct dyString *tmp = NULL; -if (tmp == NULL) - tmp = dyStringNew(0); -char *altAlleles[256]; -int altCount; const double scale = scaleForPixels(width); int x1 = round((double)(rec->chromStart-winStart)*scale) + xOff; int x2 = round((double)(rec->chromEnd-winStart)*scale) + xOff; int w = x2-x1; if (w <= 1) { x1--; w = 3; } -dyStringClear(tmp); -dyStringAppend(tmp, rec->alt); -altCount = chopCommas(tmp->string, altAlleles); double hapsPerPix = (2 * (double)rec->file->genotypeCount / tg->height); int pixIx; for (pixIx = 0; pixIx < tg->height; pixIx++) { int gtHapOrderIxStart = round(hapsPerPix * pixIx); int gtHapOrderIxEnd = round(hapsPerPix * 
(pixIx + 1)); if (gtHapOrderIxEnd == gtHapOrderIxStart) gtHapOrderIxEnd++; int unks = 0, refs = 0, alts = 0; int gtHapOrderIx; for (gtHapOrderIx = gtHapOrderIxStart; gtHapOrderIx < gtHapOrderIxEnd; gtHapOrderIx++) { int gtHapIx = gtHapOrder[gtHapOrderIx]; int hapIx = gtHapIx & 1; int gtIx = gtHapIx >>1; @@ -436,31 +421,31 @@ } } const int fudgeFactor = 4; Color col = MG_BLACK; if (unks > (refs + alts)) col = shadesOfGray[5]; else if (alts > fudgeFactor * refs) col = MG_RED; else if (refs > fudgeFactor * alts) col = MG_BLUE; else col = purple; int y = yOff + pixIx; hvGfxLine(hvg, x1, y, x2, y, col); } -char *mouseoverText = gtSummaryString(rec, altAlleles, altCount); +char *mouseoverText = gtSummaryString(rec); if (isCenter) { // Thick black lines to distinguish this variant: int yBot = yOff + tg->height - 2; hvGfxBox(hvg, x1-3, yOff, 3, tg->height, MG_BLACK); hvGfxBox(hvg, x2, yOff, 3, tg->height, MG_BLACK); hvGfxLine(hvg, x1-2, yOff, x2+2, yOff, MG_BLACK); hvGfxLine(hvg, x1-2, yBot, x2+2, yBot, MG_BLACK); // Special mouseover instructions: static struct dyString *dy = NULL; if (dy == NULL) dy = dyStringNew(0); dyStringPrintf(dy, "%s Haplotypes sorted on ", mouseoverText); char cartVar[512]; safef(cartVar, sizeof(cartVar), "%s.centerVariantChrom", tg->tdb->track);