239f153403dc75bf2a57c67ef04dcc7c38e8ecff angie Wed Mar 5 14:03:23 2014 -0800 In addition to trimming the left padding base from VCF indels,now we trim identical bases on the right of all alleles, if there are any. I've seen multiple cases of user VCF with indels that have inflated size due to including right flanking bases; when trying to determine functional consequences, it's misleading to have an indel sized larger than it has to be. diff --git src/hg/hgTracks/vcfTrack.c src/hg/hgTracks/vcfTrack.c index dd9d086..fedb8ad 100644 --- src/hg/hgTracks/vcfTrack.c +++ src/hg/hgTracks/vcfTrack.c @@ -153,50 +153,52 @@ return; struct vcfRecord *rec, *nextRec, *newList = NULL; for (rec = vcff->records; rec != NULL; rec = nextRec) { nextRec = rec->next; if (! ((gotQualFilter && minQualFail(rec, minQual)) || (gotFilterFilter && filterColumnFail(rec, filterValues)) || (gotMinFreqFilter && minFreqFail(rec, minFreq)) )) slAddHead(&newList, rec); } slReverse(&newList); vcff->records = newList; } -struct pgSnpVcfStart -/* This extends struct pgSnp by tacking on an original VCF chromStart at the end, +struct pgSnpVcfStartEnd +/* This extends struct pgSnp by tacking on an original VCF chromStart and End at the end, * for use by indelTweakMapItem below. This can be cast to pgs. */ { struct pgSnp pgs; unsigned int vcfStart; + unsigned int vcfEnd; }; static struct pgSnp *vcfFileToPgSnp(struct vcfFile *vcff, struct trackDb *tdb) /* Convert vcff's records to pgSnp; don't free vcff until you're done with pgSnp * because it contains pointers into vcff's records' chrom. */ { struct pgSnp *pgsList = NULL; struct vcfRecord *rec; int maxLen = 33; int maxAlCount = 5; for (rec = vcff->records; rec != NULL; rec = rec->next) { - struct pgSnpVcfStart *psvs = needMem(sizeof(*psvs)); + struct pgSnpVcfStartEnd *psvs = needMem(sizeof(*psvs)); psvs->vcfStart = vcfRecordTrimIndelLeftBase(rec); + psvs->vcfEnd = vcfRecordTrimAllelesRight(rec); struct pgSnp *pgs = pgSnpFromVcfRecord(rec); memcpy(&(psvs->pgs), pgs, sizeof(*pgs)); pgs = (struct pgSnp *)psvs; // leak mem // Insertion sequences can be quite long; abbreviate here for display. int len = strlen(pgs->name); if (len > maxLen) { int maxAlLen = (maxLen / min(rec->alleleCount, maxAlCount)) - 1; pgs->name[0] = '\0'; int i; for (i = 0; i < rec->alleleCount; i++) { if (i > 0) safencat(pgs->name, len+1, "/", 1); if (i >= maxAlCount) @@ -531,49 +533,50 @@ else dyStringPrintf(dy, "%s/%s:%d %s/%s:%d %s/%s:%d", refAl, refAl, gtRefRefCount, refAl, altAl1, gtRefAltCount, altAl1, altAl1, gtAltAltCount); if (gtUnkCount > 0) dyStringPrintf(dy, " unknown:%d", gtUnkCount); if (gtOtherCount > 0) dyStringPrintf(dy, " other:%d", gtOtherCount); } void mapBoxForCenterVariant(struct vcfRecord *rec, struct hvGfx *hvg, struct track *tg, int xOff, int yOff, int width) /* Special mouseover for center variant */ { struct dyString *dy = dyStringNew(0); unsigned int chromStartMap = vcfRecordTrimIndelLeftBase(rec); +unsigned int chromEndMap = vcfRecordTrimAllelesRight(rec); gtSummaryString(rec, dy); dyStringAppend(dy, " Haplotypes sorted on "); char *centerChrom = cartOptionalStringClosestToHome(cart, tg->tdb, FALSE, "centerVariantChrom"); if (centerChrom == NULL || !sameString(chromName, centerChrom)) dyStringAppend(dy, "middle variant by default. "); else dyStringAppend(dy, "this variant. "); dyStringAppend(dy, "To anchor sorting to a different variant, click on that variant and " "then click on the 'Use this variant' button below the variant name."); const double scale = scaleForPixels(width); int x1 = round((double)(rec->chromStart-winStart)*scale) + xOff; int x2 = round((double)(rec->chromEnd-winStart)*scale) + xOff; int w = x2-x1; if (w <= 1) { x1--; w = 3; } -mapBoxHgcOrHgGene(hvg, chromStartMap, rec->chromEnd, x1, yOff, w, tg->height, tg->track, +mapBoxHgcOrHgGene(hvg, chromStartMap, chromEndMap, x1, yOff, w, tg->height, tg->track, rec->name, dy->string, NULL, TRUE, NULL); } // These are initialized when we start drawing, then constant. static Color purple = 0; static Color undefYellow = 0; enum hapColorMode { altOnlyMode, refAltMode, baseMode }; static Color colorByAltOnly(int refs, int alts, int unks) @@ -609,30 +612,31 @@ return pgSnpColor(altAl); if (refs > fudgeFactor * alts) return pgSnpColor(refAl); return shadesOfGray[5]; } // tg->height needs an extra pixel at the bottom; it's eaten by the clipping rectangle: #define CLIP_PAD 1 static void drawOneRec(struct vcfRecord *rec, unsigned short *gtHapOrder, unsigned short gtHapCount, struct track *tg, struct hvGfx *hvg, int xOff, int yOff, int width, boolean isClustered, boolean isCenter, enum hapColorMode colorMode) /* Draw a stack of genotype bars for this record */ { unsigned int chromStartMap = vcfRecordTrimIndelLeftBase(rec); +unsigned int chromEndMap = vcfRecordTrimAllelesRight(rec); const double scale = scaleForPixels(width); int x1 = round((double)(rec->chromStart-winStart)*scale) + xOff; int x2 = round((double)(rec->chromEnd-winStart)*scale) + xOff; int w = x2-x1; if (w <= 1) { x1--; w = 3; } // When coloring mode is altOnly, we draw one extra pixel row at the top & one at bottom // to show the locations of variants, since the reference alleles are invisible: int extraPixel = 0; int hapHeight = tg->height - CLIP_PAD; if (colorMode == altOnlyMode) { @@ -693,31 +697,31 @@ } else { // Thick black lines to distinguish this variant: hvGfxBox(hvg, x1-3, yOff, 3, tg->height, MG_BLACK); hvGfxBox(hvg, x2, yOff, 3, tg->height, MG_BLACK); hvGfxLine(hvg, x1-2, yOff, x2+2, yOff, MG_BLACK); hvGfxLine(hvg, x1-2, yBot, x2+2, yBot, MG_BLACK); } // Mouseover was handled already by mapBoxForCenterVariant } else { struct dyString *dy = dyStringNew(0); gtSummaryString(rec, dy); - mapBoxHgcOrHgGene(hvg, chromStartMap, rec->chromEnd, x1, yOff, w, tg->height, tg->track, + mapBoxHgcOrHgGene(hvg, chromStartMap, chromEndMap, x1, yOff, w, tg->height, tg->track, rec->name, dy->string, NULL, TRUE, NULL); } if (colorMode == altOnlyMode) hvGfxLine(hvg, x1, yBot, x2, yBot, (isClustered ? purple : shadesOfGray[5])); } static int getCenterVariantIx(struct track *tg, int seqStart, int seqEnd, struct vcfRecord *records) // If the user hasn't specified a local variant/position to use as center, // just use the median variant in window. { int defaultIx = (slCount(records)-1) / 2; char *centerChrom = cartOptionalStringClosestToHome(cart, tg->tdb, FALSE, "centerVariantChrom"); if (centerChrom != NULL && sameString(chromName, centerChrom)) { @@ -1108,32 +1112,33 @@ tg->totalHeight = vcfHapClusterTotalHeight; tg->itemHeight = tgFixedItemHeight; tg->itemName = vcfHapClusterTrackName; tg->mapItemName = vcfHapClusterTrackName; tg->itemStart = tgItemNoStart; tg->itemEnd = tgItemNoEnd; tg->mapsSelf = TRUE; tg->extraUiData = vcff; } static void indelTweakMapItem(struct track *tg, struct hvGfx *hvg, void *item, char *itemName, char *mapItemName, int start, int end, int x, int y, int width, int height) /* Pass the original vcf chromStart to pgSnpMapItem, so if we have trimmed an identical * first base from item's alleles and start, we will still pass the correct start to hgc. */ { -struct pgSnpVcfStart *psvs = item; -pgSnpMapItem(tg, hvg, item, itemName, mapItemName, psvs->vcfStart, end, x, y, width, height); +struct pgSnpVcfStartEnd *psvs = item; +pgSnpMapItem(tg, hvg, item, itemName, mapItemName, psvs->vcfStart, psvs->vcfEnd, + x, y, width, height); } #ifdef USE_TABIX #ifdef KNETFILE_HOOKS #include "knetUdc.h" #include "udc.h" #endif//def KNETFILE_HOOKS static void vcfTabixLoadItems(struct track *tg) /* Load items in window from VCF file using its tabix index file. */ { char *fileOrUrl = NULL; /* Figure out url or file name. */