2c0f74b4fbfefc533d27bf3ccdc18e4ecfdecae7 angie Fri Oct 5 17:07:18 2012 -0700 Follow-up to ce70491d:1. Move hgTracks left-base-of-indel trimming code up to vcf.[ch] for sharing w/hgc. 2. Correct chromStart in hgTracks mapBox links to hgc when we have trimmed a left base. 3. In hgc, abbreviate long sequences (e.g. 40kb deletion) and show trimmed left base in parentheses for consistency with VCF file (and sometimes INFO fields that use left-inclusive coords/seqs). 4. In pgSnpFromVcfRecord, don't truncate long alleles because hgTracks and hgc do their own abbreviating. diff --git src/hg/hgTracks/vcfTrack.c src/hg/hgTracks/vcfTrack.c index 856496a..11156ce 100644 --- src/hg/hgTracks/vcfTrack.c +++ src/hg/hgTracks/vcfTrack.c @@ -159,76 +159,53 @@ return; struct vcfRecord *rec, *nextRec, *newList = NULL; for (rec = vcff->records; rec != NULL; rec = nextRec) { nextRec = rec->next; if (! ((gotQualFilter && minQualFail(rec, minQual)) || (gotFilterFilter && filterColumnFail(rec, filterValues)) || (gotMinFreqFilter && minFreqFail(rec, minFreq)) )) slAddHead(&newList, rec); } slReverse(&newList); vcff->records = newList; } -static void trimIndelAlleles(struct vcfFile *vcff) -/* For indels, VCF includes the left neighboring base; for example, if the alleles are - * AA/- following a G base, then the VCF record will start one base to the left and have - * "GAA" and "G" as the alleles. That is not nice for display for two reasons: - * 1. Indels appear one base wider than their dbSNP entries. - * 2. In pgSnp display mode, the two alleles are always the same color. - * So here we take the liberty of trimming that left neighboring base for display. */ +struct pgSnpVcfStart +/* This extends struct pgSnp by tacking on an original VCF chromStart at the end, + * for use by indelTweakMapItem below. This can be cast to pgs. */ { -struct vcfRecord *rec; -for (rec = vcff->records; rec != NULL; rec = rec->next) - if (rec->alleleCount > 1) - { - boolean allSameFirstBase = TRUE; - char firstBase = rec->alleles[0][0]; - int i; - for (i = 1; i < rec->alleleCount; i++) - if (rec->alleles[i][0] != firstBase) - { - allSameFirstBase = FALSE; - break; - } - if (allSameFirstBase) - { - rec->chromStart++; - for (i = 0; i < rec->alleleCount; i++) - { - if (rec->alleles[i][1] == '\0') - rec->alleles[i] = vcfFilePooledStr(vcff, "-"); - else - rec->alleles[i] = vcfFilePooledStr(vcff, rec->alleles[i]+1); - } - } - } -} + struct pgSnp pgs; + unsigned int vcfStart; +}; static struct pgSnp *vcfFileToPgSnp(struct vcfFile *vcff, struct trackDb *tdb) /* Convert vcff's records to pgSnp; don't free vcff until you're done with pgSnp * because it contains pointers into vcff's records' chrom. */ { struct pgSnp *pgsList = NULL; struct vcfRecord *rec; int maxLen = 33; int maxAlCount = 5; for (rec = vcff->records; rec != NULL; rec = rec->next) { + struct pgSnpVcfStart *psvs = needMem(sizeof(*psvs)); + psvs->vcfStart = vcfRecordTrimIndelLeftBase(rec); struct pgSnp *pgs = pgSnpFromVcfRecord(rec); + memcpy(&(psvs->pgs), pgs, sizeof(*pgs)); + pgs = (struct pgSnp *)psvs; // leak mem // Insertion sequences can be quite long; abbreviate here for display. int len = strlen(pgs->name); if (len > maxLen) { int maxAlLen = (maxLen / min(rec->alleleCount, maxAlCount)) - 1; pgs->name[0] = '\0'; int i; for (i = 0; i < rec->alleleCount; i++) { if (i > 0) safencat(pgs->name, len+1, "/", 1); if (i >= maxAlCount) { safecat(pgs->name, len+1, "..."); pgs->alleleCount = maxAlCount; @@ -623,30 +600,31 @@ if (alts > fudgeFactor * refs) return pgSnpColor(altAl); if (refs > fudgeFactor * alts) return pgSnpColor(refAl); return shadesOfGray[5]; } // tg->height needs an extra pixel at the bottom; it's eaten by the clipping rectangle: #define CLIP_PAD 1 static void drawOneRec(struct vcfRecord *rec, unsigned short *gtHapOrder, unsigned short gtHapCount, struct track *tg, struct hvGfx *hvg, int xOff, int yOff, int width, boolean isClustered, boolean isCenter, enum hapColorMode colorMode) /* Draw a stack of genotype bars for this record */ { +unsigned int chromStartMap = vcfRecordTrimIndelLeftBase(rec); const double scale = scaleForPixels(width); int x1 = round((double)(rec->chromStart-winStart)*scale) + xOff; int x2 = round((double)(rec->chromEnd-winStart)*scale) + xOff; int w = x2-x1; if (w <= 1) { x1--; w = 3; } // When coloring mode is altOnly, we draw one extra pixel row at the top & one at bottom // to show the locations of variants, since the reference alleles are invisible: int extraPixel = 0; int hapHeight = tg->height - CLIP_PAD; if (colorMode == altOnlyMode) { @@ -714,37 +692,37 @@ { // Colorful outline to distinguish this variant: hvGfxLine(hvg, x1-1, yOff, x1-1, yBot, purple); hvGfxLine(hvg, x2+1, yOff, x2+1, yBot, purple); hvGfxLine(hvg, x1-1, yOff, x2+1, yOff, purple); hvGfxLine(hvg, x1-1, yBot, x2+1, yBot, purple); } else { // Thick black lines to distinguish this variant: hvGfxBox(hvg, x1-3, yOff, 3, tg->height, MG_BLACK); hvGfxBox(hvg, x2, yOff, 3, tg->height, MG_BLACK); hvGfxLine(hvg, x1-2, yOff, x2+2, yOff, MG_BLACK); hvGfxLine(hvg, x1-2, yBot, x2+2, yBot, MG_BLACK); } - // Mouseover is handled separately by mapBoxForCenterVariant + // Mouseover was handled already by mapBoxForCenterVariant } else { struct dyString *dy = dyStringNew(0); gtSummaryString(rec, dy); - mapBoxHgcOrHgGene(hvg, rec->chromStart, rec->chromEnd, x1, yOff, w, tg->height, tg->track, + mapBoxHgcOrHgGene(hvg, chromStartMap, rec->chromEnd, x1, yOff, w, tg->height, tg->track, rec->name, dy->string, NULL, TRUE, NULL); } if (colorMode == altOnlyMode) hvGfxLine(hvg, x1, yBot, x2, yBot, (isClustered ? purple : shadesOfGray[5])); } static int getCenterVariantIx(struct track *tg, int seqStart, int seqEnd, struct vcfRecord *records) // If the user hasn't specified a local variant/position to use as center, // just use the median variant in window. { int defaultIx = (slCount(records)-1) / 2; char *centerChrom = cartOptionalStringClosestToHome(cart, tg->tdb, FALSE, "centerVariantChrom"); if (centerChrom != NULL && sameString(chromName, centerChrom)) { @@ -1155,61 +1133,70 @@ fileOrUrl = bbiNameFromSettingOrTableChrom(tg->tdb, conn, tg->table, chromName); hFreeConn(&conn); } int vcfMaxErr = -1; struct vcfFile *vcff = NULL; boolean hapClustEnabled = cartUsualBooleanClosestToHome(cart, tg->tdb, FALSE, VCF_HAP_ENABLED_VAR, TRUE); /* protect against temporary network error */ struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) { vcff = vcfTabixFileMayOpen(fileOrUrl, chromName, winStart, winEnd, vcfMaxErr, -1); if (vcff != NULL) { filterRecords(vcff, tg->tdb); - trimIndelAlleles(vcff); if (hapClustEnabled && vcff->genotypeCount > 1 && vcff->genotypeCount < 3000 && (tg->visibility == tvPack || tg->visibility == tvSquish)) vcfHapClusterOverloadMethods(tg, vcff); else { tg->items = vcfFileToPgSnp(vcff, tg->tdb); // pgSnp bases coloring/display decision on count of items: tg->customInt = slCount(tg->items); } // Don't vcfFileFree here -- we are using its string pointers! } } errCatchEnd(errCatch); if (errCatch->gotError || vcff == NULL) { if (isNotEmpty(errCatch->message->string)) tg->networkErrMsg = cloneString(errCatch->message->string); tg->drawItems = bigDrawWarning; tg->totalHeight = bigWarnTotalHeight; } errCatchFree(&errCatch); } +static void indelTweakMapItem(struct track *tg, struct hvGfx *hvg, void *item, + char *itemName, char *mapItemName, int start, int end, int x, int y, int width, int height) +/* Pass the original vcf chromStart to pgSnpMapItem, so if we have trimmed an identical + * first base from item's alleles and start, we will still pass the correct start to hgc. */ +{ +struct pgSnpVcfStart *psvs = item; +pgSnpMapItem(tg, hvg, item, itemName, mapItemName, psvs->vcfStart, end, x, y, width, height); +} + void vcfTabixMethods(struct track *track) /* Methods for VCF + tabix files. */ { #ifdef KNETFILE_HOOKS knetUdcInstall(); #endif pgSnpMethods(track); +track->mapItem = indelTweakMapItem; // Disinherit next/prev flag and methods since we don't support next/prev: track->nextExonButtonable = FALSE; track->nextPrevExon = NULL; track->nextPrevItem = NULL; track->loadItems = vcfTabixLoadItems; track->canPack = TRUE; } #else // no USE_TABIX: // If code was not built with USE_TABIX=1, but there are vcfTabix tracks, display a message // in place of the tracks (instead of annoying "No track handler" warning messages). static void drawUseVcfTabixWarning(struct track *tg, int seqStart, int seqEnd, struct hvGfx *hvg, int xOff, int yOff, int width, MgFont *font, Color color,