ce70491d6debd76c80dfc0e8ac4102a2315f815f angie Mon Sep 17 09:59:43 2012 -0700 VCF indel alleles always start with a reference base that is the same(e.g. for a dbSNP item with -/C, the VCF item starts with the reference base to the left like A/AC). Make the display consistent with dbSNP and other variation tracks by trimming the left neighboring base. diff --git src/hg/hgTracks/vcfTrack.c src/hg/hgTracks/vcfTrack.c index 1af8b37..856496a 100644 --- src/hg/hgTracks/vcfTrack.c +++ src/hg/hgTracks/vcfTrack.c @@ -159,30 +159,65 @@ return; struct vcfRecord *rec, *nextRec, *newList = NULL; for (rec = vcff->records; rec != NULL; rec = nextRec) { nextRec = rec->next; if (! ((gotQualFilter && minQualFail(rec, minQual)) || (gotFilterFilter && filterColumnFail(rec, filterValues)) || (gotMinFreqFilter && minFreqFail(rec, minFreq)) )) slAddHead(&newList, rec); } slReverse(&newList); vcff->records = newList; } +static void trimIndelAlleles(struct vcfFile *vcff) +/* For indels, VCF includes the left neighboring base; for example, if the alleles are + * AA/- following a G base, then the VCF record will start one base to the left and have + * "GAA" and "G" as the alleles. That is not nice for display for two reasons: + * 1. Indels appear one base wider than their dbSNP entries. + * 2. In pgSnp display mode, the two alleles are always the same color. + * So here we take the liberty of trimming that left neighboring base for display. */ +{ +struct vcfRecord *rec; +for (rec = vcff->records; rec != NULL; rec = rec->next) + if (rec->alleleCount > 1) + { + boolean allSameFirstBase = TRUE; + char firstBase = rec->alleles[0][0]; + int i; + for (i = 1; i < rec->alleleCount; i++) + if (rec->alleles[i][0] != firstBase) + { + allSameFirstBase = FALSE; + break; + } + if (allSameFirstBase) + { + rec->chromStart++; + for (i = 0; i < rec->alleleCount; i++) + { + if (rec->alleles[i][1] == '\0') + rec->alleles[i] = vcfFilePooledStr(vcff, "-"); + else + rec->alleles[i] = vcfFilePooledStr(vcff, rec->alleles[i]+1); + } + } + } +} + static struct pgSnp *vcfFileToPgSnp(struct vcfFile *vcff, struct trackDb *tdb) /* Convert vcff's records to pgSnp; don't free vcff until you're done with pgSnp * because it contains pointers into vcff's records' chrom. */ { struct pgSnp *pgsList = NULL; struct vcfRecord *rec; int maxLen = 33; int maxAlCount = 5; for (rec = vcff->records; rec != NULL; rec = rec->next) { struct pgSnp *pgs = pgSnpFromVcfRecord(rec); // Insertion sequences can be quite long; abbreviate here for display. int len = strlen(pgs->name); if (len > maxLen) { @@ -1120,30 +1155,31 @@ fileOrUrl = bbiNameFromSettingOrTableChrom(tg->tdb, conn, tg->table, chromName); hFreeConn(&conn); } int vcfMaxErr = -1; struct vcfFile *vcff = NULL; boolean hapClustEnabled = cartUsualBooleanClosestToHome(cart, tg->tdb, FALSE, VCF_HAP_ENABLED_VAR, TRUE); /* protect against temporary network error */ struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) { vcff = vcfTabixFileMayOpen(fileOrUrl, chromName, winStart, winEnd, vcfMaxErr, -1); if (vcff != NULL) { filterRecords(vcff, tg->tdb); + trimIndelAlleles(vcff); if (hapClustEnabled && vcff->genotypeCount > 1 && vcff->genotypeCount < 3000 && (tg->visibility == tvPack || tg->visibility == tvSquish)) vcfHapClusterOverloadMethods(tg, vcff); else { tg->items = vcfFileToPgSnp(vcff, tg->tdb); // pgSnp bases coloring/display decision on count of items: tg->customInt = slCount(tg->items); } // Don't vcfFileFree here -- we are using its string pointers! } } errCatchEnd(errCatch); if (errCatch->gotError || vcff == NULL) {