2c0f74b4fbfefc533d27bf3ccdc18e4ecfdecae7 angie Fri Oct 5 17:07:18 2012 -0700 Follow-up to ce70491d:1. Move hgTracks left-base-of-indel trimming code up to vcf.[ch] for sharing w/hgc. 2. Correct chromStart in hgTracks mapBox links to hgc when we have trimmed a left base. 3. In hgc, abbreviate long sequences (e.g. 40kb deletion) and show trimmed left base in parentheses for consistency with VCF file (and sometimes INFO fields that use left-inclusive coords/seqs). 4. In pgSnpFromVcfRecord, don't truncate long alleles because hgTracks and hgc do their own abbreviating. diff --git src/lib/vcf.c src/lib/vcf.c index 1176273..9acc900 100644 --- src/lib/vcf.c +++ src/lib/vcf.c @@ -657,30 +657,67 @@ struct vcfRecord *vcfNextRecord(struct vcfFile *vcff) /* Parse the words in the next line from vcff into a vcfRecord. Return NULL at end of file. * Note: this does not store record in vcff->records! */ { char *words[VCF_MAX_COLUMNS]; int wordCount; if ((wordCount = lineFileChop(vcff->lf, words)) <= 0) return NULL; int expected = 8; if (vcff->genotypeCount > 0) expected = 9 + vcff->genotypeCount; lineFileExpectWords(vcff->lf, expected, wordCount); return vcfRecordFromRow(vcff, words); } +unsigned int vcfRecordTrimIndelLeftBase(struct vcfRecord *rec) +/* For indels, VCF includes the left neighboring base; for example, if the alleles are + * AA/- following a G base, then the VCF record will start one base to the left and have + * "GAA" and "G" as the alleles. That is not nice for display for two reasons: + * 1. Indels appear one base wider than their dbSNP entries. + * 2. In pgSnp display mode, the two alleles are always the same color. + * However, for hgTracks' mapBox we need the correct chromStart for identifying the + * record in hgc -- so return the original chromStart. 
*/ +{ /* Summary: when the record has more than one allele and all alleles begin with the same character, chromStart is advanced by one and that character is removed from each allele; the chromStart seen on entry is returned in every case. */ +unsigned int chromStartOrig = rec->chromStart; /* saved before any trimming; this is the return value */ +struct vcfFile *vcff = rec->file; /* passed to vcfFilePooledStr below when replacing allele strings */ +if (rec->alleleCount > 1) /* with fewer than two alleles nothing is compared or trimmed */ + { + boolean allSameFirstBase = TRUE; + char firstBase = rec->alleles[0][0]; + int i; + for (i = 1; i < rec->alleleCount; i++) /* compare the first character of every other allele against that of alleles[0] */ + if (rec->alleles[i][0] != firstBase) + { + allSameFirstBase = FALSE; + break; + } + if (allSameFirstBase) /* NOTE(review): only first characters are compared, allele lengths are never checked, so same-length alleles with a common first base are trimmed too -- confirm this is intended */ + { + rec->chromStart++; /* the record now starts one position to the right, past the shared leading base */ + for (i = 0; i < rec->alleleCount; i++) + { + if (rec->alleles[i][1] == '\0') /* one-character allele: nothing is left after dropping the shared base, so it becomes a dash. NOTE(review): index 1 is read without checking index 0, which assumes no allele is an empty string -- confirm */ + rec->alleles[i] = vcfFilePooledStr(vcff, "-"); + else + rec->alleles[i] = vcfFilePooledStr(vcff, rec->alleles[i]+1); /* remainder of the allele after its first character, obtained via vcfFilePooledStr */ + } + } + } +return chromStartOrig; +} + static void vcfParseData(struct vcfFile *vcff, int maxRecords) /* Given a vcfFile into which the header has been parsed, and whose lineFile is positioned * at the beginning of a data row, parse and store all data rows from lineFile. */ { if (vcff == NULL) return; int recCount = 0; struct vcfRecord *record; while ((record = vcfNextRecord(vcff)) != NULL) { if (maxRecords >= 0 && recCount >= maxRecords) break; slAddHead(&(vcff->records), record); recCount++; }