2c0f74b4fbfefc533d27bf3ccdc18e4ecfdecae7 angie Fri Oct 5 17:07:18 2012 -0700 Follow-up to ce70491d:1. Move hgTracks left-base-of-indel trimming code up to vcf.[ch] for sharing w/hgc. 2. Correct chromStart in hgTracks mapBox links to hgc when we have trimmed a left base. 3. In hgc, abbreviate long sequences (e.g. 40kb deletion) and show trimmed left base in parentheses for consistency with VCF file (and sometimes INFO fields that use left-inclusive coords/seqs). 4. In pgSnpFromVcfRecord, don't truncate long alleles because hgTracks and hgc do their own abbreviating. diff --git src/inc/vcf.h src/inc/vcf.h index 1c55a25..16e7f3c 100644 --- src/inc/vcf.h +++ src/inc/vcf.h @@ -188,30 +188,39 @@ * there are maxErr+1 errors. A maxErr less than zero does not stop * and reports all errors. */ struct vcfFile *vcfTabixFileMayOpen(char *fileOrUrl, char *chrom, int start, int end, int maxErr, int maxRecords); /* Open a VCF file that has been compressed and indexed by tabix and * parse VCF header, or return NULL if unable. If chrom is non-NULL, * seek to the position range and parse all lines in range into * vcff->records. If maxErr >= zero, then continue to parse until * there are maxErr+1 errors. A maxErr less than zero does not stop * and reports all errors. */ struct vcfRecord *vcfRecordFromRow(struct vcfFile *vcff, char **words); /* Parse words from a VCF data line into a VCF record structure. */ +unsigned int vcfRecordTrimIndelLeftBase(struct vcfRecord *rec); +/* For indels, VCF includes the left neighboring base; for example, if the alleles are + * AA/- following a G base, then the VCF record will start one base to the left and have + * "GAA" and "G" as the alleles. That is not nice for display for two reasons: + * 1. Indels appear one base wider than their dbSNP entries. + * 2. In pgSnp display mode, the two alleles are always the same color. + * However, for hgTracks' mapBox we need the correct chromStart for identifying the + * record in hgc -- so return the original chromStart. */ + void vcfFileFree(struct vcfFile **vcffPtr); /* Free a vcfFile object. */ const struct vcfRecord *vcfFileFindVariant(struct vcfFile *vcff, char *variantId); /* Return all records with name=variantId, or NULL if not found. */ const struct vcfInfoElement *vcfRecordFindInfo(const struct vcfRecord *record, char *key); /* Find an INFO element, or NULL. */ struct vcfInfoDef *vcfInfoDefForKey(struct vcfFile *vcff, const char *key); /* Return infoDef for key, or NULL if it wasn't specified in the header or VCF spec. */ void vcfParseGenotypes(struct vcfRecord *record); /* Translate record->genotypesUnparsedStrings[] into proper struct vcfGenotype[]. * This destroys genotypesUnparsedStrings. */