c843f30cccc3610e26a26142ad0622f29402254b angie Wed Jan 11 09:54:24 2012 -0800 Bug #6529 (VCF internal rep doesn't do missing data): add flags for missingdata values to vcfInfoElement, so we know the difference between "0" and ".". This applies to both INFO column values and genotype (FORMAT) values. Also, vcfGenotype's hapIxA and hapIxB are now signed, with negative values indicating missing data. User reported bug in MLQ #6462. diff --git src/inc/vcf.h src/inc/vcf.h index d11c0f6..6394026 100644 --- src/inc/vcf.h +++ src/inc/vcf.h @@ -36,42 +36,44 @@ /* Definition of INFO column component from VCF header: */ { struct vcfInfoDef *next; char *key; // A short identifier, e.g. MQ for mapping quality int fieldCount; // The number of values to follow the id, or -1 if it varies enum vcfInfoType type; // The type of values that follow the id char *description; // Brief description of info }; struct vcfInfoElement /* A single INFO column component; each row's INFO column may contain multiple components. */ { char *key; // An identifier described by a struct vcfInfoDef int count; // Number of data values following id union vcfDatum *values; // Array of data values following id + bool *missingData; // Array of flags for missing data values ("." instead of number) }; struct vcfGenotype /* A single component of the optional GENOTYPE column. */ { char *id; // Name of individual/sample (pointer to vcfFile genotypeIds) or . - unsigned char hapIxA; // Index of one haplotype's allele: 0=reference, 1=alt, 2=other alt - unsigned char hapIxB; // Index of other haplotype's allele + char hapIxA; // Index of one haplotype's allele: 0=reference, 1=alt, 2=other alt + // *or* if negative, missing data + char hapIxB; // Index of other haplotype's allele, or if negative, missing data bool isPhased; // True if haplotypes are phased bool isHaploid; // True if there is only one haplotype (e.g. chrY) int infoCount; // Number of components named in FORMAT column - struct vcfInfoElement *infoElements; // Array of info components + struct vcfInfoElement *infoElements; // Array of info components for this genotype call }; struct vcfRecord /* A VCF data row (or list of rows). */ { struct vcfRecord *next; char *chrom; // Reference assembly sequence name unsigned int chromStart; // Start offset in chrom unsigned int chromEnd; // End offset in chrom char *name; // Variant name from ID column int alleleCount; // Number of alleles (reference + alternates) char **alleles; // Alleles: reference first then alternate alleles char *qual; // . or Phred-scaled score, i.e. -10log_10 P(call in ALT is wrong) int filterCount; // Number of ;-separated filter codes in FILTER column char **filters; // Code(s) described in header for failed filters (or PASS or .)