5c8f8925eb335bb375595e839368c3f7594ea7e4 angie Mon Mar 16 12:24:10 2020 -0700 Libify code that counts genotypes correctly from vcfClick.c and use it in vcfTrack.c. refs #24623, fixes #25165 diff --git src/inc/vcf.h src/inc/vcf.h index cc9d121..d2083d3 100644 --- src/inc/vcf.h +++ src/inc/vcf.h @@ -297,30 +297,45 @@ void vcfParseGenotypes(struct vcfRecord *record); /* Translate record->genotypesUnparsedStrings[] into proper struct vcfGenotype[]. * This destroys genotypesUnparsedStrings. */ const struct vcfGenotype *vcfRecordFindGenotype(struct vcfRecord *record, char *sampleId); /* Find the genotype and associated info for the individual, or return NULL. * This calls vcfParseGenotypes if it has not already been called. */ struct vcfInfoDef *vcfInfoDefForGtKey(struct vcfFile *vcff, const char *key); /* Look up the type of genotype FORMAT component key, in the definitions from the header, * and failing that, from the keys reserved in the spec. */ const struct vcfInfoElement *vcfGenotypeFindInfo(const struct vcfGenotype *gt, char *key); /* Find the genotype infoElement for key, or return NULL. */ +int vcfGenotypeIndex(int h0Ix, int h1Ix); +/* Return the index in a linear array of distinct genotypes, given two 0-based allele indexes. + * This follows the convention used by GnomAD (GATK?), which has the advantage that + * gt indexes of small numbers don't change as the number of alleles increases, and also matches + * the ref/ref, ref/alt, alt/alt convention for biallelic variants: + * 0/0, + * 0/1, 1/1, + * 0/2, 1/2, 2/2, + * 0/3, 1/3, 2/3, 3/3, + * ... */ + +void vcfCountGenotypes(struct vcfRecord *rec, int **retGtCounts, int **retAlleleCounts, + int *retPhasedCount, int *retDiploidCount); +/* Tally genotypes and alleles for summary, adding 1 to rec->alleleCount to represent missing data */ + char *vcfFilePooledStr(struct vcfFile *vcff, char *str); /* Allocate memory for a string from vcff's shared string pool. 
*/ #define VCF_NUM_COLS 10 struct asObject *vcfAsObj(); // Return asObject describing fields of VCF char *vcfGetSlashSepAllelesFromWords(char **words, struct dyString *dy); /* Overwrite dy with a /-separated allele string from VCF words, * skipping the extra initial base that VCF requires for indel alleles if necessary. * Return dy->string for convenience. */ void vcfRecordWriteNoGt(FILE *f, struct vcfRecord *rec); /* Write the first 8 columns of VCF rec to f. Genotype data will be ignored if present. */