5c8f8925eb335bb375595e839368c3f7594ea7e4 angie Mon Mar 16 12:24:10 2020 -0700 Libify code that counts genotypes correctly from vcfClick.c and use it in vcfTrack.c. refs #24623, fixes #25165 diff --git src/hg/hgc/vcfClick.c src/hg/hgc/vcfClick.c index 7f85d50..2e11288 100644 --- src/hg/hgc/vcfClick.c +++ src/hg/hgc/vcfClick.c @@ -312,128 +312,77 @@ } printf("</TD>"); } puts("</TR>"); } hTableEnd(); jsEndCollapsibleSection(); } static void ignoreEm(char *format, va_list args) /* Ignore warnings from genotype parsing -- when there's one, there * are usually hundreds more just like it. */ { } -static int genotypeIndex(int h0Ix, int h1Ix) -/* Return the index in a linear array of distinct genotypes, given two 0-based allele indexes. - * This follows the following convention used by GnomAD (GATK?), that has the advantage that - * gt indexes of small numbers don't change as the number of alleles increases, and also matches - * the ref/ref, ref/alt, alt/alt convention for biallelic variants: - * 0/0, - * 0/1, 1/1, - * 0/2, 1/2, 2/2, - * 0/3, 1/3, 2/3, 3/3, - * ... */ -{ -// Note that in that scheme, h0Ix <= h1Ix; if h0Ix > h1Ix, swap them. -if (h0Ix > h1Ix) - { - int tmp = h0Ix; - h0Ix = h1Ix; - h1Ix = tmp; - } -return (h1Ix * (h1Ix+1) / 2) + h0Ix; -} - -static int genotypeArraySize(int alleleCount) -/* Return the number of distinct genotypes given the number of alleles. That is the same as the - * array index of the first genotype that would follow for alleleCount+1. */ -{ -return genotypeIndex(0, alleleCount + 1); -} - static void vcfGenotypesDetails(struct vcfRecord *rec, struct trackDb *tdb, char **displayAls) /* Print summary of allele and genotype frequency, plus collapsible section * with table of genotype details. */ { struct vcfFile *vcff = rec->file; if (vcff->genotypeCount == 0) return; // Wrapper table for collapsible section: puts("<TABLE>"); pushWarnHandler(ignoreEm); vcfParseGenotypes(rec); popWarnHandler(); -// Tally genotypes and alleles for summary, adding 1 to rec->alleleCount to represent missing data: -int alCounts[rec->alleleCount + 1]; -zeroBytes(alCounts, sizeof alCounts); -int numGenotypes = genotypeArraySize(rec->alleleCount + 1); -int gtCounts[numGenotypes]; -zeroBytes(gtCounts, sizeof gtCounts); +int *gtCounts = NULL, *alCounts = NULL;; int phasedGts = 0, diploidCount = 0; -int i; -for (i = 0; i < vcff->genotypeCount; i++) - { - struct vcfGenotype *gt = &(rec->genotypes[i]); - if (gt->isPhased) - phasedGts++; - int hapIxA = gt->hapIxA; - if (hapIxA < 0) - hapIxA = rec->alleleCount; - alCounts[hapIxA]++; - if (!gt->isHaploid) - { - diploidCount++; - int hapIxB = gt->hapIxB; - if (hapIxB < 0) - hapIxB = rec->alleleCount; - alCounts[hapIxB]++; - gtCounts[genotypeIndex(hapIxA, hapIxB)]++; - } - } +vcfCountGenotypes(rec, >Counts, &alCounts, &phasedGts, &diploidCount); printf("<B>Genotype count:</B> %d", vcff->genotypeCount); -if (sameString(seqName, "chrY")) +if (diploidCount == 0) printf(" (haploid)"); -else if (sameString(seqName, "chrX")) +else if (diploidCount != vcff->genotypeCount) printf(" (%d phased, %d diploid, %d haploid)", phasedGts, diploidCount, vcff->genotypeCount - diploidCount); else printf(" (%d phased)", phasedGts); puts("<BR>"); int totalAlleles = vcff->genotypeCount + diploidCount; double refAf = (double)alCounts[0]/totalAlleles; printf("<B>Alleles:</B> %s: %d (%.3f%%)", displayAls[0], alCounts[0], 100*refAf); +int i; for (i = 1; i < rec->alleleCount; i++) { double altAf = (double)alCounts[i]/totalAlleles; printf("; %s: %d (%.3f%%)", displayAls[i], alCounts[i], 100*altAf); } if (alCounts[rec->alleleCount] > 0) printf("; unknown: %d (%.3f%%)", alCounts[rec->alleleCount], 100 * (double)alCounts[rec->alleleCount]/totalAlleles); puts("<BR>"); if (vcff->genotypeCount > 1 && diploidCount > 0) { printf("<B>Genotypes:</B> %s/%s: %d (%.3f%%)", displayAls[0], displayAls[0], gtCounts[0], 100*(double)gtCounts[0]/diploidCount); for (i = 1; i < rec->alleleCount + 1; i++) { int j; for (j = 0; j <= i; j++) { - int gtIx = genotypeIndex(j, i); + int gtIx = vcfGenotypeIndex(j, i); if (gtCounts[gtIx] > 0) { char *alJ = (j == rec->alleleCount) ? "?" : displayAls[j]; char *alI = (i == rec->alleleCount) ? "?" : displayAls[i]; printf("; %s/%s: %d (%.3f%%)", alJ, alI, gtCounts[gtIx], 100*(double)gtCounts[gtIx]/diploidCount); } } } printf("<BR>\n"); if (rec->alleleCount == 2) { boolean showHW = cartOrTdbBoolean(cart, tdb, VCF_SHOW_HW_VAR, FALSE); if (showHW) {