5c8f8925eb335bb375595e839368c3f7594ea7e4
angie
Mon Mar 16 12:24:10 2020 -0700
Libify code that counts genotypes correctly from vcfClick.c and use it in vcfTrack.c. refs #24623, fixes #25165
diff --git src/hg/hgc/vcfClick.c src/hg/hgc/vcfClick.c
index 7f85d50..2e11288 100644
--- src/hg/hgc/vcfClick.c
+++ src/hg/hgc/vcfClick.c
@@ -312,128 +312,77 @@
}
printf("");
}
puts("");
}
hTableEnd();
jsEndCollapsibleSection();
}
static void ignoreEm(char *format, va_list args)
/* Ignore warnings from genotype parsing -- when there's one, there
* are usually hundreds more just like it. */
{
}
-static int genotypeIndex(int h0Ix, int h1Ix)
-/* Return the index in a linear array of distinct genotypes, given two 0-based allele indexes.
- * This follows the following convention used by GnomAD (GATK?), that has the advantage that
- * gt indexes of small numbers don't change as the number of alleles increases, and also matches
- * the ref/ref, ref/alt, alt/alt convention for biallelic variants:
- * 0/0,
- * 0/1, 1/1,
- * 0/2, 1/2, 2/2,
- * 0/3, 1/3, 2/3, 3/3,
- * ... */
-{
-// Note that in that scheme, h0Ix <= h1Ix; if h0Ix > h1Ix, swap them.
-if (h0Ix > h1Ix)
- {
- int tmp = h0Ix;
- h0Ix = h1Ix;
- h1Ix = tmp;
- }
-return (h1Ix * (h1Ix+1) / 2) + h0Ix;
-}
-
-static int genotypeArraySize(int alleleCount)
-/* Return the number of distinct genotypes given the number of alleles. That is the same as the
- * array index of the first genotype that would follow for alleleCount+1. */
-{
-return genotypeIndex(0, alleleCount + 1);
-}
-
static void vcfGenotypesDetails(struct vcfRecord *rec, struct trackDb *tdb, char **displayAls)
/* Print summary of allele and genotype frequency, plus collapsible section
* with table of genotype details. */
{
struct vcfFile *vcff = rec->file;
if (vcff->genotypeCount == 0)
return;
// Wrapper table for collapsible section:
puts("
");
pushWarnHandler(ignoreEm);
vcfParseGenotypes(rec);
popWarnHandler();
-// Tally genotypes and alleles for summary, adding 1 to rec->alleleCount to represent missing data:
-int alCounts[rec->alleleCount + 1];
-zeroBytes(alCounts, sizeof alCounts);
-int numGenotypes = genotypeArraySize(rec->alleleCount + 1);
-int gtCounts[numGenotypes];
-zeroBytes(gtCounts, sizeof gtCounts);
+int *gtCounts = NULL, *alCounts = NULL;;
int phasedGts = 0, diploidCount = 0;
-int i;
-for (i = 0; i < vcff->genotypeCount; i++)
- {
- struct vcfGenotype *gt = &(rec->genotypes[i]);
- if (gt->isPhased)
- phasedGts++;
- int hapIxA = gt->hapIxA;
- if (hapIxA < 0)
- hapIxA = rec->alleleCount;
- alCounts[hapIxA]++;
- if (!gt->isHaploid)
- {
- diploidCount++;
- int hapIxB = gt->hapIxB;
- if (hapIxB < 0)
- hapIxB = rec->alleleCount;
- alCounts[hapIxB]++;
- gtCounts[genotypeIndex(hapIxA, hapIxB)]++;
- }
- }
+vcfCountGenotypes(rec, >Counts, &alCounts, &phasedGts, &diploidCount);
printf("Genotype count: %d", vcff->genotypeCount);
-if (sameString(seqName, "chrY"))
+if (diploidCount == 0)
printf(" (haploid)");
-else if (sameString(seqName, "chrX"))
+else if (diploidCount != vcff->genotypeCount)
printf(" (%d phased, %d diploid, %d haploid)", phasedGts, diploidCount,
vcff->genotypeCount - diploidCount);
else
printf(" (%d phased)", phasedGts);
puts("
");
int totalAlleles = vcff->genotypeCount + diploidCount;
double refAf = (double)alCounts[0]/totalAlleles;
printf("Alleles: %s: %d (%.3f%%)", displayAls[0], alCounts[0], 100*refAf);
+int i;
for (i = 1; i < rec->alleleCount; i++)
{
double altAf = (double)alCounts[i]/totalAlleles;
printf("; %s: %d (%.3f%%)", displayAls[i], alCounts[i], 100*altAf);
}
if (alCounts[rec->alleleCount] > 0)
printf("; unknown: %d (%.3f%%)", alCounts[rec->alleleCount],
100 * (double)alCounts[rec->alleleCount]/totalAlleles);
puts("
");
if (vcff->genotypeCount > 1 && diploidCount > 0)
{
printf("Genotypes: %s/%s: %d (%.3f%%)",
displayAls[0], displayAls[0], gtCounts[0], 100*(double)gtCounts[0]/diploidCount);
for (i = 1; i < rec->alleleCount + 1; i++)
{
int j;
for (j = 0; j <= i; j++)
{
- int gtIx = genotypeIndex(j, i);
+ int gtIx = vcfGenotypeIndex(j, i);
if (gtCounts[gtIx] > 0)
{
char *alJ = (j == rec->alleleCount) ? "?" : displayAls[j];
char *alI = (i == rec->alleleCount) ? "?" : displayAls[i];
printf("; %s/%s: %d (%.3f%%)", alJ, alI, gtCounts[gtIx],
100*(double)gtCounts[gtIx]/diploidCount);
}
}
}
printf("
\n");
if (rec->alleleCount == 2)
{
boolean showHW = cartOrTdbBoolean(cart, tdb, VCF_SHOW_HW_VAR, FALSE);
if (showHW)
{