b280493ca2969a1de7f3dd8322f49fac095e77ac angie Tue Mar 12 16:40:33 2024 -0700 Call stripOldStudyVersions before addMissingRefAllele instead of after to avoid erroneous output when re-running on dbSnp 153 JSON. refs #33070 With this change, addMissingRefAllele is looking only at the latest version, and its added item does not get stripped back out due to having a default version of 0. Stripping the added ref allele back out caused downstream trouble in sortFrequencies; allele frequencies were lost, and then the Common subset included no indels (the variants that were missing ref alleles) for 153. diff --git src/hg/snp/dbSnpJsonToTab/dbSnpJsonToTab.c src/hg/snp/dbSnpJsonToTab/dbSnpJsonToTab.c index 2d051b0..d477ced 100644 --- src/hg/snp/dbSnpJsonToTab/dbSnpJsonToTab.c +++ src/hg/snp/dbSnpJsonToTab/dbSnpJsonToTab.c @@ -367,30 +367,31 @@ } if (!gotRef) { struct spdiBed *firstSpdiB = props->freqSourceSpdis[ix]; struct alObs *firstObs = props->freqSourceObs[ix]; if (sumCounts > firstObs->totalCount) errAbort("addMissingRefAllele: %s, source %d, total_count is %d < sum of non-ref " "counts %d", rsId, ix, firstObs->totalCount, sumCounts); struct spdiBed *spdiB = spdiBedNewLm(firstSpdiB->chrom, firstSpdiB->chromStart, firstSpdiB->del, firstSpdiB->del, lm); struct alObs *obs; lmAllocVar(lm, obs); obs->allele = spdiB->del; obs->obsCount = firstObs->totalCount - sumCounts; obs->totalCount = firstObs->totalCount; + obs->studyVersion = firstObs->studyVersion; slAddHead(&props->freqSourceSpdis[ix], spdiB); slAddHead(&props->freqSourceObs[ix], obs); } } } static void stripOldStudyVersions(struct sharedProps *props, char *rsId) /* After frequency allele observations have been sorted into per-project lists, * make sure the alleles look like [ACGT]+ and the reported total_counts are consistent. * The sum of obsCounts may be less than total_count (no-calls), but not greater. */ { int ix; for (ix = 0; ix < props->freqSourceCount; ix++) { int maxStudyVersion = 0; @@ -520,32 +521,32 @@ else props->freqNotMapped = TRUE; } if (props->freqNotMapped) warn("Frequency report not mapped to own assembly for %s", props->name); if (validCount > 0) { if (!freqSourceOrder) props->freqSourceCount = slCount(unorderedSourceList); int ix; for (ix = 0; ix < props->freqSourceCount; ix++) { slReverse(&props->freqSourceSpdis[ix]); slReverse(&props->freqSourceObs[ix]); } - addMissingRefAllele(props, rsId, lm); stripOldStudyVersions(props, rsId); + addMissingRefAllele(props, rsId, lm); checkFreqSourceObs(props, rsId); props->biggestSourceIx = biggestSourceIx; } else { props->freqSourceCount = 0; props->freqSourceSpdis = NULL; props->freqSourceObs = NULL; props->freqIsRc = NULL; } } } static struct slInt *soTermStringIdToIdList(struct slName *soTermNames, struct lm *lm) /* Given a list of strings like "SO:0001627", convert them to enum soTerm, sort by functional