b280493ca2969a1de7f3dd8322f49fac095e77ac
angie
  Tue Mar 12 16:40:33 2024 -0700
Call stripOldStudyVersions before addMissingRefAllele instead of after to avoid erroneous output when re-running on dbSnp 153 JSON.  refs #33070
With this change, addMissingRefAllele looks only at the latest study version, and the ref allele item that it adds is no longer stripped back out (previously the added item was stripped because its study version defaulted to 0).  Stripping the added ref allele back out caused downstream trouble in sortFrequencies: allele frequencies were lost, and as a result the Common subset included no indels (the variants that were missing ref alleles) for 153.

diff --git src/hg/snp/dbSnpJsonToTab/dbSnpJsonToTab.c src/hg/snp/dbSnpJsonToTab/dbSnpJsonToTab.c
index 2d051b0..d477ced 100644
--- src/hg/snp/dbSnpJsonToTab/dbSnpJsonToTab.c
+++ src/hg/snp/dbSnpJsonToTab/dbSnpJsonToTab.c
@@ -367,30 +367,31 @@
         }
     if (!gotRef)
         {
         struct spdiBed *firstSpdiB = props->freqSourceSpdis[ix];
         struct alObs *firstObs = props->freqSourceObs[ix];
         if (sumCounts > firstObs->totalCount)
             errAbort("addMissingRefAllele: %s, source %d, total_count is %d < sum of non-ref "
                      "counts %d", rsId, ix, firstObs->totalCount, sumCounts);
         struct spdiBed *spdiB = spdiBedNewLm(firstSpdiB->chrom, firstSpdiB->chromStart,
                                              firstSpdiB->del, firstSpdiB->del, lm);
         struct alObs *obs;
         lmAllocVar(lm, obs);
         obs->allele = spdiB->del;
         obs->obsCount = firstObs->totalCount - sumCounts;
         obs->totalCount = firstObs->totalCount;
+        obs->studyVersion = firstObs->studyVersion;
         slAddHead(&props->freqSourceSpdis[ix], spdiB);
         slAddHead(&props->freqSourceObs[ix], obs);
         }
     }
 }
 
 static void stripOldStudyVersions(struct sharedProps *props, char *rsId)
 /* After frequency allele observations have been sorted into per-project lists,
  * make sure the alleles look like [ACGT]+ and the reported total_counts are consistent.
  * The sum of obsCounts may be less than total_count (no-calls), but not greater. */
 {
 int ix;
 for (ix = 0;  ix < props->freqSourceCount;  ix++)
     {
     int maxStudyVersion = 0;
@@ -520,32 +521,32 @@
         else
             props->freqNotMapped = TRUE;
         }
     if (props->freqNotMapped)
         warn("Frequency report not mapped to own assembly for %s", props->name);
     if (validCount > 0)
         {
         if (!freqSourceOrder)
             props->freqSourceCount = slCount(unorderedSourceList);
         int ix;
         for (ix = 0;  ix < props->freqSourceCount; ix++)
             {
             slReverse(&props->freqSourceSpdis[ix]);
             slReverse(&props->freqSourceObs[ix]);
             }
-        addMissingRefAllele(props, rsId, lm);
         stripOldStudyVersions(props, rsId);
+        addMissingRefAllele(props, rsId, lm);
         checkFreqSourceObs(props, rsId);
         props->biggestSourceIx = biggestSourceIx;
         }
     else
         {
         props->freqSourceCount = 0;
         props->freqSourceSpdis = NULL;
         props->freqSourceObs = NULL;
         props->freqIsRc = NULL;
         }
     }
 }
 
 static struct slInt *soTermStringIdToIdList(struct slName *soTermNames, struct lm *lm)
 /* Given a list of strings like "SO:0001627", convert them to enum soTerm, sort by functional