acca3deffc05c4d8d11590a1cf3d893763254712
angie
  Thu Oct 31 13:43:05 2019 -0700
dbSnp153: Adding new ucscNotes suggested by Ana Benet: clinvar{Benign,Conflicting,Pathogenic}, rareAll, rareSome.  refs #23283

diff --git src/hg/snp/dbSnpJsonToTab/dbSnpJsonToTab.c src/hg/snp/dbSnpJsonToTab/dbSnpJsonToTab.c
index d8ea261..de03bd2 100644
--- src/hg/snp/dbSnpJsonToTab/dbSnpJsonToTab.c
+++ src/hg/snp/dbSnpJsonToTab/dbSnpJsonToTab.c
@@ -940,30 +940,64 @@
         // There may be nothing to trim because they're usually minimal already and
         // stringIn catches false positive like A/TAAC, and that's fine.
         struct spdiBed *spdiB = spdiBedNewLm(bds->chrom, bds->chromStart, bds->ref, alt, lm);
         struct spdiBed *spdiBTrim = maybeTrimSpdi(spdiB, lm);
 
         maybeExpandRange(spdiBTrim, seqWin, spdiB, NULL, lm);
         if (spdiB->chromStart != bds->chromStart || spdiB->chromEnd != bds->chromEnd)
             errAbort("Range of %s (%s|%d|%d ref='%s', alt='%s') "
                      "could be expanded to %s|%d|%d\n",
                      bds->name, bds->chrom, bds->chromStart, bds->chromEnd, bds->ref, alt,
                      bds->chrom, spdiB->chromStart, spdiB->chromEnd);
         }
     }
 }
 
+static void addClinVarSigs(struct dyString *dyUcscNotes, struct sharedProps *props)
+/* Append at most one ClinVar note to dyUcscNotes: benign, pathogenic, or (if both seen) conflicting. */
+{
+boolean isBenign = FALSE, isPathogenic = FALSE;
+struct slName *sig;
+for (sig = props->clinVarSigs;  sig != NULL;  sig = sig->next)  // visit each significance in the list
+    {
+    if (sameString(sig->name, "likely-benign") ||
+             sameString(sig->name, "benign") ||
+             sameString(sig->name, "benign-likely-benign"))
+        {
+        isBenign = TRUE;
+        }
+    else if (sameString(sig->name, "pathogenic") ||
+             sameString(sig->name, "likely-pathogenic") ||
+             sameString(sig->name, "pathogenic-likely-pathogenic"))
+        {
+        isPathogenic = TRUE;
+        }
+    else if (sameString(sig->name, "conflicting-interpretations-of-pathogenicity"))
+        {
+        isBenign = TRUE;  // an explicit "conflicting" sig is handled as if both kinds were seen
+        isPathogenic = TRUE;
+        break;  // both flags are set, so remaining sigs cannot change the result
+        }
+    }  // a sig matching none of the names above leaves both flags unchanged
+if (isBenign && isPathogenic)  // both seen, in one sig or across several: only the conflicting note
+    dyStringAppend(dyUcscNotes, bdsClinvarConflicting ",");  // each note is written with a trailing ','
+else if (isBenign)
+    dyStringAppend(dyUcscNotes, bdsClinvarBenign ",");
+else if (isPathogenic)
+    dyStringAppend(dyUcscNotes, bdsClinvarPathogenic ",");
+}  // nothing is appended when neither flag was set
+
 static boolean delMismatchesGenome(struct bigDbSnp *bds, struct seqWindow *seqWin)
 /* Return TRUE if bds->ref (spdi del) does not match assembly sequence at bds coords.
  * Sometimes the genome has an N and dbSNP has a more specific IUPAC character.
  * errAbort if there's some other kind of inconsistency. */
 {
 int refLen = strlen(bds->ref);
 uint refStart = bds->chromStart;
 uint refEnd = bds->chromEnd;
 if (refLen != refEnd - refStart)
     errAbort("Inconsistent ref and coords for %s: ref is '%s' (%d bases) "
              "but position is %s|%d|%d (%d bases)",
              bds->name, bds->ref, refLen, bds->chrom, refStart, refEnd, refEnd - refStart);
 char genomeRef[refLen+1];
 seqWindowCopy(seqWin, refStart, refLen, genomeRef, sizeof genomeRef);
 if (differentString(genomeRef, bds->ref))
@@ -1022,35 +1056,43 @@
             return TRUE;
         }
     }
 return FALSE;
 }
 
 static void addUcscNotes(struct sharedProps *props, boolean isRc, boolean isMultiMapper,
                          struct bigDbSnp *bds, struct seqWindow *seqWin,
                          struct dyString *dyUcscNotes, struct lm *lm)
 /* Record interesting conditions if applicable. */
 {
 if (props->class != bigDbSnpClassFromAlleles(bds))
     dyStringAppend(dyUcscNotes, bdsClassMismatch ",");
 if (props->clinVarAccs != NULL)
     dyStringAppend(dyUcscNotes, bdsClinvar ",");
+addClinVarSigs(dyUcscNotes, props);
 if (props->commonCount > 0)
     {
     dyStringAppend(dyUcscNotes, bdsCommonSome ",");
     if (props->rareCount == 0)
         dyStringAppend(dyUcscNotes, bdsCommonAll ",");
+    else
+        dyStringAppend(dyUcscNotes, bdsRareSome ",");
+    }
+else if (props->rareCount > 0 || props->freqSourceCount == 0)
+    {
+    dyStringAppend(dyUcscNotes, bdsRareSome ",");
+    dyStringAppend(dyUcscNotes, bdsRareAll ",");
     }
 if (isRc)
     dyStringAppend(dyUcscNotes, bdsRevStrand ",");
 if (isMultiMapper)
     dyStringAppend(dyUcscNotes, bdsMultiMap ",");
 checkRareRef(props, bds->ref, isRc, dyUcscNotes);
 if (delMismatchesGenome(bds, seqWin))
     dyStringAppend(dyUcscNotes, bdsRefMismatch ",");
 if (anyIupac(bds->ref))
     dyStringAppend(dyUcscNotes, bdsRefIsAmbiguous ",");
 int i;
 for (i = 0;  i < bds->altCount;  i++)
     if (anyIupac(bds->alts[i]))
         dyStringAppend(dyUcscNotes, bdsAltIsAmbiguous ",");
 if (hasAmbiguousFreqAllele(props))