acca3deffc05c4d8d11590a1cf3d893763254712 angie Thu Oct 31 13:43:05 2019 -0700 dbSnp153: Adding new ucscNotes suggested by Ana Benet: clinvar{Benign,Conflicting,Pathogenic}, rareAll, rareSome. refs #23283 diff --git src/hg/snp/dbSnpJsonToTab/dbSnpJsonToTab.c src/hg/snp/dbSnpJsonToTab/dbSnpJsonToTab.c index d8ea261..de03bd2 100644 --- src/hg/snp/dbSnpJsonToTab/dbSnpJsonToTab.c +++ src/hg/snp/dbSnpJsonToTab/dbSnpJsonToTab.c @@ -940,30 +940,64 @@ // There may be nothing to trim because they're usually minimal already and // stringIn catches false positive like A/TAAC, and that's fine. struct spdiBed *spdiB = spdiBedNewLm(bds->chrom, bds->chromStart, bds->ref, alt, lm); struct spdiBed *spdiBTrim = maybeTrimSpdi(spdiB, lm); maybeExpandRange(spdiBTrim, seqWin, spdiB, NULL, lm); if (spdiB->chromStart != bds->chromStart || spdiB->chromEnd != bds->chromEnd) errAbort("Range of %s (%s|%d|%d ref='%s', alt='%s') " "could be expanded to %s|%d|%d\n", bds->name, bds->chrom, bds->chromStart, bds->chromEnd, bds->ref, alt, bds->chrom, spdiB->chromStart, spdiB->chromEnd); } } } +static void addClinVarSigs(struct dyString *dyUcscNotes, struct sharedProps *props) +/* If clinVarSigs indicate benign, pathogenic, or both (conflicting), add ucscNote. Walks the props->clinVarSigs list: likely-benign, benign and benign-likely-benign count as benign; pathogenic, likely-pathogenic and pathogenic-likely-pathogenic count as pathogenic; conflicting-interpretations-of-pathogenicity counts as both and stops the scan early. Any other significance value is ignored. Appends at most one of bdsClinvarConflicting, bdsClinvarBenign or bdsClinvarPathogenic (each followed by a comma) to dyUcscNotes; conflicting wins when both benign and pathogenic were seen, and nothing is appended when neither was seen. 
*/ +{ +boolean isBenign = FALSE, isPathogenic = FALSE; +struct slName *sig; +for (sig = props->clinVarSigs; sig != NULL; sig = sig->next) + { + if (sameString(sig->name, "likely-benign") || + sameString(sig->name, "benign") || + sameString(sig->name, "benign-likely-benign")) + { + isBenign = TRUE; + } + else if (sameString(sig->name, "pathogenic") || + sameString(sig->name, "likely-pathogenic") || + sameString(sig->name, "pathogenic-likely-pathogenic")) + { + isPathogenic = TRUE; + } + else if (sameString(sig->name, "conflicting-interpretations-of-pathogenicity")) + { + isBenign = TRUE; + isPathogenic = TRUE; + break; + } + } +if (isBenign && isPathogenic) + dyStringAppend(dyUcscNotes, bdsClinvarConflicting ","); +else if (isBenign) + dyStringAppend(dyUcscNotes, bdsClinvarBenign ","); +else if (isPathogenic) + dyStringAppend(dyUcscNotes, bdsClinvarPathogenic ","); +} + static boolean delMismatchesGenome(struct bigDbSnp *bds, struct seqWindow *seqWin) /* Return TRUE if bds->ref (spdi del) does not match assembly sequence at bds coords. * Sometimes the genome has an N and dbSNP has a more specific IUPAC character. * errAbort if there's some other kind of inconsistency. */ { int refLen = strlen(bds->ref); uint refStart = bds->chromStart; uint refEnd = bds->chromEnd; if (refLen != refEnd - refStart) errAbort("Inconsistent ref and coords for %s: ref is '%s' (%d bases) " "but position is %s|%d|%d (%d bases)", bds->name, bds->ref, refLen, bds->chrom, refStart, refEnd, refEnd - refStart); char genomeRef[refLen+1]; seqWindowCopy(seqWin, refStart, refLen, genomeRef, sizeof genomeRef); if (differentString(genomeRef, bds->ref)) @@ -1022,35 +1056,43 @@ return TRUE; } } return FALSE; } static void addUcscNotes(struct sharedProps *props, boolean isRc, boolean isMultiMapper, struct bigDbSnp *bds, struct seqWindow *seqWin, struct dyString *dyUcscNotes, struct lm *lm) /* Record interesting conditions if applicable. 
*/ { if (props->class != bigDbSnpClassFromAlleles(bds)) dyStringAppend(dyUcscNotes, bdsClassMismatch ","); if (props->clinVarAccs != NULL) dyStringAppend(dyUcscNotes, bdsClinvar ","); +addClinVarSigs(dyUcscNotes, props); if (props->commonCount > 0) { dyStringAppend(dyUcscNotes, bdsCommonSome ","); if (props->rareCount == 0) dyStringAppend(dyUcscNotes, bdsCommonAll ","); + else + dyStringAppend(dyUcscNotes, bdsRareSome ","); + } +else if (props->rareCount > 0 || props->freqSourceCount == 0) + { + dyStringAppend(dyUcscNotes, bdsRareSome ","); + dyStringAppend(dyUcscNotes, bdsRareAll ","); } if (isRc) dyStringAppend(dyUcscNotes, bdsRevStrand ","); if (isMultiMapper) dyStringAppend(dyUcscNotes, bdsMultiMap ","); checkRareRef(props, bds->ref, isRc, dyUcscNotes); if (delMismatchesGenome(bds, seqWin)) dyStringAppend(dyUcscNotes, bdsRefMismatch ","); if (anyIupac(bds->ref)) dyStringAppend(dyUcscNotes, bdsRefIsAmbiguous ","); int i; for (i = 0; i < bds->altCount; i++) if (anyIupac(bds->alts[i])) dyStringAppend(dyUcscNotes, bdsAltIsAmbiguous ","); if (hasAmbiguousFreqAllele(props))