aaf72102b545c05c42f66b7a3fc22d65b1ecf4fe angie Mon Aug 8 14:12:39 2016 -0700 Added recognition of a small subset of HGVS terms: coding (c.) SNVs relative to RefSeq NM_ or LRG transcript IDs, and protein (p.) simple substitutions relative to NP_. Also accepted (not HGVS but similar and popular): geneSymbol and abbreviated protein subst like "ALK G1494E". hgFind will map terms to the current genome if possible, and will display warnings about unrecognized accessions, out-of-bounds coordinates and mismatching reference alleles. refs #15071, #15554 diff --git src/lib/dnautil.c src/lib/dnautil.c index e12ad44..6c14a58 100644 --- src/lib/dnautil.c +++ src/lib/dnautil.c @@ -1170,15 +1170,48 @@ void dnaUtilOpen() /* Initialize stuff herein. */ { static boolean opened = FALSE; if (!opened) { checkSizeTypes(); initNtVal(); initAaVal(); initNtChars(); initNtMixedCaseChars(); initNtCompTable(); opened = TRUE; } } + +boolean aaToArbitraryCodon(char aa, char *dest) +/* Reverse-translate aa back into one of its codons, return TRUE if successful. + * Writes 3 characters at the start of dest; does not null-terminate the codon string. */ +{ +int ix; +for (ix = 0; ix < ArraySize(codonTable); ix++) + { + if (toupper(aa) == codonTable[ix].protCode) + { + strncpy(dest, codonTable[ix].codon, 3); + return TRUE; + } + } +return FALSE; +} + +char aaAbbrToLetter(char *abbr) +/* Convert an AA abbreviation such as "Ala", "Asp" etc., to its single letter code + * such as "A", "D" etc. Return the null char '\0' if abbr is not found. */ +{ +// Lowercase for comparison. +char abbrLC[4]; +safencpy(abbrLC, sizeof(abbrLC), abbr, 3); +toLowerN(abbrLC, 3); +int ix; +for (ix = 0; ix < ArraySize(aminoAcidTable); ix++) + { + if (sameStringN(abbrLC, aminoAcidTable[ix].abbreviation, 3)) + return aminoAcidTable[ix].letter; + } +return '\0'; +}