7a876ed56fe41a491f39e3e1246514bfa3181a40 angie Tue Feb 21 11:56:03 2017 -0800
Tighten up regex for pseudoHgvs protein searches so that gene symbols ending in p followed by a number aren't diced up by the regex. fixes #18887
diff --git src/hg/lib/hgHgvs.c src/hg/lib/hgHgvs.c
index 27b2d50..a035079 100644
--- src/hg/lib/hgHgvs.c
+++ src/hg/lib/hgHgvs.c
@@ -155,75 +155,75 @@
 // substring numbering:
 // 0..................................................... whole matching string
 // 1................ accession and optional dot version
 // 2..... optional dot version
 // 3..... optional gene symbol in ()s
 // 4... optional gene symbol
 // 5... original start AA
 // 6... 1-based start position
 // 7................. optional range sep and AA+pos
 // 8... original end AA
 // 9... 1-based end position
 // 10..... change description
 // Pseudo-HGVS in common usage
 // Sometimes users give an NM_ accession, but a protein change.
-#define pseudoHgvsNMPDotSubstExp "^" versionedRefSeqNMExp "[: p.]+" hgvsAminoAcidSubstExp
+#define pseudoHgvsNMPDotSubstExp "^" versionedRefSeqNMExp "[ :]+p?\\.?" hgvsAminoAcidSubstExp
 // substring numbering:
 // 0..................................................... whole matching string
 // 1............... acc & optional dot version
 // 2........ optional dot version
 // 3..... optional gene sym in ()s
 // 4... optional gene symbol
 // 5..... original sequence
 // 6...... 1-based position
 // 7...... replacement sequence
-#define pseudoHgvsNMPDotRangeExp "^" versionedRefSeqNMExp "[: p.]+" hgvsAaRangeExp
+#define pseudoHgvsNMPDotRangeExp "^" versionedRefSeqNMExp "[ :]+p?\\.?" hgvsAaRangeExp
 // substring numbering:
 // 0..................................................... whole matching string
 // 1............... acc & optional dot version
 // 2........ optional dot version
 // 3..... optional gene sym in ()s
 // 4... optional gene symbol
 // 5... original start AA
 // 6... 1-based start position
 // 7.......... optional range sep and AA+pos
 // 8... original end AA
 // 9... 1-based end position
 // 10.... change description
 // Common: gene symbol followed by space and/or punctuation followed by protein change
-#define pseudoHgvsGeneSymbolProtSubstExp "^" geneSymbolExp "[: p.]+" hgvsAminoAcidSubstExp
+#define pseudoHgvsGeneSymbolProtSubstExp "^" geneSymbolExp "[ :]+p?\\.?" hgvsAminoAcidSubstExp
 // 0..................................................... whole matching string
 // 1................... gene symbol
 // 2..... original sequence
 // 3...... 1-based position
 // 4...... replacement sequence
-#define pseudoHgvsGeneSymbolProtRangeExp "^" geneSymbolExp "[: p.]+" hgvsAaRangeExp
+#define pseudoHgvsGeneSymbolProtRangeExp "^" geneSymbolExp "[ :]+p?\\.?" hgvsAaRangeExp
 // 0..................................................... whole matching string
 // 1................... gene symbol
 // 2... original start AA
 // 3... 1-based start position
 // 4................ optional range sep and AA+pos
 // 5... original end AA
 // 6... 1-based end position
 // 7..... change description
 // As above but omitting the protein change
-#define pseudoHgvsGeneSymbolProtPosExp "^" geneSymbolExp "[: p.]+" posIntExp
+#define pseudoHgvsGeneSymbolProtPosExp "^" geneSymbolExp "[ :]+p?\\.?" posIntExp
 // 0.......................... whole matching string
 // 1................... gene symbol
 // 2..... 1-based position
 // Gene symbol, maybe punctuation, and a clear "c." position (and possibly change)
 #define pseudoHgvsGeneSympolCDotPosExp "^" geneSymbolExp "[: ]+" hgvsCDotPosExp
 // 0..................................................... whole matching string
 // 1................... gene symbol
 // 2..... optional beginning of position exp
 // 3..... beginning of position exp
 static struct hgvsVariant *hgvsParseGDotPos(char *term)
 /* If term is parseable as an HGVS NC_...g. term, return the parsed representation, else NULL. */