7a876ed56fe41a491f39e3e1246514bfa3181a40
angie
  Tue Feb 21 11:56:03 2017 -0800
Tighten up regex for pseudoHgvs protein searches so that gene symbols ending in p followed by a number aren't diced up by the regex.  fixes #18887

diff --git src/hg/lib/hgHgvs.c src/hg/lib/hgHgvs.c
index 27b2d50..a035079 100644
--- src/hg/lib/hgHgvs.c
+++ src/hg/lib/hgHgvs.c
@@ -155,75 +155,75 @@
 // substring numbering:
 //      0.....................................................  whole matching string
 //      1................                                       accession and optional dot version
 //                 2.....                                       optional dot version
 //                         3.....                               optional gene symbol in ()s
 //                          4...                                optional gene symbol
 //                                 5...                         original start AA
 //                                       6...                   1-based start position
 //                                          7.................  optional range sep and AA+pos
 //                                            8...              original end AA
 //                                                 9...         1-based end position
 //                                                     10.....  change description
 
 // Pseudo-HGVS in common usage
 // Sometimes users give an NM_ accession, but a protein change.
-#define pseudoHgvsNMPDotSubstExp "^" versionedRefSeqNMExp "[: p.]+" hgvsAminoAcidSubstExp
+#define pseudoHgvsNMPDotSubstExp "^" versionedRefSeqNMExp "[ :]+p?\\.?" hgvsAminoAcidSubstExp
 // substring numbering:
 //      0.....................................................  whole matching string
 //      1...............                                        acc & optional dot version
 //             2........                                        optional dot version
 //                       3.....                                 optional gene sym in ()s
 //                        4...                                  optional gene symbol
 //                                   5.....                     original sequence
 //                                           6......            1-based position
 //                                                     7......  replacement sequence
 
-#define pseudoHgvsNMPDotRangeExp "^" versionedRefSeqNMExp "[: p.]+" hgvsAaRangeExp
+#define pseudoHgvsNMPDotRangeExp "^" versionedRefSeqNMExp "[ :]+p?\\.?" hgvsAaRangeExp
 // substring numbering:
 //      0.....................................................  whole matching string
 //      1...............                                        acc & optional dot version
 //             2........                                        optional dot version
 //                       3.....                                 optional gene sym in ()s
 //                        4...                                  optional gene symbol
 //                                 5...                         original start AA
 //                                       6...                   1-based start position
 //                                           7..........        optional range sep and AA+pos
 //                                             8...             original end AA
 //                                                  9...        1-based end position
 //                                                      10....  change description
 
 // Common: gene symbol followed by space and/or punctuation followed by protein change
-#define pseudoHgvsGeneSymbolProtSubstExp "^" geneSymbolExp "[: p.]+" hgvsAminoAcidSubstExp
+#define pseudoHgvsGeneSymbolProtSubstExp "^" geneSymbolExp "[ :]+p?\\.?" hgvsAminoAcidSubstExp
 //      0.....................................................  whole matching string
 //      1...................                                    gene symbol
 //                                   2.....                     original sequence
 //                                           3......            1-based position
 //                                                     4......  replacement sequence
 
-#define pseudoHgvsGeneSymbolProtRangeExp "^" geneSymbolExp "[: p.]+" hgvsAaRangeExp
+#define pseudoHgvsGeneSymbolProtRangeExp "^" geneSymbolExp "[ :]+p?\\.?" hgvsAaRangeExp
 //      0.....................................................  whole matching string
 //      1...................                                    gene symbol
 //                                 2...                         original start AA
 //                                       3...                   1-based start position
 //                                           4................  optional range sep and AA+pos
 //                                             5...             original end AA
 //                                                  6...         1-based end position
 //                                                       7.....  change description
 
 // As above but omitting the protein change
-#define pseudoHgvsGeneSymbolProtPosExp "^" geneSymbolExp "[: p.]+" posIntExp
+#define pseudoHgvsGeneSymbolProtPosExp "^" geneSymbolExp "[ :]+p?\\.?" posIntExp
 //      0..........................                             whole matching string
 //      1...................                                    gene symbol
 //                           2.....                             1-based position
 
 
 // Gene symbol, maybe punctuation, and a clear "c." position (and possibly change)
 #define pseudoHgvsGeneSympolCDotPosExp "^" geneSymbolExp "[: ]+" hgvsCDotPosExp
 //      0.....................................................  whole matching string
 //      1...................                                    gene symbol
 //                           2.....                             optional beginning of position exp
 //                                   3.....                     beginning of position exp
 
 
 static struct hgvsVariant *hgvsParseGDotPos(char *term)
 /* If term is parseable as an HGVS NC_...g. term, return the parsed representation, else NULL. */