467f4d3064ca7f047614ce75ebc8eb48b156ae4d angie Mon Dec 11 15:36:25 2017 -0800 Support NT_ and NW_ accessions in genomic HGVS terms (not just NC; some alts are NT/NW). diff --git src/hg/lib/hgHgvs.c src/hg/lib/hgHgvs.c index 0557301..fd6ad07 100644 --- src/hg/lib/hgHgvs.c +++ src/hg/lib/hgHgvs.c @@ -21,43 +21,43 @@ if (pHgvs && *pHgvs) { struct hgvsVariant *hgvs = *pHgvs; freez(&hgvs->seqAcc); freez(&hgvs->seqGeneSymbol); freez(&hgvs->changes); freez(pHgvs); } } // Regular expressions for HGVS-recognized sequence accessions: LRG or versioned RefSeq: #define lrgTranscriptExp "(LRG_[0-9]+t[0-9]+)" #define lrgProteinExp "(LRG_[0-9]+p[0-9]+)" #define lrgRegionExp "(LRG_[0-9]+)" -// NC = RefSeq reference assembly chromosome +// NC = RefSeq reference assembly chromosome (NT = contig (e.g. alt), NW = patch) // NG = RefSeq incomplete genomic region (e.g. gene locus) // NM = RefSeq curated mRNA // NP = RefSeq curated protein // NR = RefSeq curated (non-coding) RNA #define geneSymbolExp "([A-Za-z0-9./_-]+)" #define optionalGeneSymbolExp "(\\(" geneSymbolExp "\\))?" #define versionedAccPrefixExp(p) "(" p "_[0-9]+(\\.[0-9]+)?)" optionalGeneSymbolExp // ........................ accession and optional dot version // ........... optional dot version // ...... optional gene symbol in ()s // .... optional gene symbol -#define versionedRefSeqNCExp versionedAccPrefixExp("[NX]C") +#define versionedRefSeqNCExp versionedAccPrefixExp("[NX][CTW]") #define versionedRefSeqNGExp versionedAccPrefixExp("[NX]G") #define versionedRefSeqNMExp versionedAccPrefixExp("[NX]M") #define versionedRefSeqNPExp versionedAccPrefixExp("[NX]P") #define versionedRefSeqNMRExp versionedAccPrefixExp("[NX][MR]") // Nucleotide position regexes // (c. = CDS, g. = genomic, m. = mitochondrial, n.= non-coding RNA, r. = RNA) #define posIntExp "([0-9]+)" #define hgvsGenoPosExp posIntExp "(_" posIntExp ")?" // ...... 1-based start position // ............. optional range separator and end position // ...... 1-based end position // n. terms can have exonic anchor base and intron offset for both start and end: #define offsetExp "([-+])" // c. terms may also have a UTR indicator before the anchor base (- for UTR5, * for UTR3)