6da76acba7f617920cc7eaa161b7d987362be78f markd Wed Feb 14 11:29:46 2018 -0800 hide patent GenBank mRNA sequences by default based on accession prefix
diff --git src/hg/lib/genbank.c src/hg/lib/genbank.c
index 553c1f8..149efee 100644
--- src/hg/lib/genbank.c
+++ src/hg/lib/genbank.c
@@ -387,15 +387,68 @@
 if (geneStrand == '+')
     {
     genomeCds.startComplete = startComplete;
     genomeCds.endComplete = endComplete;
     }
 else
     {
     genomeCds.startComplete = endComplete;
     genomeCds.endComplete = startComplete;
     }
 
 if (psl->strand[1] == '-')
     reverseIntRange(&genomeCds.start, &genomeCds.end, psl->tSize);
 return genomeCds;
 }
+
+/* Accession prefixes indicating patent sequences, taken from:
+ *   https://www.ncbi.nlm.nih.gov/Sequin/acc.html
+ * The list is NULL-terminated so it can be walked without a count.
+ */
+static char *genbankPatentPrefixes[] =
+{
+    "E", "BD", "DD", "DI", "DJ", "DL", "DM", "FU", "FV", "FW", "FZ", "GB",
+    "HV", "HW", "HZ", "LF", "LG", "LV", "LX", "LY", "LZ", "MA", "MB", "A",
+    "AX", "CQ", "CS", "FB", "GM", "GN", "HA", "HB", "HC", "HD", "HH", "HI",
+    "JA", "JB", "JC", "JD", "JE", "LP", "LQ", "MP", "MQ", "MR", "MS", "I",
+    "AR", "DZ", "EA", "GC", "GP", "GV", "GX", "GY", "GZ", "HJ", "HK", "HL",
+    "KH", "MI", NULL
+};
+
+/* Set of the prefixes above, keyed by prefix; NULL until the first lookup. */
+static struct hash *genbankPatentPrefixesHash = NULL;
+
+static void makeGenbankPatentPrefixHash(void)
+/* Build the hash of GenBank patent accession prefixes; called on first use. */
+{
+int i;
+genbankPatentPrefixesHash = hashNew(0);
+for (i = 0; genbankPatentPrefixes[i] != NULL; i++)
+    hashAddInt(genbankPatentPrefixesHash, genbankPatentPrefixes[i], TRUE);
+}
+
+boolean isGenbenkPatentAccession(char *acc)
+/* Does this accession start with a prefix allocated to patent sequences?
+ * Everything before the first digit is looked up in the patent prefix table.
+ * Returns FALSE for NULL, for strings of GENBANK_ACC_BUFSZ or more characters,
+ * and for strings with no numeric part.
+ * (The "Genbenk" spelling of the name is kept so existing callers still link.) */
+{
+if (acc == NULL)
+    return FALSE;
+if (strlen(acc) >= GENBANK_ACC_BUFSZ)
+    return FALSE;  // too big, shouldn't happen
+
+if (genbankPatentPrefixesHash == NULL)
+    makeGenbankPatentPrefixHash();
+
+// drop numeric part
+char accbuf[GENBANK_ACC_BUFSZ];
+safecpy(accbuf, sizeof(accbuf), acc);
+char *numPtr = skipToNumeric(accbuf);
+// skipToNumeric may stop at the terminating NUL rather than return NULL when
+// there are no digits, so test both
+if ((numPtr == NULL) || (*numPtr == '\0'))
+    return FALSE;  // doesn't look like genbank acc
+*numPtr = '\0';
+return hashLookup(genbankPatentPrefixesHash, accbuf) != NULL;
+}