5f7b28612df1ff4e29c31020b33f8fee9c097b11 chmalee Tue Oct 10 12:35:00 2023 -0700 Check for refseq historical old table existence before querying them for hgvs terms, refs #26016 diff --git src/hg/lib/hgHgvs.c src/hg/lib/hgHgvs.c index 415becd..883da03 100644 --- src/hg/lib/hgHgvs.c +++ src/hg/lib/hgHgvs.c @@ -728,31 +728,31 @@ nmAcc); } else if (hTableExists(db, "refGene")) { // Trim .version if present since our genbank tables don't use versioned names. char *trimmed = cloneFirstWordByDelimiter(nmAcc, '.'); sqlSafef(query, sizeof(query), "select l.protAcc from %s l, refGene r " "where r.name = '%s' and l.mrnaAcc = r.name " "and l.protAcc != '' order by length(l.protAcc), l.protAcc", refLinkTable, trimmed); } else return NULL; struct sqlConnection *conn = hAllocConn(db); char *npAcc = sqlQuickString(conn, query); // if user passed in old versioned transcript, check in *Old tables: -if (npAcc == NULL && hDbHasNcbiRefSeq(db) && strchr(nmAcc, '.')) +if (npAcc == NULL && hDbHasNcbiRefSeqOld(db) && strchr(nmAcc, '.')) { sqlSafef(query, sizeof(query), "select protAcc from ncbiRefSeqLinkOld where id = '%s'", nmAcc); npAcc = sqlQuickString(conn, query); } hFreeConn(&conn); return npAcc; } static char *lrgProteinToTx(char *db, char *protAcc) /* Return the LRG_ transcript accession for protAcc. Each LRG_NpM has a corresponding LRG_NtM. */ { int accLen = strlen(protAcc); char txAcc[accLen+1]; safecpy(txAcc, sizeof(txAcc), protAcc); char *p = strrchr(txAcc, 'p'); @@ -857,31 +857,31 @@ struct sqlConnection *conn = hAllocConn(db); seq = sqlQuickString(conn, query); hFreeConn(&conn); freeMem(txAcc); } } else { if (hDbHasNcbiRefSeq(db)) { char query[2048]; sqlSafef(query, sizeof(query), "select seq from ncbiRefSeqPepTable " "where name = '%s'", acc); struct sqlConnection *conn = hAllocConn(db); seq = sqlQuickString(conn, query); - if (seq == NULL) + if (seq == NULL && hDbHasNcbiRefSeqOld(db)) { sqlSafef(query, sizeof(query), "select seq from ncbiRefSeqPepTableOld " "where name = '%s'", acc); seq = sqlQuickString(conn, query); } hFreeConn(&conn); } else { aaSeq *aaSeq = hGenBankGetPep(db, acc, NULL); if (aaSeq) seq = aaSeq->dna; } } return seq; @@ -1218,31 +1218,31 @@ seq = sqlQuickString(conn, query); hFreeConn(&conn); } } else if (startsWith("ENS", acc )) { // Construct it from the genome I guess? seq = getGencodeSeq(db, acc); } else { struct dnaSeq *cdnaSeq = NULL; if (hDbHasNcbiRefSeq(db)) { cdnaSeq = hDnaSeqGet(db, acc, "seqNcbiRefSeq", "extNcbiRefSeq"); - if (cdnaSeq == NULL) + if (cdnaSeq == NULL && hDbHasNcbiRefSeqOld(db)) cdnaSeq = hDnaSeqGet(db, acc, "seqNcbiRefSeqOld", "extNcbiRefSeqOld"); } else cdnaSeq = hGenBankGetMrna(db, acc, NULL); if (cdnaSeq) seq = dnaSeqCannibalize(&cdnaSeq); } return seq; } static boolean getCds(char *db, char *acc, struct genbankCds *retCds) /* Get the CDS info for genbank/LRG/ENS acc; return FALSE if not found or not applicable. */ { if (trackHubDatabase(db)) return FALSE; @@ -1264,31 +1264,31 @@ if (startsWith("LRG_", acc)) sqlSafef(query, sizeof(query), "select cds from lrgCds where id = '%s'", acc); else if (hDbHasNcbiRefSeq(db) && // This is a hack to allow us to fall back on refSeqAli if ncbiRefSeqPsl is incomplete: strchr(acc, '.')) sqlSafef(query, sizeof(query), "select cds from ncbiRefSeqCds where id = '%s'", acc); else sqlSafef(query, sizeof(query), "SELECT c.name FROM %s as c, %s as g WHERE (g.acc = '%s') AND " "(g.cds != 0) AND (g.cds = c.id)", cdsTable, gbCdnaInfoTable, acc); struct sqlConnection *conn = hAllocConn(db); char cdsBuf[2048]; cdsStr = sqlQuickQuery(conn, query, cdsBuf, sizeof(cdsBuf)); - if (isEmpty(cdsStr) && strchr(acc, '.')) + if (isEmpty(cdsStr) && strchr(acc, '.') && hDbHasNcbiRefSeqOld(db)) { sqlSafef(query, sizeof(query), "select cds from ncbiRefSeqCdsOld where id = '%s'", acc); cdsStr = sqlQuickQuery(conn, query, cdsBuf, sizeof(cdsBuf)); } hFreeConn(&conn); } if (isNotEmpty(cdsStr)) gotCds = (genbankCdsParse(cdsStr, retCds) && retCds->startComplete && retCds->start != retCds->end); return gotCds; } static char refBaseFromProt(char *change) /* If change starts with an amino acid 3-letter or 1-letter code then return the 1-letter code, @@ -1905,31 +1905,31 @@ } } } else { char *pslTable = pslTableForAcc(db, acc); if (pslTable && hTableExists(db, pslTable)) { if (hgvs->type == hgvstCoding) getCds(db, acc, cds); txAli = pslForQName(db, pslTable, acc); } // try the old alignments if we can't find it in the current alignments if (!txAli && sameString(pslTable, "ncbiRefSeqPsl")) { - if (hTableExists(db, "ncbiRefSeqPslOld")) + if (hDbHasNcbiRefSeqOld(db)) { txAli = pslForQName(db, "ncbiRefSeqPslOld", acc); if (txAli) pslTable = "ncbiRefSeqPslOld"; } // As of 9/26/16, ncbiRefSeqPsl is missing some items (#13673#note-443) -- so fall back // on UCSC alignments. if (!txAli && hTableExists(db, "refSeqAli")) { char *accNoVersion = cloneFirstWordByDelimiter(acc, '.'); if (hgvs->type == hgvstCoding) getCds(db, accNoVersion, cds); txAli = pslForQName(db, "refSeqAli", accNoVersion); if (txAli) { @@ -1994,31 +1994,31 @@ char *txAcc = NULL; if (startsWith("LRG_", acc)) txAcc = lrgProteinToTx(db, acc); else if (startsWith("ENS", acc)) txAcc = gencodeProteinToTx(db, acc); else if (startsWith("NP_", acc) || startsWith("XP_", acc)) { struct sqlConnection *conn = hAllocConn(db); char query[2048]; if (hDbHasNcbiRefSeq(db)) { sqlSafef(query, sizeof(query), "select mrnaAcc from ncbiRefSeqLink where protAcc = '%s'", acc); txAcc = sqlQuickString(conn, query); // user may have passed previous versioned transcript, check the *Old tables: - if (!txAcc) + if (!txAcc && hDbHasNcbiRefSeqOld(db)) { sqlSafef(query, sizeof(query), "select mrnaAcc from ncbiRefSeqLinkOld where protAcc = '%s'", acc); txAcc = sqlQuickString(conn, query); } } else if (hTableExists(db, "refGene")) { sqlSafef(query, sizeof(query), "select mrnaAcc from %s l, refGene r " "where l.protAcc = '%s' and r.name = l.mrnaAcc", refLinkTable, acc); txAcc = sqlQuickString(conn, query); } else return NULL; hFreeConn(&conn);