ce891d7726cb07920b5072bb5b4ca9faf44c6e61 angie Fri Sep 3 16:39:47 2021 -0700 Translating protein coords to CDS might result in coords that fall off the end of CDS but not off the end of the transcript. Avoid showing 3'UTR results for a protein search by requiring results in the coding region for protein searches. refs #28107 diff --git src/hg/lib/hgHgvs.c src/hg/lib/hgHgvs.c index 7c889e3..1c78e6a 100644 --- src/hg/lib/hgHgvs.c +++ src/hg/lib/hgHgvs.c @@ -1895,44 +1895,49 @@ if (hgvs->type == hgvstCoding) getCds(db, accNoVersion, cds); txAli = pslForQName(db, "refSeqAli", accNoVersion); if (txAli) { pslTable = "refSeqAli"; *pAcc = accNoVersion; } } if (txAli && retPslTable != NULL) *retPslTable = pslTable; } return txAli; } -static struct bed *hgvsMapNucToGenome(char *db, struct hgvsVariant *hgvs, char **retPslTable) +static struct bed *hgvsMapNucToGenome(char *db, struct hgvsVariant *hgvs, char **retPslTable, + boolean mustBeCds) /* Return a bed6 with the variant's span on the genome and strand, or NULL if unable to map. + * If mustBeCds, but we can't find cds or the variant coords are outside of it, return NULL. * If successful and retPslTable is not NULL, set it to the name of the PSL table used. */ { if (hgvs->type == hgvstGenomic) return hgvsMapGDotToGenome(db, hgvs, retPslTable); char *acc = normalizeVersion(db, hgvs->seqAcc, NULL); if (isEmpty(acc)) return NULL; struct bed *region = NULL; char *pslTable = NULL; struct genbankCds cds; struct psl *txAli = getPslAndCds(db, hgvs, &acc, &cds, &pslTable); boolean gotCds = (cds.end > cds.start); +if (mustBeCds && + (!gotCds || hgvs->type != hgvstCoding || hgvs->start1-1 >= (cds.end - cds.start))) + return NULL; if (txAli && (hgvs->type != hgvstCoding || gotCds)) { int upstream = 0, downstream = 0; struct psl *mappedToGenome = mapPsl(hgvs, acc, txAli, &cds, &upstream, &downstream); if (mappedToGenome) { region = pslAndFriendsToRegion(mappedToGenome, hgvs, upstream, downstream); pslFree(&mappedToGenome); } pslFree(&txAli); } if (region) { adjustInsStartEnd(hgvs, ®ion->chromStart, ®ion->chromEnd); if (retPslTable) @@ -1967,47 +1972,47 @@ else return NULL; txAcc = sqlQuickString(conn, query); hFreeConn(&conn); } if (txAcc) { // Translate the p. to c. and map c. to the genome. struct hgvsVariant cDot; zeroBytes(&cDot, sizeof(cDot)); cDot.type = hgvstCoding; cDot.seqAcc = txAcc; cDot.start1 = ((hgvs->start1 - 1) * 3) + 1; cDot.end = ((hgvs->end - 1) * 3) + 3; cDot.changes = hgvs->changes; - region = hgvsMapNucToGenome(db, &cDot, retPslTable); + region = hgvsMapNucToGenome(db, &cDot, retPslTable, TRUE); freeMem(txAcc); } return region; } struct bed *hgvsMapToGenome(char *db, struct hgvsVariant *hgvs, char **retPslTable) /* Return a bed6 with the variant's span on the genome and strand, or NULL if unable to map. * If successful and retPslTable is not NULL, set it to the name of the PSL table used. */ { if (hgvs == NULL) return NULL; struct bed *region = NULL; if (hgvs->type == hgvstProtein) region = hgvsMapPDotToGenome(db, hgvs, retPslTable); else - region = hgvsMapNucToGenome(db, hgvs, retPslTable); + region = hgvsMapNucToGenome(db, hgvs, retPslTable, FALSE); return region; } static int getDotVersion(char *acc) /* If acc ends in .<number> then return number; otherwise return -1. */ { char *p = strrchr(acc, '.'); if (p && isdigit(p[1])) return atoi(p+1); return -1; } struct bed *hgvsValidateAndMap(struct hgvsVariant *hgvs, char *db, char *term, struct dyString *dyWarn, char **retPslTable) /* If we have sequence for hgvs->seqAcc and the hgvs coords are within the bounds