ce891d7726cb07920b5072bb5b4ca9faf44c6e61
angie
  Fri Sep 3 16:39:47 2021 -0700
Translating protein coords to CDS might result in coords that fall off the end of CDS but not off the end of the transcript.  Avoid showing 3'UTR results for a protein search by requiring results in the coding region for protein searches.  refs #28107

diff --git src/hg/lib/hgHgvs.c src/hg/lib/hgHgvs.c
index 7c889e3..1c78e6a 100644
--- src/hg/lib/hgHgvs.c
+++ src/hg/lib/hgHgvs.c
@@ -1895,44 +1895,49 @@
         if (hgvs->type == hgvstCoding)
             getCds(db, accNoVersion, cds);
         txAli = pslForQName(db, "refSeqAli", accNoVersion);
         if (txAli)
             {
             pslTable = "refSeqAli";
             *pAcc = accNoVersion;
             }
         }
     if (txAli && retPslTable != NULL)
         *retPslTable = pslTable;
     }
 return txAli;
 }
 
-static struct bed *hgvsMapNucToGenome(char *db, struct hgvsVariant *hgvs, char **retPslTable)
+static struct bed *hgvsMapNucToGenome(char *db, struct hgvsVariant *hgvs, char **retPslTable,
+                                      boolean mustBeCds)
 /* Return a bed6 with the variant's span on the genome and strand, or NULL if unable to map.
+ * If mustBeCds, but we can't find cds or the variant coords are outside of it, return NULL.
  * If successful and retPslTable is not NULL, set it to the name of the PSL table used. */
 {
 if (hgvs->type == hgvstGenomic)
     return hgvsMapGDotToGenome(db, hgvs, retPslTable);
 char *acc = normalizeVersion(db, hgvs->seqAcc, NULL);
 if (isEmpty(acc))
     return NULL;
 struct bed *region = NULL;
 char *pslTable = NULL;
 struct genbankCds cds;
 struct psl *txAli = getPslAndCds(db, hgvs, &acc, &cds, &pslTable);
 boolean gotCds = (cds.end > cds.start);
+if (mustBeCds &&
+    (!gotCds || hgvs->type != hgvstCoding || hgvs->start1-1 >= (cds.end - cds.start)))
+    return NULL;
 if (txAli && (hgvs->type != hgvstCoding || gotCds))
     {
     int upstream = 0, downstream = 0;
     struct psl *mappedToGenome = mapPsl(hgvs, acc, txAli, &cds, &upstream, &downstream);
     if (mappedToGenome)
         {
         region = pslAndFriendsToRegion(mappedToGenome, hgvs, upstream, downstream);
         pslFree(&mappedToGenome);
         }
     pslFree(&txAli);
     }
 if (region)
     {
     adjustInsStartEnd(hgvs, &region->chromStart, &region->chromEnd);
     if (retPslTable)
@@ -1967,47 +1972,47 @@
     else
         return NULL;
     txAcc = sqlQuickString(conn, query);
     hFreeConn(&conn);
     }
 if (txAcc)
     {
     // Translate the p. to c. and map c. to the genome.
     struct hgvsVariant cDot;
     zeroBytes(&cDot, sizeof(cDot));
     cDot.type = hgvstCoding;
     cDot.seqAcc = txAcc;
     cDot.start1 = ((hgvs->start1 - 1) * 3) + 1;
     cDot.end = ((hgvs->end - 1) * 3) + 3;
     cDot.changes = hgvs->changes;
-    region = hgvsMapNucToGenome(db, &cDot, retPslTable);
+    region = hgvsMapNucToGenome(db, &cDot, retPslTable, TRUE);
     freeMem(txAcc);
     }
 return region;
 }
 
 struct bed *hgvsMapToGenome(char *db, struct hgvsVariant *hgvs, char **retPslTable)
 /* Return a bed6 with the variant's span on the genome and strand, or NULL if unable to map.
  * If successful and retPslTable is not NULL, set it to the name of the PSL table used. */
 {
 if (hgvs == NULL)
     return NULL;
 struct bed *region = NULL;
 if (hgvs->type == hgvstProtein)
     region = hgvsMapPDotToGenome(db, hgvs, retPslTable);
 else
-    region = hgvsMapNucToGenome(db, hgvs, retPslTable);
+    region = hgvsMapNucToGenome(db, hgvs, retPslTable, FALSE);
 return region;
 }
 
 static int getDotVersion(char *acc)
 /* If acc ends in .<number> then return number; otherwise return -1. */
 {
 char *p = strrchr(acc, '.');
 if (p && isdigit(p[1]))
     return atoi(p+1);
 return -1;
 }
 
 struct bed *hgvsValidateAndMap(struct hgvsVariant *hgvs, char *db, char *term,
                                struct dyString *dyWarn, char **retPslTable)
 /* If we have sequence for hgvs->seqAcc and the hgvs coords are within the bounds