f93b8662afd701763c24634879d05dc08b3178de max Fri Jun 5 02:24:16 2026 -0700 Add exon search: jump to GENE exon N from position box I'm committing this thinking that the way that we implement searches leads to duplication of code that doesn't look great to me. While this feature looks good, the code duplication across C/JS should probably get reduced with a different approach to the "quick jump" way of the page. We currently have at least three ways to quick jump, I think: - chr:start-end - rsxxxxx - gene symbol + autosuggest pick - HGVS? They are recognized by both the JavaScript and the C code with regexes. I think all of these should probably be implemented only in the C code. The JS would only send the current string to the C code and then get back whether this can be autocompleted, to which position, and what to show in the autosuggest area. For example, if you type "SOD1<space>e" the C code could send back "Continue typing to jump to exon" and once you're at "SOD1<space>exon 5" the C code sends back "Hit enter to jump to chrX:123123-123213". This would work with any type of identifier and the code would stay in the C code, with no more duplication, and it would be much clearer to the user what is recognized in the search box. Users can now type "TP53 exon 5" or "TP53:e.5[+/-offset]" in the genome browser position/search box to navigate directly to that exon. The ":e.N" notation follows the VICC Gene Fusion Specification. An optional intronic offset (":e.5+2") lands N bases past the exon boundary, useful for splice site inspection. C (hgFind.c): findGeneExon() resolves the query against the SQL genePred tables listed in the hg.conf "geneTracks" key (default: mane, ncbiRefSeqSelect, knownGene, ncbiRefSeq, ncbiRefSeqHistorical). bigGenePred tracks (e.g. mane) are supported via bigBedOpenExtraIndex. Uses the existing exonToPos() function for strand-aware exon lookup. fixSinglePos() is called so hgp->singlePos is populated for callers. 
hgApi.c: new cmd=geneExonToPos returns {"pos":"chrom:start-end"} JSON so JS can navigate in place without a full page redirect to hgSearch. Direct URL links (hgTracks?position=GENE+exon+N) also work because findGeneExon() is hooked into hgPositionsFind(). JS: autocomplete.js injects a local "Jump to exon N" suggestion as soon as the exon pattern is detected, or a hint item when the query is still partial ("GENE ex"). Selecting either navigates via hgApi. hgTracks.js routes the two new autocomplete item types to the hgApi call. utils.js adds the two regexes (geneExonExp, geneExonCoordExp). query.html: documents both syntaxes; the :e.N notation links to the VICC Gene Fusion Specification at fusions.cancervariants.org. diff --git src/hg/hgApi/hgApi.c src/hg/hgApi/hgApi.c index 04e72eea7f0..2031fda41a2 100644 --- src/hg/hgApi/hgApi.c +++ src/hg/hgApi/hgApi.c @@ -31,30 +31,31 @@ #include "common.h" #include "hdb.h" #include "mdb.h" #include "cheapcgi.h" #include "htmshell.h" #include "hPrint.h" #include "dystring.h" #include "hui.h" #include "search.h" #include "cv.h" #include "api.h" #include "chromAlias.h" #include "bigBed.h" #include "trackHub.h" #include "cart.h" +#include "hgFind.h" struct hash *oldVars = NULL; void doMiddle(struct cart *cart) { long enteredMainTime = clock1000(); struct dyString *output = dyStringNew(10000); setUdcCacheDir(); pushWarnHandler(htmlVaBadRequestAbort); pushAbortHandler(htmlVaBadRequestAbort); char *database = cgiString("db"); char *cmd = cgiString("cmd"); char *jsonp = cgiOptionalString("jsonp"); @@ -230,30 +231,60 @@ dyStringPrintf(output, "{\"error\": \"Couldn't find item: %s\"}", name); else { boolean found; int start, end; if (sameString(cmd, "codonToPos")) found = codonToPos(gp, num, &start, &end); else found = exonToPos(gp, num, &start, &end); if (found) dyStringPrintf(output, "{\"pos\": \"%s:%d-%d\"}", gp->chrom, start + 1, end); else dyStringPrintf(output, "{\"error\": \"%d is an invalid %s for this gene\"}", num, 
sameString(cmd, "codonToPos") ? "codon" : "exon"); } hFreeConn(&conn); } +else if (sameString(cmd, "geneExonToPos")) + { + /* Resolve "<symbol> exon <N>" or "<symbol>:e.<N>[+/-offset]" to a genomic position. + * Parameters: symbol, num, offset (optional). Returns {"pos": "chrom:start-end"} + * or {"error": "..."}. Reuses the hgFind exon search so all geneTracks tables apply. */ + char *symbol = cgiString("symbol"); + int num = cgiInt("num"); + int offset = cgiOptionalInt("offset", 0); + char term[256]; + if (offset != 0) + safef(term, sizeof term, "%s:e.%d%+d", symbol, num, offset); + else + safef(term, sizeof term, "%s exon %d", symbol, num); + /* Suppress any warn() calls (e.g. "gene has N exons") so they don't corrupt JSON. */ + pushSilentWarnHandler(); + struct hgPositions *hgp = hgPositionsFind(database, term, "", "hgApi", cart, FALSE, FALSE, NULL); + popWarnHandler(); + if (hgp && hgp->singlePos) + { + int s = hgp->singlePos->chromStart; + int e = hgp->singlePos->chromEnd; + if (s > e) { int tmp = s; s = e; e = tmp; } + char pos[256]; + safef(pos, sizeof pos, "%s:%d-%d", hgp->singlePos->chrom, s + 1, e); + dyStringPrintf(output, "{\"pos\": \"%s\"}", pos); + } + else + dyStringPrintf(output, "{\"error\": \"Exon %d of %s not found\"}", num, symbol); + freez(&hgp); + } else { warn("unknown cmd: %s",cmd); errAbort("Unsupported 'cmd' parameter"); } apiOut(dyStringContents(output), jsonp); cgiExitTime("hgApi", enteredMainTime); } /* Null terminated list of CGI Variables we don't want to save * permanently. */ char *excludeVars[] = {"fileSearch", "var", "showShortLabel", "showLongLabel", "track", "table", "name", "chrom", "cmd", "num", NULL,}; int main(int argc, char *argv[])