6af4f6e10518d33db653c55dadab86b3ed4617ec max Wed Jun 3 05:39:35 2026 -0700 hubApi/blat: fix duplicate itemsReturned, missing target strand, weak default penalty Three QA issues from Gerardo on #36315: - writeJsonOutput() emitted "itemsReturned" twice -- apiFinishOutput() already prints it from the global -- so drop the explicit jsonWriteNumber(). - Protein and translated queries came back with single-char strand because gfOutput only writes the target-strand character when reportTargetStrand is set; hgBlat sets it for translated queries, the API path didn't. Set it when bt.isTx so pslLoad sees the full two-char strand. - blatDelayFractionDefault was 0.3 -- QA's 40-request burst only added ~2.4s. Bumped to 3.0 for ~24s under the same burst; still tunable in hg.conf via hubApi.blatDelayFraction=. refs #36315 Co-Authored-By: Claude Sonnet 4.6 diff --git src/hg/hubApi/blat.c src/hg/hubApi/blat.c index 907623b2446..4dfe07a55a9 100644 --- src/hg/hubApi/blat.c +++ src/hg/hubApi/blat.c @@ -9,33 +9,36 @@ #include "dataApi.h" #include "blatServers.h" #include "fa.h" #include "dnautil.h" #include "dnaseq.h" #include "psl.h" #include "trashDir.h" #include "genoFind.h" #include "trackHub.h" #include "hubConnect.h" #include "hdb.h" #include "fuzzyFind.h" #include "botDelay.h" -/* Default BLAT bottleneck fraction -- 10x the hubApi-wide default of 0.03. +/* Default BLAT bottleneck fraction -- 100x the hubApi-wide default of 0.03. + * QA-tested at 0.3 (~2.4s extra over a 40-request burst) was too gentle to + * deter a single-key hammer; 3.0 lands a meaningful ~24s penalty for the + * same burst without breaking legitimate IGV-style polling. * Overridable in hg.conf with hubApi.blatDelayFraction=. */ -#define blatDelayFractionDefault 0.3 +#define blatDelayFractionDefault 3.0 /* Server lookup is provided by findBlatServer() in hg/lib/blatServers.c, * which is also used by hgBlat.c. */ /* Validated query-type token derived from the user's "type" arg. */ struct blatType { boolean isTx; /* translated query */ boolean isTxTx; /* both query and target translated (dna vs dnax) */ boolean txTxBoth; /* dnax query -- search both strands */ boolean qIsProt; enum gfType qType; enum gfType tType; }; @@ -265,31 +268,30 @@ jsonWriteListEnd(jw); } jsonWriteListStart(jw, "blat"); struct psl *psl; long long count = 0; for (psl = pslList; psl != NULL && count < maxItemsOutput; psl = psl->next, ++count) { if (jsonOutputArrays) writePslAsArray(jw, psl); else writePslAsObject(jw, psl); } jsonWriteListEnd(jw); itemsReturned = count; -jsonWriteNumber(jw, "itemsReturned", (long long)count); apiFinishOutput(0, NULL, jw); } void apiBlat(char *words[MAX_PATH_INFO]) /* '/blat' endpoint: run a BLAT alignment of userSeq against the requested * assembly's gfServer and return PSL hits as JSON. */ { char *extraArgs = verifyLegalArgs(argBlat); if (extraArgs) apiErrAbort(err400, err400Msg, "extraneous arguments found for function /blat '%s'", extraArgs); /* /blat is gated on an apiKey -- both for attribution and to anchor the * bot-bottleneck on something more stable than IP. Known programmatic * clients (IGV, rtracklayer, etc.) identified via bottleneck.except IPs or @@ -366,30 +368,36 @@ * Mirrors hgBlat's strategy so we benefit from the same gfOutputPsl path. */ struct tempName pslTn; trashDirFile(&pslTn, "apiBlat", "apiBlat", ".pslx"); int maxSeqCount = 25; char *optionMaxSeqCount = cfgOptionDefault("hgBlat.maxSequenceCount", NULL); if (isNotEmpty(optionMaxSeqCount)) maxSeqCount = atoi(optionMaxSeqCount); FILE *f = NULL; struct errCatch *ec = errCatchNew(); if (errCatchStart(ec)) { f = mustOpen(pslTn.forCgi, "w"); struct gfOutput *gvo = gfOutputPsl(0, bt.qIsProt, FALSE, f, FALSE, TRUE); + /* For translated/protein queries the target strand needs to go into the + * PSL strand column too -- otherwise pslLoad sees only the query strand + * and downstream consumers compute the wrong coords on minus-strand hits. + * hgBlat sets this for the same reason (hgBlat.c near gfAlignTrans*). */ + if (bt.isTx) + gvo->reportTargetStrand = TRUE; pslxWriteHead(f, bt.qType, bt.tType); struct gfConnection *conn = gfConnect(st->host, st->port, trackHubDatabaseToGenome(st->db), st->genomeDataDir); struct hash *tFileCache = gfFileCacheNew(); int minMatch = 0; /* let gfServer decide; matches hgBlat allResults path */ struct dnaSeq *seq; int singleMax = bt.isTx ? 10000 : 75000; int totalMax = singleMax * 2.5; int total = 0; int seqCount = 0; for (seq = seqList; seq != NULL; seq = seq->next) { if (++seqCount > maxSeqCount)