2197f6d5208aff4c48ccbe42e61a116d988ac392 max Tue May 19 08:23:54 2026 -0700 hubApi: add /blat endpoint with apiKey gating, format=hgblat, and known-agent bypass New src/hg/hubApi/blat.c implements /blat/<type> (dna, protein, transRna, transDna, guess) backed by the same gfServer logic as hgBlat. Key details: - Requires an apiKey for rate-limiting; botException() and botExceptionUserAgent() exempt IPs/user-agents in hg.conf (same policy as captcha bypass elsewhere in the browser stack). - Invalid apiKey returns a clean JSON 403 rather than an HTML 500 (pre-validated in hubApi.c main() before hgBotDelayTimeFrac runs). - Extra bot-delay fraction (default 0.3, 10x hubApi default) is configurable via hubApi.blatDelayFraction in hg.conf. - format=text/psl -> PSL text; format=hgblat -> byte-for-byte hgBlat?output=json shape; jsonOutputArrays=1 -> hubApi envelope with arrays (parallel to getData behaviour); default -> objects. - botExceptionUserAgent() carved out of cart.c's static isUserAgentException() into botDelay.c so non-cart callers can use it. - Cross-reference comments added in hgBlat.c and blat.c noting the shared logic so fixes get applied to both. refs #36315 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> diff --git src/hg/hubApi/tests/makefile src/hg/hubApi/tests/makefile index 95603d31479..6af366b24b3 100644 --- src/hg/hubApi/tests/makefile +++ src/hg/hubApi/tests/makefile @@ -17,31 +17,33 @@ SERVERNAME=\"https://genome-test.gi.ucsc.edu\" make test8\n\ \n\ Note: Your sandbox trackDb needs to be up to date to allow the tests\n\ to function correctly. If one fails, update that trackDb.\n" hgwdev:: SERVERNAME="https://api-test.gi.ucsc.edu" make test alpha:: SERVERNAME="https://genome-test.gi.ucsc.edu" make test beta:: SERVERNAME="https://apibeta.soe.ucsc.edu" make test all:: test0 listFunctions getFunctions listSchema getSequence wigData \ - search supportedTypes errorTests notSupported bugReports + search blat supportedTypes errorTests notSupported bugReports + +blat: blat01 listFunctions: list01 list02 list04 list05 list06 list07 list08 \ list10 list11 list12 list13 list14 list15 list16 list17 list18 \ list20 list21 list23 list24 list25 list27 list27b list27c list28 list29 \ list30 list31 list32 getFunctions: test8 test9 test10 test11 test12 test13 test14 \ test15 test16 test19 test21 test26 test27 test28 \ test29 test39 test40 test42 test43 test44 test45 test46 test47 \ test48 test49 test50 test51 test52 test53 test54 test55 test56 test57 \ test58 test59 multiTrack01 findGenome: findGenome01 listSchema: schema01 schema02 schema03 schema04 schema05 schema06 schema07 \ @@ -1137,30 +1139,56 @@ # wiggle data from a track hub bigWig file, with chrom, start, end wig24: setOutput @printf "### $@ '${SERVERNAME}/cgi-bin/hubApi/getData/track?track=gc5Base;chrom=chrCp;genome=araTha1;hubUrl=http://genome-test.gi.ucsc.edu/~hiram/hubs/Plants/hub.txt;end=5647;start=1234;jsonOutputArrays=1;maxItemsOutput=5'\n" @./jsonConsumer.pl -serverName="${SERVERNAME}" -endpoint="/getData/track" -hubUrl="http://genome-test.gi.ucsc.edu/~hiram/hubs/Plants/hub.txt" -genome="araTha1" -track="gc5Base" -chrom="chrCp" -start=1234 -end=5647 -jsonOutputArrays -maxItemsOutput=5 2>&1 | egrep -v "${excludeLines}" | sed -e 's#https://.*/getData#/getData#;' | gzip -c > testOutput/$@.gz @zdiff expected/$@.gz testOutput/$@.gz ############################################################################## ### search - test search function ############################################################################## # basic search just to see if it works: search01: setOutput @ printf "### $@ '${SERVERNAME}/search?search=brca;genome=hg38'\n" @ curl -L "${SERVERNAME}/search?search=brca;genome=hg38" 2> /dev/null | python -mjson.tool | egrep -c -n "\[|\]" | awk -v t="$@" '{if (($$1 < 30) || ($$1 > 50)) {printf "test %s failed, count %d not in range 30-40\n", t, $$1; exit 255;}}' +############################################################################## +### blat - test /blat endpoint +############################################################################## + +# /blat smoke test. Always verifies the apiKey gate (no secret required); +# if APIKEY is exported, additionally runs a live alignment of the +# demo sequence against hg38 and checks for a 'blat' array in the response. +blat01: setOutput + @printf "### $@ '${SERVERNAME}/blat/dna?genome=hg38;userSeq=GTCCTCGGAACCAGGACCTCGGCGTGGCCTAGCG'\n" + @curl -L -sG "${SERVERNAME}/blat/dna" \ + --data-urlencode "genome=hg38" \ + --data-urlencode "userSeq=GTCCTCGGAACCAGGACCTCGGCGTGGCCTAGCG" \ + 2>/dev/null | grep -q apiKey \ + || { echo "test $@ failed: missing-apiKey request did not return apiKey error"; exit 255; } + @if [ -n "$$APIKEY" ]; then \ + curl -L -sG "${SERVERNAME}/blat/dna" \ + --data-urlencode "apiKey=$$APIKEY" \ + --data-urlencode "genome=hg38" \ + --data-urlencode "userSeq=GTCCTCGGAACCAGGACCTCGGCGTGGCCTAGCG" \ + 2>/dev/null | python -mjson.tool | grep -q '"blat"' \ + || { echo "test $@ failed: live BLAT response has no blat[] field"; exit 255; }; \ + echo "### $@: apiKey gate OK; live BLAT OK"; \ + else \ + echo "### $@: apiKey gate OK (export APIKEY=<key> to also test live BLAT)"; \ + fi + ############################################################################## ### chrAlias - test chrom alias ############################################################################## # testing /getData/track?genome=ce11&track=gold&chrom=MT chrAlias01: setOutput @ printf "### $@ '${SERVERNAME}/getData/track?genome=ce11;track=gold;chrom=MT'\n" @./jsonConsumer.pl -serverName="${SERVERNAME}" -endpoint="/getData/track" -genome="ce11" -track="gold" -chrom="MT" 2>&1 | egrep -v "${excludeLines}" | sed -e 's#https://.*/getData#/getData#;' | gzip -c > testOutput/$@.gz @zdiff expected/$@.gz testOutput/$@.gz # testing /getData/track?genome=ce11&track=gold&chrom=NC_001328.1 chrAlias02: setOutput @ printf "### $@ '${SERVERNAME}/getData/track?genome=ce11;track=gold;chrom=NC_001328.1'\n" @./jsonConsumer.pl -serverName="${SERVERNAME}" -endpoint="/getData/track" -genome="ce11" -track="gold" -chrom="NC_001328.1" 2>&1 | egrep -v "${excludeLines}" | sed -e 's#https://.*/getData#/getData#;' | gzip -c > testOutput/$@.gz @zdiff expected/$@.gz testOutput/$@.gz