#!/bin/bash
#
# src/hg/hubApi/tests/findGenome.sh
#
# Provenance:
#   commit 847b1a96e008aecae23d9f20e250c22efd10dbe1
#   author hiram, Mon Jun 15 09:45:56 2026 -0700
#   new file, mode 100755
#   "useful script to test performance and accuracy of the "findGenome"
#    functions, can be used before and after changes to verify functionality
#    remains correct."
#
# Test harness for findGenome API changes.
# Tests search functionality before and after performance optimizations.
#
# Usage: findGenome.sh {before|after|compare|full}
#   before  - run every test case, store results under findGenome_tests/before
#   after   - run every test case, store results under findGenome_tests/after
#   compare - compare the stored before/after results and timings
#   full    - run the "before" pass and print the next step
#
# Requires: ../hubApi (compiled), hgsql, python3, date with %N support
# (a whole-second fallback is used when %N is not available).

# NOTE: 'set -e' is intentionally not enabled (it was commented out in the
# original script) - presumably so that one failing test case does not abort
# the whole run.
# set -e

# Configuration
apiBinary="../hubApi"
testDir="findGenome_tests"
beforeDir="${testDir}/before"
afterDir="${testDir}/after"

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Test cases array - each line: "testName|queryParams"
# NOTE(review): several queries contain a literal '+'.  If the CGI layer
# URL-decodes '+' as a space, those cases do not exercise the '+' operator and
# should use %2B instead - confirm against hubApi before changing them.
declare -a testCases=(
    # Basic text searches
    "basicSingleWord|q=human&maxItemsOutput=5"
    "basicMultiWord|q=white%20rhino&maxItemsOutput=5"
    "quotedPhrase|q=\"white%20rhino\"&maxItemsOutput=5"

    # Short word tests (affected by ft_min_word_len=3)
    "shortAssemblyHg|q=hg&maxItemsOutput=10"
    "shortAssemblyMm|q=mm&maxItemsOutput=10"
    "shortAssemblyDm|q=dm&maxItemsOutput=10"
    "shortWithWildcard|q=hg*&maxItemsOutput=10"

    # Operator tests
    "plusOperator|q=+human&maxItemsOutput=5"
    "minusOperator|q=human%20-mouse&maxItemsOutput=5"
    "wildcardSearch|q=homo*&maxItemsOutput=5"
    "complexOperators|q=+-mouse%20sapiens*%20rat&maxItemsOutput=10"

    # Filter tests
    "filterReference|q=human&category=reference&maxItemsOutput=5"
    "filterRepresentative|q=human&category=representative&maxItemsOutput=5"
    "filterLatest|q=human&status=latest&maxItemsOutput=5"
    "filterComplete|q=human&level=complete&maxItemsOutput=5"
    "filterChromosome|q=human&level=chromosome&maxItemsOutput=5"

    # Browser existence filters
    "browserMustExist|q=human&browser=mustExist&maxItemsOutput=5"
    "browserNotExist|q=human&browser=notExist&maxItemsOutput=5"
    "browserMayExist|q=human&browser=mayExist&maxItemsOutput=5"

    # Combined filters
    "multiFilters|q=human&category=reference&status=latest&level=complete&maxItemsOutput=5"
    "textPlusFilters|q=white%20rhino&browser=mustExist&status=latest&maxItemsOutput=3"

    # Edge cases
    "noResults|q=zyxwvutsrq&maxItemsOutput=5"
    "specialChars|q=C.%20elegans&maxItemsOutput=5"
    "numbersYear|q=2020&maxItemsOutput=5"

    # Performance test cases
    "largeResultSet|q=chromosome&maxItemsOutput=50"
    "complexSearch|q=+human%20+reference%20-mouse&category=reference&status=latest&maxItemsOutput=20"

    # Specific assembly searches
    "assemblyHg38|q=hg38&maxItemsOutput=3"
    "assemblyMm39|q=mm39&maxItemsOutput=3"
    "assemblyPrefix|q=GCA_*&maxItemsOutput=10"
)

# Print the current time in nanoseconds since the epoch on stdout.
# 'date +%s%N' is not supported everywhere; when its output is not purely
# numeric, fall back to whole seconds padded to nanoseconds.
nowNs() {
    local stamp
    stamp=$(date +%s%N)
    if [[ "$stamp" =~ ^[0-9]+$ ]]; then
        printf '%s\n' "$stamp"
    else
        printf '%s000000000\n' "$(date +%s)"
    fi
}

# Exit with status 1 (after an explanatory message) unless $apiBinary is an
# executable file.  Only the modes that actually invoke the API call this.
requireApiBinary() {
    if [ ! -x "$apiBinary" ]; then
        echo -e "${RED}Error: $apiBinary not found or not executable${NC}"
        echo "Please compile the hubApi binary first"
        exit 1
    fi
}

# Function to clear MySQL caches.
# Runs the flush/reset statements against hgcentraltest via hgsql.  Each
# statement is issued on its own so that one rejected statement (for example
# on a server without a query cache) does not keep the others from running.
# Failure is best-effort: a single warning is printed and the run continues.
clearSqlCaches() {
    echo "  Clearing SQL caches..."

    local statement
    local failed=0
    for statement in \
        "FLUSH QUERY CACHE" \
        "FLUSH TABLES" \
        "RESET QUERY CACHE" \
        "FLUSH STATUS"; do
        # stderr is discarded on purpose; the summary warning below covers it
        hgsql -e "${statement};" hgcentraltest 2>/dev/null || failed=1
    done

    if (( failed )); then
        echo "  Warning: Could not clear SQL caches (may need admin privileges)"
    fi
}

# Function to run a single test.
# Arguments:
#   $1 - test name (used for the output file names)
#   $2 - CGI query string handed to the API binary as one argument
#   $3 - directory receiving <name>.json and <name>.err
#   $4 - timing file; "<name>:<ms>" or "<name>:<ms>:ERROR" is appended
# Returns: 0 when at least one line starting with '{' was produced, else 1.
runTest() {
    local testName="$1"
    local queryParams="$2"
    local outputDir="$3"
    local timingFile="$4"
    local jsonFile="${outputDir}/${testName}.json"
    local errFile="${outputDir}/${testName}.err"
    local startTime endTime durationMs outcome

    printf "# Running: '%s'\n" "${testName}" 1>&2

    # Add timing measurement
    startTime=$(nowNs)

    # Run the API call - filter CGI headers with grep "^{".
    # The binary's own stderr goes to the .err file (the redirection must sit
    # on the binary, not on grep), and the query string is quoted so that
    # patterns such as 'q=hg*' are never expanded against the file system.
    # The pipeline status is grep's: success means JSON output was seen.
    if PATH_INFO="/findGenome" "${apiBinary}" "${queryParams}" 2>"${errFile}" \
        | grep "^{" >"${jsonFile}"; then
        outcome=0
    else
        outcome=1
    fi

    endTime=$(nowNs)
    durationMs=$(( (endTime - startTime) / 1000000 ))

    if (( outcome != 0 )); then
        echo "${testName}:${durationMs}:ERROR" >> "$timingFile"
        return 1
    fi

    echo "${testName}:${durationMs}" >> "$timingFile"

    # Validate JSON output (warning only; the test still counts as run)
    if ! python3 -m json.tool "${jsonFile}" >/dev/null 2>&1; then
        echo -e "${RED}WARNING: Invalid JSON output for $testName${NC}"
    fi

    return 0
}

# Function to extract key metrics from JSON.
# Arguments: $1 - path of a JSON result file
# Outputs:   one line on stdout: a sorted-key JSON object with itemCount,
#            totalMatchCount, availableAssemblies, resultCount and the sorted
#            assemblyIds, or "ERROR:<reason>" when the file is missing or
#            cannot be parsed.
extractMetrics() {
    local jsonFile="$1"

    if [ ! -f "$jsonFile" ]; then
        echo "ERROR:missing_file"
        return
    fi

    # The path is passed as an argument rather than spliced into the Python
    # source, so file names containing quotes cannot break the program text.
    python3 - "$jsonFile" 2>/dev/null <<'PY'
import json
import sys

try:
    with open(sys.argv[1], 'r') as f:
        data = json.load(f)

    # Keys that describe the request/summary rather than a result entry
    nonResultKeys = ['itemCount', 'totalMatchCount', 'availableAssemblies',
                     'q', 'browser', 'maxItemsLimit', 'category', 'status',
                     'level', 'liftable']

    # Extract comparable metrics
    metrics = {
        'itemCount': data.get('itemCount', 0),
        'totalMatchCount': data.get('totalMatchCount', 0),
        'availableAssemblies': data.get('availableAssemblies', 0),
        'resultCount': len([k for k in data.keys() if k not in nonResultKeys])
    }

    # Sort assembly results for consistent comparison
    assemblies = []
    for key, value in data.items():
        if isinstance(value, dict) and 'scientificName' in value:
            assemblies.append(key)

    metrics['assemblyIds'] = sorted(assemblies)

    print(json.dumps(metrics, sort_keys=True))

except Exception as e:
    print(f'ERROR:{str(e)}')
PY
}

# Function to compare two test results.
# Arguments: $1 - test name; reads <name>.json from $beforeDir and $afterDir
# Returns:   0 when both sides yield usable and identical metrics, else 1.
# Two identical failures (missing file, unparsable JSON, no output at all)
# are reported as a failure, never as a match.
compareResults() {
    local testName="$1"
    local beforeFile="${beforeDir}/${testName}.json"
    local afterFile="${afterDir}/${testName}.json"
    local beforeMetrics afterMetrics

    beforeMetrics=$(extractMetrics "$beforeFile")
    afterMetrics=$(extractMetrics "$afterFile")

    if [[ -z "$beforeMetrics" || "$beforeMetrics" == ERROR:* \
        || -z "$afterMetrics" || "$afterMetrics" == ERROR:* ]]; then
        echo -e "${RED}✗${NC} $testName - No usable results to compare!"
        echo "  Before: $beforeMetrics"
        echo "  After:  $afterMetrics"
        return 1
    fi

    if [[ "$beforeMetrics" == "$afterMetrics" ]]; then
        echo -e "${GREEN}✓${NC} $testName"
        return 0
    fi

    echo -e "${RED}✗${NC} $testName - Results differ!"
    echo "  Before: $beforeMetrics"
    echo "  After:  $afterMetrics"
    return 1
}

# Function to run all tests.
# Arguments:
#   $1 - phase label used in messages (e.g. BEFORE, AFTER)
#   $2 - output directory; created here if needed, so merely starting the
#        script never creates result directories
runTestSuite() {
    local phase="$1"
    local outputDir="$2"
    local timingFile="${outputDir}/timing.txt"
    local testCase testName queryParams
    local passed=0
    local failed=0
    local name time rest

    echo -e "${BLUE}Running $phase tests...${NC}"

    mkdir -p "$outputDir" || {
        echo -e "${RED}Error: cannot create $outputDir${NC}"
        return 1
    }

    # Clear SQL caches before test run
    clearSqlCaches

    # Clear timing file
    : > "$timingFile"

    for testCase in "${testCases[@]}"; do
        IFS='|' read -r testName queryParams <<< "$testCase"

        if runTest "$testName" "$queryParams" "$outputDir" "$timingFile"; then
            passed=$((passed + 1))
        else
            failed=$((failed + 1))
            echo -e "${RED}FAILED: $testName${NC}"
        fi
    done

    echo -e "${BLUE}$phase Results: ${GREEN}$passed passed${NC}, ${RED}$failed failed${NC}"

    # Show timing summary: the five slowest test cases, slowest last
    if [ -f "$timingFile" ]; then
        echo -e "${YELLOW}Timing Summary ($phase):${NC}"
        sort -t: -k2,2n "$timingFile" | tail -5 | while IFS=':' read -r name time rest; do
            echo "  $name: ${time}ms"
        done
    fi
}

# Function to compare timing performance.
# Reads timing.txt from $beforeDir and $afterDir and prints the ten test cases
# with the largest relative speed-up.  Lines whose second field is not a
# number are ignored; "<name>:<ms>:ERROR" lines still contribute their time.
compareTiming() {
    echo -e "${BLUE}Performance Comparison:${NC}"

    if [ -f "${beforeDir}/timing.txt" ] && [ -f "${afterDir}/timing.txt" ]; then
        python3 - "${beforeDir}/timing.txt" "${afterDir}/timing.txt" <<'PY'
import sys


def readTimes(path):
    """Return {testName: milliseconds} for every well-formed line of path."""
    times = {}
    with open(path, 'r') as f:
        for line in f:
            parts = line.strip().split(':')
            if len(parts) >= 2 and parts[1].isdigit():
                times[parts[0]] = int(parts[1])
    return times


# Read timing data
beforeTimes = readTimes(sys.argv[1])
afterTimes = readTimes(sys.argv[2])

# Calculate improvements (skip zero baselines to avoid dividing by zero)
improvements = []
for testName in beforeTimes:
    if testName in afterTimes:
        beforeTime = beforeTimes[testName]
        afterTime = afterTimes[testName]
        if beforeTime > 0:
            improvement = ((beforeTime - afterTime) / beforeTime) * 100
            improvements.append((testName, beforeTime, afterTime, improvement))

# Sort by improvement percentage
improvements.sort(key=lambda x: x[3], reverse=True)

# Show top improvements
print('Top Performance Improvements:')
for testName, beforeTime, afterTime, improvement in improvements[:10]:
    print(f'  {testName}: {beforeTime}ms → {afterTime}ms ({improvement:+.1f}%)')
PY
    fi
}

# Main execution.
# Arguments: $1 - mode: before | after | compare | full; anything else
#            (including no argument) prints the usage text.
main() {
    local mode="${1:-}"
    local testCase testName differences

    echo -e "${BLUE}FindGenome API Test Harness${NC}"
    echo "Testing ${#testCases[@]} test cases"
    echo

    case "$mode" in
        "before")
            requireApiBinary
            runTestSuite "BEFORE" "$beforeDir"
            ;;
        "after")
            requireApiBinary
            runTestSuite "AFTER" "$afterDir"
            ;;
        "compare")
            # Works on stored results only, so the API binary is not needed.
            if [ ! -d "$beforeDir" ] || [ ! -d "$afterDir" ]; then
                echo -e "${RED}Error: Run 'before' and 'after' tests first${NC}"
                exit 1
            fi

            echo -e "${BLUE}Comparing results...${NC}"
            differences=0

            for testCase in "${testCases[@]}"; do
                IFS='|' read -r testName _ <<< "$testCase"
                if ! compareResults "$testName"; then
                    differences=$((differences + 1))
                fi
            done

            echo
            if [ "$differences" -eq 0 ]; then
                echo -e "${GREEN}✓ All tests match! Optimization preserved functionality.${NC}"
            else
                echo -e "${RED}✗ Found $differences differences${NC}"
            fi

            compareTiming
            ;;
        "full")
            requireApiBinary
            echo "Running full test suite..."
            runTestSuite "BEFORE" "$beforeDir"
            echo
            echo -e "${YELLOW}Now make your code changes and run: $0 after${NC}"
            ;;
        *)
            echo "Usage: $0 {before|after|compare|full}"
            echo
            echo "  before  - Run tests before code changes"
            echo "  after   - Run tests after code changes"
            echo "  compare - Compare before/after results"
            echo "  full    - Run before tests and show next steps"
            echo
            echo "Example workflow:"
            echo "  1. $0 before"
            echo "  2. Make your code changes"
            echo "  3. $0 after"
            echo "  4. $0 compare"
            ;;
    esac
}

main "$@"