04d9a71891504b2be49c6e72e9e2f80183a4b22b hiram Fri Aug 16 14:44:31 2024 -0700 eliminate allowAll argument, return used search string even when modified, return total assembly count available refs #32897 diff --git src/hg/hubApi/findGenome.c src/hg/hubApi/findGenome.c index 9cf70a9..b9328d5 100644 --- src/hg/hubApi/findGenome.c +++ src/hg/hubApi/findGenome.c @@ -10,31 +10,30 @@ #include "assemblyList.h" struct combinedSummary /* may have information from any of: asmSummary, genark or dbDb */ { struct combinedSummary *next; /* Next in singly linked list */ struct asmSummary *summary; /* from asmSummary table */ struct genark *genArk; /* from genark table */ struct dbDb *dbDb; /* from dbDb table */ }; /* will be initialized as this function begins */ static char *genarkTable = NULL; static char *asmListTable = NULL; static boolean statsOnly = FALSE; -static boolean allowAll = FALSE; /* default only show existing browsers*/ /* these three are radio button states, only one of these three can be TRUE */ static boolean browserMustExist = TRUE; /* default: browser must exist */ static boolean browserMayExist = FALSE; static boolean browserNotExist = FALSE; /* hgsql -e 'desc assemblyList;' hgcentraltest +----------------+---------------------+------+-----+---------+-------+ | Field | Type | Null | Key | Default | Extra | +----------------+---------------------+------+-----+---------+-------+ | name | varchar(255) | NO | PRI | NULL | | | priority | int(10) unsigned | YES | | NULL | | | commonName | varchar(511) | YES | | NULL | | | scientificName | varchar(511) | YES | | NULL | | | taxId | int(10) unsigned | YES | | NULL | | @@ -112,102 +111,103 @@ /* add specific browserExists depending upon options */ if (browserMustExist) sqlDyStringPrintf(query, " AND browserExists=1"); else if (browserNotExist) sqlDyStringPrintf(query, " AND browserExists=0"); sqlDyStringPrintf(query, " ORDER BY priority LIMIT %d;", maxItemsOutput); struct sqlResult *sr = sqlGetResult(conn, query->string); itemCount = sqlJsonOut(jw, sr); sqlFreeResult(&sr); dyStringFree(&query); } } return itemCount; } -static long long oneWordSearch(struct sqlConnection *conn, char *searchWord, struct jsonWrite *jw, long long *totalMatchCount) +static long long oneWordSearch(struct sqlConnection *conn, char *searchWord, struct jsonWrite *jw, long long *totalMatchCount, boolean *prefixSearch) /* perform search on a single word, prepare json and return number of matches * and number of potential matches totalMatchCount */ { long long itemCount = 0; *totalMatchCount = 0; struct dyString *query = dyStringNew(64); sqlDyStringPrintf(query, "SELECT COUNT(*) FROM %s WHERE MATCH(name, commonName, scientificName, clade, description) AGAINST ('%s' IN BOOLEAN MODE)", asmListTable, searchWord); if (browserMustExist) sqlDyStringPrintf(query, " AND browserExists=1"); else if (browserNotExist) sqlDyStringPrintf(query, " AND browserExists=0"); long long matchCount = sqlQuickLongLong(conn, query->string); -boolean prefixSearch = FALSE; +*prefixSearch = FALSE; /* assume not */ if (matchCount < 1) /* no match, add the * wild card match to make a prefix match */ { dyStringFree(&query); query = dyStringNew(64); sqlDyStringPrintf(query, "SELECT COUNT(*) FROM %s WHERE MATCH(name, commonName, scientificName, clade, description) AGAINST ('%s*' IN BOOLEAN MODE)", asmListTable, searchWord); /* add specific browserExists depending upon options */ if (browserMustExist) sqlDyStringPrintf(query, " AND browserExists=1"); else if (browserNotExist) sqlDyStringPrintf(query, " AND browserExists=0"); matchCount = sqlQuickLongLong(conn, query->string); if (matchCount > 0) - prefixSearch = TRUE; + *prefixSearch = TRUE; } if (matchCount < 1) // nothing found, returning zero return itemCount; *totalMatchCount = matchCount; if (statsOnly) // only counting, nothing returned { // the LIMIT would limit results to maxItemsOutput itemCount = min(maxItemsOutput, matchCount); } // when less than totalMatchCount else { dyStringFree(&query); query = dyStringNew(64); - sqlDyStringPrintf(query, "SELECT * FROM %s WHERE MATCH(name, commonName, scientificName, clade, description) AGAINST ('%s%s' IN BOOLEAN MODE)", asmListTable, searchWord, prefixSearch ? "*" : ""); + sqlDyStringPrintf(query, "SELECT * FROM %s WHERE MATCH(name, commonName, scientificName, clade, description) AGAINST ('%s%s' IN BOOLEAN MODE)", asmListTable, searchWord, *prefixSearch ? "*" : ""); /* add specific browserExists depending upon options */ if (browserMustExist) sqlDyStringPrintf(query, " AND browserExists=1"); else if (browserNotExist) sqlDyStringPrintf(query, " AND browserExists=0"); sqlDyStringPrintf(query, " ORDER BY priority LIMIT %d;", maxItemsOutput); struct sqlResult *sr = sqlGetResult(conn, query->string); itemCount = sqlJsonOut(jw, sr); sqlFreeResult(&sr); dyStringFree(&query); } return itemCount; -} /* static long long oneWordSearch(struct sqlConnection *conn, char *searchWord, struct jsonWrite *jw) */ +} /* static long long oneWordSearch(struct sqlConnection *conn, char *searchWord, struct jsonWrite *jw, boolean *prefixSearch) */ static void elapsedTime(struct jsonWrite *jw) { long nowTime = clock1000(); long elapsedTimeMs = nowTime - enteredMainTime; jsonWriteNumber(jw, "elapsedTimeMs", elapsedTimeMs); } void apiFindGenome(char *pathString[MAX_PATH_INFO]) /* 'findGenome' function */ { char *searchString = cgiOptionalString(argGenomeSearchTerm); char *inputSearchString = cloneString(searchString); +boolean prefixSearch = FALSE; char *extraArgs = verifyLegalArgs(argFindGenome); genarkTable = genarkTableName(); asmListTable = assemblyListTableName(); if (extraArgs) apiErrAbort(err400, err400Msg, "extraneous arguments found for function /findGenome'%s'", extraArgs); boolean asmListExists = hTableExists("hgcentraltest", asmListTable); if (!asmListExists) apiErrAbort(err400, err400Msg, "table hgcentraltest.assemblyList does not exist for /findGenome"); boolean asmSummaryExists = hTableExists("hgcentraltest", "asmSummary"); if (!asmSummaryExists) apiErrAbort(err400, err400Msg, "table hgcentraltest.asmSummary does not exist for /findGenome"); @@ -227,99 +227,101 @@ else if (sameWord(browserExistString, "mayExist")) { browserMustExist = FALSE; browserMayExist = TRUE; browserNotExist = FALSE; } else if (sameWord(browserExistString, "notExist")) { browserMustExist = FALSE; browserMayExist = FALSE; browserNotExist = TRUE; } else apiErrAbort(err400, err400Msg, "unrecognized '%s=%s' argument, must be one of: mustExist, mayExist or notExist", argBrowser, browserExistString); } -char *allowAllString = cgiOptionalString(argAllowAll); -if (isNotEmpty(allowAllString)) - { - if (SETTING_IS_ON(allowAllString)) - allowAll = TRUE; - else if (SETTING_IS_OFF(allowAllString)) - allowAll = FALSE; - else - apiErrAbort(err400, err400Msg, "unrecognized '%s=%s' argument, can only be =1 or =0", argAllowAll, allowAllString); - } char *statsOnlyString = cgiOptionalString(argStatsOnly); if (isNotEmpty(statsOnlyString)) { if (SETTING_IS_ON(statsOnlyString)) statsOnly = TRUE; else if (SETTING_IS_OFF(statsOnlyString)) statsOnly = FALSE; else apiErrAbort(err400, err400Msg, "unrecognized '%s=%s' argument, can only be =1 or =0", argStatsOnly, statsOnlyString); } struct sqlConnection *conn = hConnectCentral(); if (!sqlTableExists(conn, asmListTable)) apiErrAbort(err500, err500Msg, "missing central.assemblyList table in function /findGenome'%s'", extraArgs); int wordCount = 0; /* verify number of words in search string is legal */ wordCount = chopByWhite(searchString, NULL, 0); if (wordCount < 1) apiErrAbort(err400, err400Msg, "search term '%s' does not contain a word ? for function /findGenome", argGenomeSearchTerm); if (wordCount > 5) apiErrAbort(err400, err400Msg, "search term '%s=%s' should not have more than 5 words for function /findGenome", argGenomeSearchTerm, searchString); struct jsonWrite *jw = apiStartOutput(); -jsonWriteString(jw, argGenomeSearchTerm, searchString); jsonWriteString(jw, argBrowser, browserExistString); -if (allowAll) - jsonWriteBoolean(jw, argAllowAll, allowAll); long long itemCount = 0; long long totalMatchCount = 0; char **words; AllocArray(words, wordCount); (void) chopByWhite(searchString, words, wordCount); if (1 == wordCount) - itemCount = oneWordSearch(conn, words[0], jw, &totalMatchCount); + itemCount = oneWordSearch(conn, words[0], jw, &totalMatchCount, &prefixSearch); else /* multiple word search */ itemCount = multipleWordSearch(conn, words, wordCount, jw, &totalMatchCount); +if (prefixSearch) + { + struct dyString *addedStar = dyStringNew(64); + dyStringPrintf(addedStar, "%s*", inputSearchString); + jsonWriteString(jw, argGenomeSearchTerm, dyStringCannibalize(&addedStar)); + } +else + jsonWriteString(jw, argGenomeSearchTerm, inputSearchString); + /* rules about what can be in the search string: * + sign before a word indicates the word must be in the result * - sign before a word indicates it must not be in the result * * at end of word makes the word be a prefix search * "double quotes" to group words together as a phrase to match exactly * < or > adjust the words contribution to the relevance value * >moreImportant string); +dyStringFree(&query); + elapsedTime(jw); if (statsOnly) jsonWriteBoolean(jw, "statsOnly", TRUE); if (itemCount) { jsonWriteNumber(jw, "itemCount", itemCount); jsonWriteNumber(jw, "totalMatchCount", totalMatchCount); + jsonWriteNumber(jw, "availableAssemblies", universeCount); if (totalMatchCount > itemCount) jsonWriteBoolean(jw, "maxItemsLimit", TRUE); apiFinishOutput(0, NULL, jw); } else apiErrAbort(err400, err400Msg, "no genomes found matching search term %s='%s' for endpoint: /findGenome", argGenomeSearchTerm, inputSearchString); hDisconnectCentral(&conn); }