6bd1b2f193f4cc37ac89a76582cb432e40988574 hiram Thu Sep 12 15:42:26 2024 -0700 better management of the addition of the compound words refs #32596 diff --git src/hg/hubApi/findGenome.c src/hg/hubApi/findGenome.c index 9876e79..a2bc3a4 100644 --- src/hg/hubApi/findGenome.c +++ src/hg/hubApi/findGenome.c @@ -99,59 +99,70 @@ jsonWriteObjectEnd(jw); ++itemCount; } return (itemCount); } /* MySQL FULLTEXT indexing has indexed 'words' as broken up by * word break characters, such as in the regular expression: '\W+' * or, in this case, checking the string with isalnum() function, * must all be isalnum() * Return: TRUE when there are word breaks * FALSE - the string is all one 'word' */ static boolean hasWordBreaks(char *s) /* Return TRUE if there is any word break in string. - * allowing characters _ * + - as those are special characters + * allowing - and + characters as first since those are + * special characters to the MySQL FULLTEXT search + * after that, allowing _ * + - as those are special characters * to the MySQL FULLTEXT search * The string has already been checked for the special prefix * characters of: " - + * or the special end character of: * */ { char c; +if (startsWith("-", s) || startsWith("+", s)) + s++; while ((c = *s++) != 0) { - if (c == '_' || c == '*' || c == '+' || c == '-') + if (c == '_') /* TBD: maybe dot . and apostrophe ' */ continue; if (! 
isalnum(c)) return TRUE; } return FALSE; } static char *quoteWords(char *s) /* given a string with word break characters, break it up into * a quoted string with the word break characters turned to single space */ { +char c; struct dyString *quoteString = dyStringNew(128); +/* start with the special MySQL characters if present at the beginning */ +if (startsWith("-", s) || startsWith("+", s)) + { + c = *s++; + dyStringPrintf(quoteString, "%c", c); + } +/* then continue with the " to start the quoted string */ dyStringPrintf(quoteString, "\""); -char c; int spaceCount = 0; while ((c = *s++) != 0) - if (isalnum(c) || c == '_' || c == '*' || c == '+' || c == '-') + if (isalnum(c) || c == '_') /* TBD: maybe dot . and apostrophe ' */ { dyStringPrintf(quoteString, "%c", c); spaceCount = 0; } else { if (spaceCount) continue; dyStringPrintf(quoteString, " "); ++spaceCount; } dyStringPrintf(quoteString, "\""); return dyStringCannibalize(&quoteString); } @@ -441,37 +452,32 @@ if (isNotEmpty(refSeqCategory)) jsonWriteString(jw, argCategory, refSeqCategory); if (isNotEmpty(versionStatus)) jsonWriteString(jw, argStatus, versionStatus); if (isNotEmpty(assemblyLevel)) jsonWriteString(jw, argLevel, assemblyLevel); long long itemCount = 0; long long totalMatchCount = 0; char **words; AllocArray(words, wordCount); (void) chopByWhite(searchString, words, wordCount); if (1 == wordCount) { boolean doQuote = TRUE; - if (startsWith("\"", words[0])) - doQuote = FALSE; - if (startsWith("-", words[0])) - doQuote = FALSE; - if (startsWith("+", words[0])) - doQuote = FALSE; - if (endsWith(words[0], "*")) + /* already quoted, let it go as-is */ + if (startsWith("\"", words[0]) && endsWith(words[0],"\"")) doQuote = FALSE; if (doQuote && hasWordBreaks(words[0])) { char *quotedWords = quoteWords(words[0]); endResultSearchString = quotedWords; itemCount = oneWordSearch(conn, quotedWords, jw, &totalMatchCount, 
&prefixSearch); } } else /* multiple word search */ itemCount = multipleWordSearch(conn, words, wordCount, jw, &totalMatchCount); if (prefixSearch) {