edef1384b0f93fc9c5abebba55ba9d943ab75a06 hiram Mon Sep 30 15:38:18 2024 -0700 now highlighting GenArk search terms in the results, thank you Angie for that template example, refs #33720 diff --git src/hg/hgGateway/hgGateway.c src/hg/hgGateway/hgGateway.c index 7598059..fc6bed2 100644 --- src/hg/hgGateway/hgGateway.c +++ src/hg/hgGateway/hgGateway.c @@ -921,46 +921,103 @@ aHubMatchList = filterHubSearchTextMatches(dbDbList, hubDbUrlList); if (aHubMatchList == NULL) { // Try a looser query sqlSafef(query, sizeof(query), "select distinct(concat(db, concat(concat('\t', label), concat('\t', hubUrl)))) from %s " "where track = '' and text like '%% %s%%'", hubSearchTableName, term); hubDbUrlList = sqlQuickList(conn, query); aHubMatchList = filterHubSearchTextMatches(dbDbList, hubDbUrlList); } } hDisconnectCentral(&conn); return aHubMatchList; } -static void writeGenarkMatches(struct jsonWrite *jw, struct gHubMatch *gHubMatchList) +static char *genarkBold(char *target, char *term) +/* given a string 'term', find it in target and add <b> </b> around it + * similar to boldTerm() for dbDb match highlighting */ +{ +int termLen = strlen(term); +int targetLen = strlen(target); +int resultSize = targetLen + strlen("<b></b>") + 1; +char result[resultSize]; +char *p = result; +int size = sizeof(result); +char *leftP = strstrNoCase(target, term); +if (leftP) /* found term in target */ + { + size_t offset = (size_t) (leftP - target); + safeAddN(&p, &size, target, offset); + safeAdd(&p, &size, "<b>"); + safeAddN(&p, &size, target+offset, termLen); + safeAdd(&p, &size, "</b>"); + safeAdd(&p, &size, target+offset+termLen); + } +else + safeAdd(&p, &size, target); + +return cloneStringZ(result, resultSize); +} + +static void writeGenarkMatches(struct jsonWrite *jw, struct gHubMatch *gHubMatchList, char *searchString) /* Write out JSON for each genark hub that matched the users term */ { +/* get the search terms in an array for bold highlighting */ +/* beware, the chopByWhite cannibalizes 
the string, thus the cloneString() */ +char *searchCopy = cloneString(searchString); +int searchWordCount = chopByWhite(searchCopy, NULL, 0); /* no cannibalizing yet */ +char **searchWords; +AllocArray(searchWords, searchWordCount); +(void) chopByWhite(searchCopy, searchWords, searchWordCount); +/* eliminate the potential extra characters before and aft on these words */ +for (int i = 0; i < searchWordCount; ++i) + { + /* remove trailing " or * characters, or beginning " + - characters */ + if ('"' == lastChar(searchWords[i]) || '*' == lastChar(searchWords[i])) + searchWords[i][strlen(searchWords[i])-1] = '\0'; + else if ('"' == searchWords[i][0] || + '+' == searchWords[i][0] || + '-' == searchWords[i][0] ) + searchWords[i] += 1; + } + struct gHubMatch *gHubMatch; for (gHubMatch = gHubMatchList; gHubMatch != NULL; gHubMatch = gHubMatch->next) { + char *comBold = genarkBold(gHubMatch->commonName, searchWords[0]); + char *sciBold = genarkBold(gHubMatch->scientificName, searchWords[0]); + for (int i = 1; i < searchWordCount; ++i) + { + char *savePtr = comBold; + comBold = genarkBold(savePtr, searchWords[i]); + freeMem(savePtr); + savePtr = sciBold; + sciBold = genarkBold(savePtr, searchWords[i]); + freeMem(savePtr); + } + jsonWriteObjectStart(jw, NULL); jsonWriteString(jw, "genome", gHubMatch->gcAccession); jsonWriteString(jw, "db", gHubMatch->asmName); jsonWriteString(jw, "hubUrl", gHubMatch->hubUrl); jsonWriteString(jw, "scientificName", gHubMatch->scientificName); // Add a category label for customized autocomplete-with-categories. jsonWriteString(jw, "category", "UCSC GenArk - bulk-annotated assemblies from NCBI Genbank/RefSeq"); jsonWriteString(jw, "value", gHubMatch->asmName); // Use just the db as label, since shortLabel is included in the category label. 
- jsonWriteStringf(jw, "label", "%s - %s", gHubMatch->commonName, gHubMatch->scientificName); + jsonWriteStringf(jw, "label", "%s - %s", comBold, sciBold); jsonWriteObjectEnd(jw); } } /* maximum limit of how many matches to display from genark */ #define GENARK_LIMIT 20 static struct gHubMatch *filterGenarkMatches(char *genarkHubUrl, struct genark *matchList) /* Turn the sql results into a struct gHubMatch list */ { struct genark *match; struct gHubMatch *ret = NULL; int c = 0; for (match = matchList; match != NULL; match = match->next) @@ -1009,65 +1066,68 @@ sqlFreeResult(&sr); if (ret) slReverse(&ret); return ret; } /* static struct gHubMatch *filterAssemblyListMatche */ static struct gHubMatch *searchGenark(char *term) /* Search through the genark table (or assemblyList table) for hubs matches term */ { char *genarkPrefix = cfgOption("genarkHubPrefix"); if (genarkPrefix == NULL) return NULL; +/* the chopByWhite is going to cannibalize the term string, make a copy */ +char *termCopy = cloneString(term); + struct sqlConnection *conn = hConnectCentral(); struct gHubMatch *gHubMatchList = NULL; char *genarkTbl = genarkTableName(); int colCount = genArkColumnCount(); -int termLength = strlen(term); +int termLength = strlen(termCopy); char *asmListTable = assemblyListTableName(); /* only allow the asmList query when the search term is more than 2 letters */ if ((termLength > 2) && sqlTableExists(conn, asmListTable)) { - int wordCount = chopByWhite(term, NULL, 0); + int wordCount = chopByWhite(termCopy, NULL, 0); if (1 == wordCount) { struct dyString *query = dyStringNew(64); - sqlDyStringPrintf(query, "SELECT COUNT(*) FROM %s WHERE MATCH(name, commonName, scientificName, clade, description, refSeqCategory, versionStatus, assemblyLevel) AGAINST ('%s' IN BOOLEAN MODE) AND browserExists=1", asmListTable, term); + sqlDyStringPrintf(query, "SELECT COUNT(*) FROM %s WHERE MATCH(name, commonName, scientificName, clade, description, refSeqCategory, versionStatus, 
assemblyLevel) AGAINST ('%s' IN BOOLEAN MODE) AND browserExists=1", asmListTable, termCopy); long long matchCount = sqlQuickLongLong(conn, query->string); dyStringFree(&query); boolean wildCard = FALSE; if (0 == matchCount) /* try prefix search */ { query = dyStringNew(64); - sqlDyStringPrintf(query, "SELECT COUNT(*) FROM %s WHERE MATCH(name, commonName, scientificName, clade, description, refSeqCategory, versionStatus, assemblyLevel) AGAINST ('%s*' IN BOOLEAN MODE) AND browserExists=1", asmListTable, term); + sqlDyStringPrintf(query, "SELECT COUNT(*) FROM %s WHERE MATCH(name, commonName, scientificName, clade, description, refSeqCategory, versionStatus, assemblyLevel) AGAINST ('%s*' IN BOOLEAN MODE) AND browserExists=1", asmListTable, termCopy); matchCount = sqlQuickLongLong(conn, query->string); dyStringFree(&query); if (matchCount > 0) wildCard = TRUE; } if (matchCount > 0) - gHubMatchList = filterAssemblyListMatches(conn, asmListTable, term, genarkPrefix, wildCard); + gHubMatchList = filterAssemblyListMatches(conn, asmListTable, termCopy, genarkPrefix, wildCard); } /* 1 == wordCout single word search */ else { /* multiple word search */ - char *matchAllWords = asmListMatchAllWords(term); + char *matchAllWords = asmListMatchAllWords(termCopy); gHubMatchList = filterAssemblyListMatches(conn, asmListTable, matchAllWords, genarkPrefix, FALSE); } /* multiple word search */ } /* termLength > 2 */ else if (sqlTableExists(conn, genarkTbl)) { char query[1024]; if (colCount > 6) { sqlSafef(query, sizeof(query), "select * from %s where " "(gcAccession like '%%%s%%' or scientificName like '%%%s%%' or commonName like '%%%s%%' or asmName like '%%%s%%') order by priority", genarkTbl, term, term, term, term); } else { sqlSafef(query, sizeof(query), "select * from %s where " @@ -1107,32 +1167,32 @@ puts("Content-Type:text/javascript\n"); // Before accessing hubs, intialize udc cache location from hg.conf: setUdcCacheDir(); struct dbDb *dbDbList = hDbDbList(); struct dbDbMatch 
*matchList = searchDbDb(dbDbList, term); struct gHubMatch *gHubMatchList = searchGenark(term); struct aHubMatch *aHubMatchList = searchPublicHubs(dbDbList, term); struct jsonWrite *jw = jsonWriteNew(); jsonWriteListStart(jw, NULL); // Write out JSON for dbDb matches, if any; add category if we found assembly hub matches too. char *category = aHubMatchList ? "UCSC Genome Browser assemblies - annotation tracks curated by UCSC" : NULL; struct dbDbMatch *match; for (match = matchList; match != NULL; match = match->next) writeDbDbMatch(jw, match, term, category); -// Write out genark matches, if any -writeGenarkMatches(jw, gHubMatchList); +// Write out genark matches, if any, pass term so the matches can be highlighted +writeGenarkMatches(jw, gHubMatchList, term); // Write out assembly hub matches, if any. writeAssemblyHubMatches(jw, aHubMatchList); jsonWriteListEnd(jw); puts(jw->dy->string); jsonWriteFree(&jw); } int main(int argc, char *argv[]) /* Process CGI / command line. */ { /* Null terminated list of CGI Variables we don't want to save * permanently. */ char *excludeVars[] = {SEARCH_TERM, CARTJSON_COMMAND, NULL,}; cgiSpoof(&argc, argv); measureTiming = cgiOptionalInt("measureTiming", 0);