cc1669639a750c73b05f85272a97129de52abae2
angie
Mon Oct 10 15:40:06 2022 -0700
Don't limit search of dbDb.description to only terms starting with GR; instead, restrict the search to the part within parentheses (skipping the assembly date), and suppress description searches that would match too many assemblies from institutions like Broad, WUGSC, Baylor, Agencourt unless people type in more letters of those names.
refs #30108
diff --git src/hg/hgGateway/hgGateway.c src/hg/hgGateway/hgGateway.c
index 56b81ef..45f4c14 100644
--- src/hg/hgGateway/hgGateway.c
+++ src/hg/hgGateway/hgGateway.c
@@ -495,30 +495,38 @@
INLINE void safeAdd(char **pDest, int *pSize, char *src)
/* Append the whole NUL-terminated string src by delegating to safeAddN with
 * src's length.  As with safeAddN, *pDest is advanced and *pSize reduced by
 * that length, so repeated calls build up a string piece by piece. */
{
size_t srcLen = strlen(src);
safeAddN(pDest, pSize, src, srcLen);
}
static char *boldTerm(char *target, char *term, int offset, enum dbDbMatchType type)
/* Return a string with <b>term</b> swapped in for term at offset.
* If offset is negative and type is ddmtSciName, treat term as an abbreviated species
* name (term = "G. species" vs. target = "Genus species"): bold the first letter of the
* genus and the matching portion of the species. */
{
int termLen = strlen(term);
int targetLen = strlen(target);
+if (type == ddmtDescription)
+ {
+ // Search of dbDb->description skips the date that precedes the actual description which is
+ // in parentheses. Adjust offset accordingly.
+ char *leftP = strchr(target, '(');
+ if (leftP)
+ offset += (leftP+1 - target);
+ }
if (offset + termLen > targetLen)
errAbort("boldTerm: invalid offset (%d) for term '%s' (length %d) in target '%s' (length %d)",
offset, term, termLen, target, targetLen);
else if (offset < 0 && type != ddmtSciName)
errAbort("boldTerm: negative offset (%d) given for type %d", offset, type);
// Allocate enough to have two bolded chunks:
int resultSize = targetLen + 2*strlen("") + 1;
char result[resultSize];
char *p = result;
int size = sizeof(result);
if (offset >= 0)
{
// The part of target before the term:
safeAddN(&p, &size, target, offset);
// The bolded term:
@@ -685,34 +693,41 @@
static struct dbDbMatch *searchDbDb(struct dbDb *dbDbList, char *term)
/* Search various fields of dbDb for matches to term and sort by relevance.
 * Walks dbDbList and calls checkTerm on each entry's name; on genome and scientificName
 * only when taxId >= 2; and on description subject to the filtering in the loop body.
 * Matches are accumulated in matchList (matchHash is passed to every checkTerm call),
 * and matchList is returned after sorting with dbDbMatchCmp.
 * NOTE(review): the description filter calls startsWith(term, "BRO") etc. with term as
 * the first argument; presumably this suppresses terms that are a prefix of those strings,
 * so that typing more letters of an institution name still searches description --
 * confirm against startsWith's definition and the letter case of term. */
{
struct dbDbMatch *matchList = NULL;
struct hash *matchHash = hashNew(0);
struct dbDb *dbDb;
for (dbDb = dbDbList; dbDb != NULL; dbDb = dbDb->next)
{
checkTerm(term, dbDb->name, ddmtDb, dbDb, matchHash, &matchList);
// Skip experimental stuff on hgwdev with bogus taxId unless the db name matches term.
if (dbDb->taxId >= 2)
{
checkTerm(term, dbDb->genome, ddmtGenome, dbDb, matchHash, &matchList);
checkTerm(term, dbDb->scientificName, ddmtSciName, dbDb, matchHash, &matchList);
}
- // dbDb.description is a little too much for autocomplete ("br" would match dozens
- // of Broad assemblies), but we do need to recognize "GRC".
- if (startsWith("GR", term))
- checkTerm(term, dbDb->description, ddmtDescription, dbDb, matchHash, &matchList);
+ // dbDb.description has dozens of matches for some institutions like Broad, so suppress
+ // it for search terms that would get too many probably unwanted matches.
+ if (! (startsWith(term, "BRO") || startsWith(term, "WU") || startsWith(term, "BAY") ||
+ startsWith(term, "AGE")))
+ {
+ // dbDb.description also starts with dates followed by actual description in parentheses,
+ // so search only the part in parentheses to avoid month prefix matches.
+ char *leftP = strchr(dbDb->description, '(');
+ char *toSearch = leftP ? leftP+1 : dbDb->description;
+ checkTerm(term, toSearch, ddmtDescription, dbDb, matchHash, &matchList);
+ }
}
slSort(&matchList, dbDbMatchCmp);
return matchList;
}
// Assembly hub match:
struct aHubMatch
// description of an assembly hub db
{
struct aHubMatch *next;
char *shortLabel; // hub shortLabel
char *hubUrl; // hub url
char *aDb; // assembly db hosted by hub
char *label; // label for this db