67ce69b2d6be585fcad3cb0dd5fc927e95c327ff chmalee Mon Oct 14 13:48:54 2019 -0700 Rework of hgHubConnect hub searching to not use trackDb or udc so searches can be sped up. The hubSearchText table now has an extra column for the parent track names (if any) of a search result to a track. hgHubConnect has been changed to use this field of the table instead of using trackDb. hubCrawl has been changed to generate this additional column, refs #23812 diff --git src/hg/lib/hubSearchText.c src/hg/lib/hubSearchText.c index 2ad3045..d85e08e 100644 --- src/hg/lib/hubSearchText.c +++ src/hg/lib/hubSearchText.c @@ -1,129 +1,133 @@ /* hubSearchText.c was originally generated by the autoSql program, which also * generated hubSearchText.h and hubSearchText.sql. This module links the database and * the RAM representation of objects. */ #include "common.h" #include "linefile.h" #include "dystring.h" #include "jksql.h" #include "rangeTree.h" #include "hubSearchText.h" -char *hubSearchTextCommaSepFieldNames = "hubUrl,db,track,textLength,text"; +char *hubSearchTextCommaSepFieldNames = "hubUrl,db,track,label,parents,textLength,text"; /* definitions for textLength column */ static char *values_textLength[] = {"Short", "Long", "Meta", NULL}; static struct hash *valhash_textLength = NULL; void hubSearchTextStaticLoadWithNull(char **row, struct hubSearchText *ret) /* Load a row from hubSearchText table into ret. The contents of ret will * be replaced at the next call to this function. */ { ret->hubUrl = row[0]; ret->db = row[1]; ret->track = row[2]; ret->label = row[3]; -ret->textLength = sqlEnumParse(row[4], values_textLength, &valhash_textLength); -ret->text = row[5]; +ret->parents = row[4]; +ret->textLength = sqlEnumParse(row[5], values_textLength, &valhash_textLength); +ret->text = row[6]; } struct hubSearchText *hubSearchTextLoadWithNull(char **row) /* Load a hubSearchText from row fetched with select * from hubSearchText * from database. Dispose of this with hubSearchTextFree(). */ { struct hubSearchText *ret; AllocVar(ret); ret->hubUrl = cloneString(row[0]); ret->db = cloneString(row[1]); ret->track = cloneString(row[2]); ret->label = cloneString(row[3]); -ret->textLength = sqlEnumParse(row[4], values_textLength, &valhash_textLength); -ret->text = cloneString(row[5]); +ret->parents = cloneString(row[4]); +ret->textLength = sqlEnumParse(row[5], values_textLength, &valhash_textLength); +ret->text = cloneString(row[6]); return ret; } struct hubSearchText *hubSearchTextLoadAll(char *fileName) /* Load all hubSearchText from a whitespace-separated file. * Dispose of this with hubSearchTextFreeList(). */ { struct hubSearchText *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); -char *row[6]; +char *row[7]; while (lineFileRow(lf, row)) { el = hubSearchTextLoadWithNull(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct hubSearchText *hubSearchTextLoadAllByChar(char *fileName, char chopper) /* Load all hubSearchText from a chopper separated file. * Dispose of this with hubSearchTextFreeList(). */ { struct hubSearchText *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); -char *row[6]; +char *row[7]; while (lineFileNextCharRow(lf, chopper, row, ArraySize(row))) { el = hubSearchTextLoadWithNull(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct hubSearchText *hubSearchTextCommaIn(char **pS, struct hubSearchText *ret) /* Create a hubSearchText out of a comma separated string. * This will fill in ret if non-null, otherwise will * return a new hubSearchText */ { char *s = *pS; if (ret == NULL) AllocVar(ret); ret->hubUrl = sqlStringComma(&s); ret->db = sqlStringComma(&s); ret->track = sqlStringComma(&s); ret->label = sqlStringComma(&s); +ret->parents = sqlStringComma(&s); ret->textLength = sqlEnumComma(&s, values_textLength, &valhash_textLength); ret->text = sqlStringComma(&s); *pS = s; return ret; } void hubSearchTextFree(struct hubSearchText **pEl) /* Free a single dynamically allocated hubSearchText such as created * with hubSearchTextLoad(). */ { struct hubSearchText *el; if ((el = *pEl) == NULL) return; freeMem(el->hubUrl); freeMem(el->db); freeMem(el->track); freeMem(el->label); +freeMem(el->parents); freeMem(el->text); freez(pEl); } void hubSearchTextFreeList(struct hubSearchText **pList) /* Free a list of dynamically allocated hubSearchText's */ { struct hubSearchText *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; hubSearchTextFree(&el); } *pList = NULL; @@ -137,41 +141,46 @@ if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->db); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->track); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->label); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); +fprintf(f, "%s", el->parents); +if (sep == ',') fputc('"',f); +fputc(sep,f); +if (sep == ',') fputc('"',f); sqlEnumPrint(f, el->textLength, values_textLength); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->text); if (sep == ',') fputc('"',f); fputc(lastSep,f); } /* -------------------------------- End autoSql Generated Code -------------------------------- */ + /* Restrictions on the size of context chunks returned by getTextContext() */ #define HST_MAXCONTEXTLENGTH 300 #define HST_CONTEXTRADIUS 70 static char *getTextContext(char *text, char *searchTerms) /* Look for instances of words from searchTerms in the supplied text, grab some surrounding * context, and stitch them together into a string that combines overlapping regions. */ { struct dyString *contextString = dyStringNew(0); struct rbTree *contextTree = rangeTreeNew(); char *splitTerms[1024]; char *cloneTerms = cloneString(searchTerms); if (isNotEmpty(cloneTerms)) strLower(cloneTerms); char *lowText = cloneString(text);