02b6efbee6c2613baa5294a4345458305b25d9d7 larrym Fri Mar 16 01:11:07 2012 -0700 use FULLTEXT index for t2g keyword searching diff --git src/hg/hgTracks/simpleTracks.c src/hg/hgTracks/simpleTracks.c index 5264393..32ca276 100644 --- src/hg/hgTracks/simpleTracks.c +++ src/hg/hgTracks/simpleTracks.c @@ -12128,143 +12128,110 @@ // end stuff copied from hgTracks.c enum trackVisibility trackVis = track->tdb->visibility; if (s != NULL) trackVis = hTvFromString(s); if (trackVis != tvHide) { track->visibility = tvDense; track->limitedVis = tvDense; track->limitedVisSet = TRUE; } track->nextItemButtonable = track->nextExonButtonable = FALSE; track->nextPrevItem = NULL; track->nextPrevExon = NULL; } -static void tokenizeAndAddToHash(struct hash *hash, char *str) -{ -// Pull all words out of a string and add them to an existence hash. -char *s; -str = htmlTextReplaceTagsWithChar(str, ' '); - -// strip out chars that crash kxTokenize -for(s = str; *s; s++) - { - if(*s < 32 || !isalnum(*s)) - *s = ' '; - } - -struct kxTok *kx = kxTokenize(str, FALSE); -for( ; kx != NULL; kx = kx->next) - { - char *str = kx->string; - toLowerN(str, strlen(str)); - hashAddInt(hash, str, 1); - } -} - char* t2gArticleTable(struct track *tg) /* return the name of the t2g articleTable, either * the value from the trackDb statement 'articleTable' * or the default value: <trackName>Article */ { char *articleTable = trackDbSetting(tg->tdb, "t2gArticleTable"); if (articleTable == NULL) { char buf[256]; safef(buf, sizeof(buf), "%sArticle", tg->track); articleTable = cloneString(buf); } return articleTable; } +static char *makeMysqlMatchStr(char *str) +{ +// return a string with all words prefixed with a '+' to force a boolean AND query +char *matchStr = needMem(strlen(str) * 2 + 1); +int i = 0; +while(*str) + { + for(;*str && isspace(*str);str++) + matchStr[i++] = *str; + if(*str) + { + matchStr[i++] = '+'; + for(; *str && !isspace(*str);str++) + matchStr[i++] = *str; + } + } +matchStr[i++] = 0; +return matchStr; +} + static void t2gLoadItems(struct track *tg) /* apply filter to t2g items */ { struct sqlConnection *conn = hAllocConn(database); -char *keyWords = cartOptionalString(cart, "t2gKeywords"); +char *keywords = cartOptionalString(cart, "t2gKeywords"); char *yearFilter = cartOptionalString(cart, "t2gYear"); char *articleTable = t2gArticleTable(tg); - -if(isEmpty(yearFilter)) +if(yearFilter != NULL && sameWord(yearFilter, "anytime")) + yearFilter = NULL; +if(isEmpty(yearFilter) && isEmpty(keywords)) loadGappedBed(tg); else { - // code based on loadGappedBed - char extra[256]; + char extra[2048], yearWhere[256], keywordsWhere[1024], prefix[256]; char **row; int rowOffset; struct linkedFeatures *lfList = NULL; struct trackDb *tdb = tg->tdb; int scoreMin = atoi(trackDbSettingClosestToHomeOrDefault(tdb, "scoreMin", "0")); int scoreMax = atoi(trackDbSettingClosestToHomeOrDefault(tdb, "scoreMax", "1000")); boolean useItemRgb = bedItemRgb(tdb); - safef(extra, sizeof(extra), "name in (select displayId from %s where %s.year >= '%s')", articleTable, articleTable, sqlEscapeString(yearFilter)); + + safef(prefix, sizeof(prefix), "name IN (SELECT displayId FROM %s WHERE", articleTable); + if(isNotEmpty(keywords)) + safef(keywordsWhere, sizeof(keywordsWhere), "MATCH (citation, title, authors, abstract) AGAINST ('%s' IN BOOLEAN MODE)", makeMysqlMatchStr(sqlEscapeString(keywords))); + if(isNotEmpty(yearFilter)) + safef(yearWhere, sizeof(yearWhere), "year >= '%s'", sqlEscapeString(yearFilter)); + if(isEmpty(keywords)) + safef(extra, sizeof(extra), "%s %s)", prefix, yearWhere); + else if(isEmpty(yearFilter)) + safef(extra, sizeof(extra), "%s %s)", prefix, keywordsWhere); + else + safef(extra, sizeof(extra), "%s %s AND %s)", prefix, yearWhere, keywordsWhere); struct sqlResult *sr = hExtendedRangeQuery(conn, tg->table, chromName, winStart, winEnd, extra, FALSE, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { struct bed *bed = bedLoad12(row+rowOffset); slAddHead(&lfList, bedMungToLinkedFeatures(&bed, tdb, 12, scoreMin, scoreMax, useItemRgb)); } sqlFreeResult(&sr); slReverse(&lfList); slSort(&lfList, linkedFeaturesCmp); tg->items = lfList; } - -if(isNotEmpty(keyWords)) - { - struct linkedFeatures *lf, *next, *newList = NULL; - for( lf = tg->items; lf != NULL; lf = next) - { - char query[512]; - struct sqlResult *sr; - char **row; - next = lf->next; - lf->next = NULL; - - // we should consider doing this more efficiently using a FULLTEXT based search in above item loading code. - safef(query, sizeof(query), "select authors, title, citation, abstract from %s where displayId = '%s'", articleTable, lf->name); - sr = sqlGetResult(conn, query); - if ((row = sqlNextRow(sr)) != NULL) - { - struct hash *hash = newHash(0); - boolean pass = TRUE; - struct kxTok *kx; - - tokenizeAndAddToHash(hash, row[0]); - tokenizeAndAddToHash(hash, row[1]); - tokenizeAndAddToHash(hash, row[2]); - tokenizeAndAddToHash(hash, row[3]); - - // we pass articles where keywords is a subset of words in article metadata. - kx = kxTokenize(keyWords, FALSE); - for( ; pass && kx != NULL; kx = kx->next) - { - toLowerN(kx->string, strlen(kx->string)); - pass = hashLookup(hash, kx->string) != NULL; - } - if(pass) - slAddTail(&newList, lf); - } - else - errAbort("Couldn't find article with displayId: '%s'", lf->name); - sqlFreeResult(&sr); - } - tg->items = newList; - } hFreeConn(&conn); } static void t2gMapItem(struct track *tg, struct hvGfx *hvg, void *item, char *itemName, char *mapItemName, int start, int end, int x, int y, int width, int height) /* create mouse overs with titles for t2g bed features */ { if(!theImgBox || tg->limitedVis != tvDense || !tdbIsCompositeChild(tg->tdb)) { char query[1024], title[4096]; char *label = NULL; char *articleTable = t2gArticleTable(tg); if(!isEmpty(articleTable)) {