104810c1ffc3b9350af01e2f97f8e58328b9434d tdreszer Mon Mar 28 12:33:39 2011 -0700 Addressed a bug in description searches and slightly streamlined. diff --git src/hg/hgFileSearch/hgFileSearch.c src/hg/hgFileSearch/hgFileSearch.c index 8dd6b68..f24d423 100644 --- src/hg/hgFileSearch/hgFileSearch.c +++ src/hg/hgFileSearch/hgFileSearch.c @@ -1,562 +1,596 @@ #include "common.h" #include "hash.h" #include "cheapcgi.h" #include "htmshell.h" #include "jsHelper.h" #include "trackDb.h" #include "hdb.h" #include "web.h" #include "mdb.h" #include "hCommon.h" #include "hui.h" #include "fileUi.h" #include "searchTracks.h" #include "cart.h" #include "grp.h" #define FAKE_MDB_MULTI_SELECT_SUPPORT struct hash *trackHash = NULL; // Is this needed? boolean measureTiming = FALSE; /* DON'T EDIT THIS -- use CGI param "&measureTiming=." . */ #define FILE_SEARCH "hgfs_Search" #define FILE_SEARCH_FORM "fileSearch" #define FILE_SEARCH_CURRENT_TAB "fsCurTab" #define FILE_SEARCH_ON_FILETYPE "fsFileType" // These are common with trackSearch. Should they be? #define TRACK_SEARCH_SIMPLE "tsSimple" #define TRACK_SEARCH_ON_NAME "tsName" #define TRACK_SEARCH_ON_GROUP "tsGroup" #define TRACK_SEARCH_ON_DESCR "tsDescr" #define TRACK_SEARCH_SORT "tsSort" #define SUPPORT_COMPOSITE_SEARCH #ifdef SUPPORT_COMPOSITE_SEARCH //#define USE_TABS #endif///def SUPPORT_COMPOSITE_SEARCH #ifdef SUPPORT_COMPOSITE_SEARCH // make a matchString function to support "contains", "is" etc. 
and wildcards in contains // ((sameString(op, "is") && !strcasecmp(track->shortLabel, str)) || #define MATCH_ON_EACH_WORD #ifdef MATCH_ON_EACH_WORD +#define MATCH_ON_WILDS +static boolean matchToken(char *string, char *token) +{ +if (string == NULL) + return (token == NULL); +if (token == NULL) + return TRUE; + +if (!strchr(token,'*') && !strchr(token,'?')) + return (strcasestr(string,token) != NULL); + +#ifdef MATCH_ON_WILDS +char wordWild[1024]; +safef(wordWild,sizeof wordWild,"*%s*",token); +return wildMatch(wordWild, string); + +// do this with regex ? Would require all sorts of careful parsing for ()., etc. +//safef(wordWild,sizeof wordWild,"^*%s*$",token); +//regex_t regEx; +//int err = regcomp(&regEx, token, REG_NOSUB | REG_ICASE); +//if(err != 0) // Compile the regular expression so that it can be used. Use: REG_EXTENDED ? +// { +// char buffer[128]; +// regerror(err, &regEx, buffer, sizeof buffer); +// warn("ERROR: Invalid regular expression: [%s] %s\n",token,buffer); +// regfree(&regEx); +// return FALSE; +// } +//err = regexec(&regEx, mdbVar->val, 0, NULL, 0); +//regfree(&regEx); +//return (err == 0); + +#endif//def MATCH_ON_WILDS +} + static boolean doesNameMatch(struct trackDb *tdb, struct slName *wordList) // We parse str and look for every word at the start of any word in track description (i.e. google style). { if (tdb->html == NULL) return (wordList != NULL); struct slName *word = wordList; for(; word != NULL; word = word->next) { - char wordWild[256]; - safef(wordWild,sizeof wordWild,"*%s*",word->name); - if (!wildMatch(wordWild, tdb->shortLabel) - && !wildMatch(wordWild, tdb->longLabel)) + if (!matchToken(tdb->shortLabel,word->name) + && !matchToken(tdb->longLabel, word->name)) return FALSE; } return TRUE; } static boolean doesDescriptionMatch(struct trackDb *tdb, struct slName *wordList) // We parse str and look for every word at the start of any word in track description (i.e. google style). 
{ +//static boolean tryitOneCycle=TRUE; if (tdb->html == NULL) return (wordList != NULL); +if (strchr(tdb->html,'\n')) + strSwapChar(tdb->html,'\n',' '); // DANGER: don't own memory. However, this CGI will use html for no other purpose + struct slName *word = wordList; for(; word != NULL; word = word->next) { - char wordWild[256]; - safef(wordWild,sizeof wordWild,"*%s*",word->name); - if (!wildMatch(wordWild, tdb->html)) + if (!matchToken(tdb->html,word->name)) return FALSE; } return TRUE; } #endif///def MATCH_ON_EACH_WORD static struct trackDb *tdbFilterBy(struct trackDb **pTdbList, char *name, char *description, char *group) // returns tdbs that match supplied criterion, leaving unmatched in list passed in { #ifdef MATCH_ON_EACH_WORD // Set the word lists up once struct slName *nameList = NULL; if (name) nameList = slNameListOfUniqueWords(cloneString(name),TRUE); // TRUE means respect quotes struct slName *descList = NULL; if (description) descList = slNameListOfUniqueWords(cloneString(description),TRUE); #endif///def MATCH_ON_EACH_WORD struct trackDb *tdbList = *pTdbList; struct trackDb *tdbRejects = NULL; struct trackDb *tdbMatched = NULL; #ifndef MATCH_ON_EACH_WORD char nameWild[256]; if (name) safef(nameWild,sizeof nameWild,"*%s*",name); char descWild[512]; if (description) safef(descWild,sizeof descWild,"*%s*",description); #endif///ndef MATCH_ON_EACH_WORD while (tdbList != NULL) { struct trackDb *tdb = slPopHead(&tdbList); if (!tdbIsComposite(tdb)) slAddHead(&tdbRejects,tdb); else if (group && differentString(tdb->grp,group)) slAddHead(&tdbRejects,tdb); #ifdef MATCH_ON_EACH_WORD else if (name && !doesNameMatch(tdb, nameList)) slAddHead(&tdbRejects,tdb); else if (description && !doesDescriptionMatch(tdb, descList)) slAddHead(&tdbRejects,tdb); #else///ifndef MATCH_ON_EACH_WORD else if (name && (!wildMatch(nameWild,tdb->shortLabel) && !wildMatch(nameWild,tdb->longLabel))) slAddHead(&tdbRejects,tdb); else if (description && (tdb->html == NULL || 
!wildMatch(descWild,tdb->html))) slAddHead(&tdbRejects,tdb); #endif///ndef MATCH_ON_EACH_WORD else slAddHead(&tdbMatched,tdb); } //slReverse(&tdbRejects); // Needed? //slReverse(&tdbMatched); // Needed? - *pTdbList = tdbRejects; //warn("matched %d tracks",slCount(tdbMatched)); return tdbMatched; } static boolean mdbSelectsAddFoundComposites(struct slPair **pMdbSelects,struct trackDb *tdbsFound) // Adds a composite mdbSelect (if found in tdbsFound) to the head of the pairs list. // If tdbsFound is NULL, then add dummy composite search criteria { // create comma separated list of composites struct dyString *dyComposites = dyStringNew(256); struct trackDb *tdb = tdbsFound; for(;tdb != NULL; tdb = tdb->next) { if (tdbIsComposite(tdb)) dyStringPrintf(dyComposites,"%s,",tdb->track); else if (tdbIsCompositeChild(tdb)) { struct trackDb *composite = tdbGetComposite(tdb); dyStringPrintf(dyComposites,"%s,",composite->track); } } if (dyStringLen(dyComposites) > 0) { char *composites = dyStringCannibalize(&dyComposites); composites[strlen(composites) - 1] = '\0'; // drop the last ',' //warn("Found composites: %s",composites); slPairAdd(pMdbSelects,MDB_VAR_COMPOSITE,composites); // Composite should not already be in the list, because it is only indirectly sortable return TRUE; } //warn("No composites found"); dyStringFree(&dyComposites); return FALSE; } #endif///def SUPPORT_COMPOSITE_SEARCH #ifdef USE_TABS static struct slRef *simpleSearchForTdbs(struct trix *trix,char **descWords,int descWordCount) // Performs the simple search and returns the found tracks. { struct slRef *foundTdbs = NULL; struct trixSearchResult *tsList; for(tsList = trixSearch(trix, descWordCount, descWords, TRUE); tsList != NULL; tsList = tsList->next) { struct trackDb *tdb = (struct track *) hashFindVal(trackHash, tsList->itemId); if (track != NULL) // It is expected that this is NULL (e.g. 
when the trix references trackDb tracks which have no tables) { refAdd(&foundTdbs, tdb); } } return foundTdbs; } #endif///def USE_TABS struct slName *tdbListGetGroups(struct trackDb *tdbList) // Returns a list of groups found in the tdbList // FIXME: Should be moved to trackDbCustom and shared { struct slName *groupList = NULL; char *lastGroup = "[]"; struct trackDb *tdb = tdbList; for(;tdb!=NULL;tdb=tdb->next) { if (differentString(lastGroup,tdb->grp)) lastGroup = slNameStore(&groupList, tdb->grp); } return groupList; } struct grp *groupsFilterForTdbList(struct grp **grps,struct trackDb *tdbList) { struct grp *grpList = *grps; *grps = NULL; struct slName *tdbGroups = tdbListGetGroups(tdbList); if (tdbList == NULL) return *grps; while (grpList != NULL) { struct grp *grp = slPopHead(&grpList); if (slNameInList(tdbGroups,grp->name)) slAddHead(grps,grp); } slNameFreeList(&tdbGroups); slReverse(grps); return *grps; } static void doFileSearch(char *db,char *organism,struct cart *cart,struct trackDb *tdbList) { if (!advancedJavascriptFeaturesEnabled(cart)) { warn("Requires advanced javascript features."); return; } struct sqlConnection *conn = hAllocConn(db); boolean metaDbExists = sqlTableExists(conn, "metaDb"); if (!sqlTableExists(conn, "metaDb")) { warn("Assembly %s %s does not support Downloadable Files search.", organism, hFreezeFromDb(db)); hFreeConn(&conn); return; } #ifdef SUPPORT_COMPOSITE_SEARCH char *nameSearch = cartOptionalString(cart, TRACK_SEARCH_ON_NAME); char *descSearch=NULL; #endif///def SUPPORT_COMPOSITE_SEARCH char *fileTypeSearch = cartOptionalString(cart, FILE_SEARCH_ON_FILETYPE); boolean doSearch = sameWord(cartUsualString(cart, FILE_SEARCH,"no"), "search"); #ifdef ONE_FUNC struct hash *parents = newHash(4); #endif///def ONE_FUNC boolean searchTermsExist = FALSE; // FIXME: Why is this needed? 
int cols; #ifdef SUPPORT_COMPOSITE_SEARCH #ifdef USE_TABS enum searchTab selectedTab = simpleTab; char *currentTab = cartUsualString(cart, FILE_SEARCH_CURRENT_TAB, "simpleTab"); if(sameString(currentTab, "simpleTab")) { selectedTab = simpleTab; descSearch = cartOptionalString(cart, TRACK_SEARCH_SIMPLE); freez(&nameSearch); } else if(sameString(currentTab, "filesTab")) { selectedTab = filesTab; descSearch = cartOptionalString(cart, TRACK_SEARCH_ON_DESCR); } #else///ifndef USE_TABS enum searchTab selectedTab = filesTab; descSearch = cartOptionalString(cart, TRACK_SEARCH_ON_DESCR); #endif///ndef USE_TABS #ifndef MATCH_ON_EACH_WORD if(descSearch) stripChar(descSearch, '"'); #endif///ndef MATCH_ON_EACH_WORD #ifdef USE_TABS struct trix *trix; char trixFile[HDB_MAX_PATH_STRING]; getSearchTrixFile(db, trixFile, sizeof(trixFile)); trix = trixOpen(trixFile); #endif///def USE_TABS #endif///def SUPPORT_COMPOSITE_SEARCH printf("
"); // FIXME: Do we need a form at all? //printf("
\n\n", hgTracksName(),FILE_SEARCH_FORM,FILE_SEARCH_FORM); printf("\n\n", FILE_SEARCH_FORM,FILE_SEARCH_FORM); cartSaveSession(cart); // Creates hidden var of hgsid to avoid bad voodoo //safef(buf, sizeof(buf), "%lu", clock1()); //cgiMakeHiddenVar("hgt_", buf); // timestamps page to avoid browser cache printf("\n", db); printf("\n",TRACK_SEARCH_DEL_ROW); printf("\n",TRACK_SEARCH_ADD_ROW); #ifdef SUPPORT_COMPOSITE_SEARCH #ifdef USE_TABS printf("\n", FILE_SEARCH_CURRENT_TAB, currentTab); printf("
"); // Restricts to max-width:1000px; if (measureTiming) uglyTime("Rendered tabs"); #ifdef USE_TABS if (doSearch && selectedTab==simpleTab && isEmpty(descSearch)) doSearch = FALSE; #endif///def USE_TABS if(doSearch) { // Now search #ifdef USE_TABS struct slRef *foundTdbs = NULL; if(selectedTab==simpleTab) { foundTdbs = simpleSearchForTdbs(trix,descWords,descWordCount); // What to do now? if (measureTiming) uglyTime("Searched for tracks"); // Sort and Print results if(selectedTab!=filesTab) { enum sortBy sortBy = cartUsualInt(cart,TRACK_SEARCH_SORT,sbRelevance); int tracksFound = slCount(foundTdbs); if(tracksFound > 1) findTracksSort(&tracks,sortBy); displayFoundTracks(cart,tracks,tracksFound,sortBy); if (measureTiming) uglyTime("Displayed found files"); } } else if(selectedTab==filesTab && mdbPairs != NULL) #endif///def USE_TABS { #ifdef SUPPORT_COMPOSITE_SEARCH if (nameSearch || descSearch || groupSearch) { // Use nameSearch, descSearch and groupSearch to narrow down the list of composites. if (isNotEmpty(nameSearch) || isNotEmpty(descSearch) || isNotEmpty(groupSearch)) { struct trackDb *tdbList = hTrackDb(db); struct trackDb *tdbsMatch = tdbFilterBy(&tdbList, nameSearch, descSearch, groupSearch); // Now we have a list of tracks, so we need a unique list of composites to add to mdbSelects doSearch = mdbSelectsAddFoundComposites(&mdbSelects,tdbsMatch); } } #endif///def SUPPORT_COMPOSITE_SEARCH if (doSearch && mdbSelects != NULL && isNotEmpty(fileTypeSearch)) fileSearchResults(db, conn, mdbSelects, fileTypeSearch); else printf("

No files found.

\n"); if (measureTiming) uglyTime("Searched for files"); } slPairFreeList(&mdbSelects); } hFreeConn(&conn); webNewSection("About Downloadable Files Search"); printf("

Search for downloadable ENCODE files by entering search terms in " "the Track name or Description fields and/or by making selections with " "the group, data format, and/or ENCODE metadata drop-downs. For exact " "matches, use quotes around your search terms."); printf("
more help

\n"); webEndSectionTables(); } void doMiddle(struct cart *cart) /* Write body of web page. */ { struct trackDb *tdbList = NULL; char *organism = NULL; char *db = NULL; getDbAndGenome(cart, &db, &organism, NULL); char *chrom = cartUsualString(cart, "c", hDefaultChrom(db)); measureTiming = isNotEmpty(cartOptionalString(cart, "measureTiming")); // QUESTION: Do We need track list ??? trackHash ??? Can't we just get one track and no children trackHash = trackHashMakeWithComposites(db,chrom,&tdbList,FALSE); cartWebStart(cart, db, "Search for Downloadable Files in the %s %s Assembly", organism, hFreezeFromDb(db)); webIncludeResourceFile("HGStyle.css"); webIncludeResourceFile("jquery-ui.css"); webIncludeResourceFile("ui.dropdownchecklist.css"); jsIncludeFile("jquery.js", NULL); jsIncludeFile("jquery-ui.js", NULL); //jsIncludeFile("ui.core.js",NULL); // NOTE: This appears to be not needed as long as jquery-ui.js comes before ui.dropdownchecklist.js jsIncludeFile("ui.dropdownchecklist.js",NULL); jsIncludeFile("utils.js",NULL); // This line is needed to get the multi-selects initialized //printf("\n"); printf("\n"); doFileSearch(db,organism,cart,tdbList); printf("
\n"); webEnd(); } char *excludeVars[] = { "submit", "Submit", "g", "ajax", FILE_SEARCH,TRACK_SEARCH_ADD_ROW,TRACK_SEARCH_DEL_ROW}; // HOW IS "ajax" going to be supported? int main(int argc, char *argv[]) /* Process command line. */ { cgiSpoof(&argc, argv); htmlSetBackground(hBackgroundImage()); cartEmptyShell(doMiddle, hUserCookie(), excludeVars, NULL); return 0; } // TODO: // 1) Done: Limit to first 1000 // 2) SORT OF: Work out strangeness with dropdownchecklist and use in hgTracks (By some miracle multiselect is working in my hgTracks) // 3) Work out support for selecting composites and limiting search to those // 4) Work out simple versus advanced tabs // 5) work out support for non-encode downloads // 6) Make an hgTrackSearch to replace hgTracks track search ?? Simpler code, but may not be a good idea.