59c4b5446987113a20a88c2860c469ae14dcc0b9 larrym Thu Jul 28 21:39:44 2011 -0700 switch to using in-memory json; refactor trackDbJson so it is part of hgTracks global in the client (redmine #4550) diff --git src/hg/hgTracks/searchTracks.c src/hg/hgTracks/searchTracks.c index d6a1790..6cb9905 100644 --- src/hg/hgTracks/searchTracks.c +++ src/hg/hgTracks/searchTracks.c @@ -1,852 +1,852 @@ /* Track search code used by hgTracks CGI */ #include "common.h" #include "search.h" #include "hCommon.h" #include "memalloc.h" #include "obscure.h" #include "dystring.h" #include "hash.h" #include "cheapcgi.h" #include "hPrint.h" #include "htmshell.h" #include "cart.h" #include "hgTracks.h" #include "web.h" #include "jksql.h" #include "hdb.h" #include "mdb.h" #include "fileUi.h" #include "trix.h" #include "jsHelper.h" #include "imageV2.h" #define TRACK_SEARCH_FORM "trackSearch" #define SEARCH_RESULTS_FORM "searchResults" #define TRACK_SEARCH_CURRENT_TAB "tsCurTab" #define TRACK_SEARCH_SIMPLE "tsSimple" #define TRACK_SEARCH_ON_NAME "tsName" #define TRACK_SEARCH_ON_TYPE "tsType" #define TRACK_SEARCH_ON_GROUP "tsGroup" #define TRACK_SEARCH_ON_DESCR "tsDescr" #define TRACK_SEARCH_SORT "tsSort" #define SUPPORT_QUOTES_IN_NAME_SEARCH static int gCmpGroup(const void *va, const void *vb) /* Compare groups based on label. */ { const struct group *a = *((struct group **)va); const struct group *b = *((struct group **)vb); return strcmp(a->label, b->label); } // Would like to do a radio button choice ofsorts enum sortBy { sbRelevance=0, sbAbc =1, sbHierarchy=2, }; static int gCmpTrackHierarchy(const void *va, const void *vb) /* Compare tracks based on longLabel. */ { const struct slRef *aa = *((struct slRef **)va); const struct slRef *bb = *((struct slRef **)vb); const struct track *a = ((struct track *) aa->val); const struct track *b = ((struct track *) bb->val); if ( tdbIsFolder(a->tdb) && !tdbIsFolder(b->tdb)) return -1; else if (!tdbIsFolder(a->tdb) && tdbIsFolder(b->tdb)) return 1; if ( tdbIsContainer(a->tdb) && !tdbIsContainer(b->tdb)) return -1; else if (!tdbIsContainer(a->tdb) && tdbIsContainer(b->tdb)) return 1; if (!tdbIsContainerChild(a->tdb) && tdbIsContainerChild(b->tdb)) return -1; else if ( tdbIsContainerChild(a->tdb) && !tdbIsContainerChild(b->tdb)) return 1; return strcasecmp(a->longLabel, b->longLabel); } static int gCmpTrack(const void *va, const void *vb) /* Compare tracks based on longLabel. */ { const struct slRef *aa = *((struct slRef **)va); const struct slRef *bb = *((struct slRef **)vb); const struct track *a = ((struct track *) aa->val); const struct track *b = ((struct track *) bb->val); return strcasecmp(a->longLabel, b->longLabel); } static void findTracksSort(struct slRef **pTrack, enum sortBy sortBy) { if (sortBy == sbHierarchy) slSort(pTrack, gCmpTrackHierarchy); else if (sortBy == sbAbc) slSort(pTrack, gCmpTrack); else slReverse(pTrack); } #ifndef SUPPORT_QUOTES_IN_NAME_SEARCH // XXXX make a matchString function to support "contains", "is" etc. and wildcards in contains // ((sameString(op, "is") && !strcasecmp(track->shortLabel, str)) || static boolean isNameMatch(struct track *track, char *str, char *op) { return str && strlen(str) && ((sameString(op, "is") && !strcasecmp(track->shortLabel, str)) || (sameString(op, "is") && !strcasecmp(track->longLabel, str)) || (sameString(op, "contains") && containsStringNoCase(track->shortLabel, str) != NULL) || (sameString(op, "contains") && containsStringNoCase(track->longLabel, str) != NULL)); } static boolean isDescriptionMatch(struct track *track, char **words, int wordCount) // We parse str and look for every word at the start of any word in track description (i.e. google style). { if(words) { // We do NOT lookup up parent hierarchy for html descriptions. char *html = track->tdb->html; if(!isEmpty(html)) { /* This probably could be made more efficient by parsing the html into some kind of b-tree, but I am assuming that the inner html loop while only happen for 1-2 words for vast majority of the tracks. */ int i, numMatches = 0; html = stripRegEx(html, "<[^>]*>", REG_ICASE); for(i = 0; i < wordCount; i++) { char *needle = words[i]; char *haystack, *tmp = cloneString(html); boolean found = FALSE; while((haystack = nextWord(&tmp))) { char *ptr = strstrNoCase(haystack, needle); if(ptr != NULL && ptr == haystack) { found = TRUE; break; } } if(found) numMatches++; else break; } if(numMatches == wordCount) return TRUE; } } return FALSE; } #endif///ndef SUPPORT_QUOTES_IN_NAME_SEARCH static int getFormatTypes(char ***pLabels, char ***pTypes) { char *crudeTypes[] = { ANYLABEL, "bam", "psl", "chain", "netAlign", "maf", "bed", "bigBed", "ctgPos", "expRatio", "genePred", "broadPeak", "narrowPeak", "rmsk", "bedGraph", "bigWig", "wig", "wigMaf" }; // Non-standard: // type altGraphX // type axt // type bed5FloatScore // type bed5FloatScoreWithFdr // type chromGraph // type clonePos // type coloredExon // type encodeFiveC // type factorSource // type ld2 // type logo // type maf // type sample // type wigMafProt 0.0 1.0 char *nicerTypes[] = { ANYLABEL, "Alignment binary (bam) - binary SAM", "Alignment Blast (psl) - Blast output", "Alignment Chains (chain) - Pairwise alignment", "Alignment Nets (netAlign) - Net alignments", "Alignments (maf) - multiple alignment format", "bed - browser extensible data", "bigBed - self index, often remote bed format", "ctgPos - Contigs", "expRatio - Expression ratios", "Genes (genePred) - Gene prediction and annotation", "Peaks Broad (broadPeak) - ENCODE large region peak format", "Peaks Narrow (narrowPeak) - ENCODE small region peak format", "Repeats (rmsk) - Repeat masking", "Signal (bedGraph) - graphically represented bed data", "Signal (bigWig) - self index, often remote wiggle format", "Signal (wig) - wiggle format", "Signal (wigMaf) - multiple alignment wiggle" }; int ix = 0, count = sizeof(crudeTypes)/sizeof(char *); char **labels; char **values; AllocArray(labels, count); AllocArray(values, count); for(ix=0;ixnext) { struct track *track = (struct track *) hashFindVal(trackHash, tsList->itemId); if (track != NULL) // It is expected that this is NULL (e.g. when the trix references trackDb tracks which have no tables) { refAdd(&tracks, track); } } return tracks; } static struct slRef *advancedSearchForTracks(struct sqlConnection *conn,struct group *groupList, char **descWords,int descWordCount, char *nameSearch, char *typeSearch, char *descSearch, char *groupSearch, struct slPair *mdbPairs) // Performs the advanced search and returns the found tracks. { int tracksFound = 0; struct slRef *tracks = NULL; int numMetadataNonEmpty = 0; struct slPair *pair = mdbPairs; for (; pair!= NULL;pair=pair->next) { if (!isEmpty((char *)(pair->val))) numMetadataNonEmpty++; } if(!isEmpty(nameSearch) || typeSearch != NULL || descSearch != NULL || groupSearch != NULL || numMetadataNonEmpty) { // First do the metaDb searches, which can be done quickly for all tracks with db queries. struct hash *matchingTracks = NULL; if (numMetadataNonEmpty) { struct mdbObj *mdbObj, *mdbObjs = mdbObjRepeatedSearch(conn,mdbPairs,TRUE,FALSE); if (mdbObjs) { for (mdbObj = mdbObjs; mdbObj != NULL; mdbObj = mdbObj->next) { if (matchingTracks == NULL) matchingTracks = newHash(0); hashAddInt(matchingTracks, mdbObj->obj, 1); } mdbObjsFree(&mdbObjs); } if (matchingTracks == NULL) return NULL; } #ifdef SUPPORT_QUOTES_IN_NAME_SEARCH // Set the word lists up once struct slName *nameList = NULL; if (nameSearch) nameList = slNameListOfUniqueWords(cloneString(nameSearch),TRUE); // TRUE means respect quotes struct slName *descList = NULL; if (descSearch) descList = slNameListOfUniqueWords(cloneString(descSearch),TRUE); #endif///def SUPPORT_QUOTES_IN_NAME_SEARCH struct group *group; for (group = groupList; group != NULL; group = group->next) { if(groupSearch == NULL || sameString(group->name, groupSearch)) { if (group->trackList != NULL) { struct trackRef *tr; for (tr = group->trackList; tr != NULL; tr = tr->next) { struct track *track = tr->track; char *trackType = cloneFirstWord(track->tdb->type); // will be spilled #ifdef SUPPORT_QUOTES_IN_NAME_SEARCH if((isEmpty(nameSearch) || searchNameMatches(track->tdb, nameList)) && (isEmpty(descSearch) || searchDescriptionMatches(track->tdb, descList)) #else///ifndef SUPPORT_QUOTES_IN_NAME_SEARCH if((isEmpty(nameSearch) || isNameMatch(track, nameSearch, "contains")) && (isEmpty(descSearch) || isDescriptionMatch(track, descWords, descWordCount)) #endif///ndef SUPPORT_QUOTES_IN_NAME_SEARCH && (isEmpty(typeSearch) || (sameWord(typeSearch, trackType) && !tdbIsComposite(track->tdb))) && (matchingTracks == NULL || hashLookup(matchingTracks, track->track) != NULL)) { if (track != NULL) { tracksFound++; refAdd(&tracks, track); } else warn("found group track is NULL."); } if (track->subtracks != NULL) { struct track *subTrack; for (subTrack = track->subtracks; subTrack != NULL; subTrack = subTrack->next) { trackType = cloneFirstWord(subTrack->tdb->type); // will be spilled #ifdef SUPPORT_QUOTES_IN_NAME_SEARCH if((isEmpty(nameSearch) || searchNameMatches(subTrack->tdb, nameList)) && (isEmpty(descSearch) || searchDescriptionMatches(subTrack->tdb, descList)) #else///ifndef SUPPORT_QUOTES_IN_NAME_SEARCH if((isEmpty(nameSearch) || isNameMatch(subTrack, nameSearch, "contains")) && (isEmpty(descSearch) || isDescriptionMatch(subTrack, descWords, descWordCount)) #endif///ndef SUPPORT_QUOTES_IN_NAME_SEARCH && (isEmpty(typeSearch) || sameWord(typeSearch, trackType)) && (matchingTracks == NULL || hashLookup(matchingTracks, subTrack->track) != NULL)) { if (track != NULL) { tracksFound++; refAdd(&tracks, subTrack); } else warn("found subtrack is NULL."); } } } } } } } } return tracks; } #define MAX_FOUND_TRACKS 100 static void findTracksPageLinks(int tracksFound, int startFrom) { if (tracksFound <= MAX_FOUND_TRACKS) return; // Opener int willStartAt = 0; int curPage = (startFrom/MAX_FOUND_TRACKS) + 1; int endAt = startFrom+MAX_FOUND_TRACKS; if (endAt > tracksFound) endAt = tracksFound; hPrintf("Listing %d - %d of %d tracks   ",startFrom+1,endAt,tracksFound); // << and < if (startFrom >= MAX_FOUND_TRACKS) { hPrintf("« ", TRACK_SEARCH,TRACK_SEARCH_PAGER,TRACK_SEARCH_PAGER); willStartAt = startFrom - MAX_FOUND_TRACKS; hPrintf("  ", TRACK_SEARCH,TRACK_SEARCH_PAGER,willStartAt,TRACK_SEARCH_PAGER,willStartAt); } // page number links int lastPage = (tracksFound/MAX_FOUND_TRACKS); if ((tracksFound % MAX_FOUND_TRACKS) > 0) lastPage++; int thisPage = curPage - 3; // Window of 3 pages above and below if (thisPage < 1) thisPage = 1; for (;thisPage <= lastPage && thisPage <= curPage + 3; thisPage++) { if (thisPage != curPage) { willStartAt = ((thisPage - 1) * MAX_FOUND_TRACKS); endAt = willStartAt+ MAX_FOUND_TRACKS; if (endAt > tracksFound) endAt = tracksFound; hPrintf(" %d ", TRACK_SEARCH,TRACK_SEARCH_PAGER,willStartAt,thisPage,willStartAt+1,endAt,TRACK_SEARCH_PAGER,willStartAt,thisPage); } else hPrintf(" %d ",COLOR_DARKGREY,thisPage); } // > and >> if ((startFrom + MAX_FOUND_TRACKS) < tracksFound) { willStartAt = startFrom + MAX_FOUND_TRACKS; hPrintf("  ", TRACK_SEARCH,TRACK_SEARCH_PAGER,willStartAt,TRACK_SEARCH_PAGER,willStartAt); willStartAt = tracksFound - (tracksFound % MAX_FOUND_TRACKS); if (willStartAt == tracksFound) willStartAt -= MAX_FOUND_TRACKS; hPrintf(" »\n", TRACK_SEARCH,TRACK_SEARCH_PAGER,willStartAt,TRACK_SEARCH_PAGER,willStartAt); } } static void displayFoundTracks(struct cart *cart, struct slRef *tracks, int tracksFound,enum sortBy sortBy) // Routine for displaying found tracks { hPrintf(""); // This div allows the clear button to empty it } void doSearchTracks(struct group *groupList) { if (!advancedJavascriptFeaturesEnabled(cart)) { warn("Requires advanced javascript features."); return; } webIncludeResourceFile("ui.dropdownchecklist.css"); jsIncludeFile("ui.dropdownchecklist.js",NULL); // This line is needed to get the multi-selects initialized #ifdef NEW_JQUERY jsIncludeFile("ddcl.js",NULL); hPrintf("\n"); #else///ifndef NEW_JQUERY hPrintf("\n"); hPrintf("\n"); #endif///ndef NEW_JQUERY struct group *group; char *groups[128]; char *labels[128]; int numGroups = 1; groups[0] = ANYLABEL; labels[0] = ANYLABEL; char *nameSearch = cartOptionalString(cart, TRACK_SEARCH_ON_NAME); char *typeSearch = cartOptionalString(cart, TRACK_SEARCH_ON_TYPE); char *descSearch = NULL; char *groupSearch = cartOptionalString(cart, TRACK_SEARCH_ON_GROUP); boolean doSearch = sameString(cartOptionalString(cart, TRACK_SEARCH), "Search") || cartUsualInt(cart, TRACK_SEARCH_PAGER, -1) >= 0; struct sqlConnection *conn = hAllocConn(database); boolean metaDbExists = sqlTableExists(conn, "metaDb"); int tracksFound = 0; struct trix *trix; char trixFile[HDB_MAX_PATH_STRING]; char **descWords = NULL; int descWordCount = 0; boolean searchTermsExist = FALSE; int cols; char buf[512]; enum searchTab selectedTab = simpleTab; char *currentTab = cartUsualString(cart, TRACK_SEARCH_CURRENT_TAB, "simpleTab"); if(sameString(currentTab, "simpleTab")) { selectedTab = simpleTab; descSearch = cartOptionalString(cart, TRACK_SEARCH_SIMPLE); freez(&nameSearch); } else if(sameString(currentTab, "advancedTab")) { selectedTab = advancedTab; descSearch = cartOptionalString(cart, TRACK_SEARCH_ON_DESCR); } #ifdef SUPPORT_QUOTES_IN_NAME_SEARCH if(descSearch && selectedTab == simpleTab) // TODO: could support quotes in simple tab by detecting quotes and choosing to use doesNameMatch() || doesDescriptionMatch() stripChar(descSearch, '"'); #else///ifndef SUPPORT_QUOTES_IN_NAME_SEARCH if(descSearch) stripChar(descSearch, '"'); #endif///ndef SUPPORT_QUOTES_IN_NAME_SEARCH trackList = getTrackList(&groupList, -2); // global makeGlobalTrackHash(trackList); // NOTE: This is necessary when container cfg by '*' results in vis changes // This will handle composite/view override when subtrack specific vis exists, AND superTrack reshaping. parentChildCartCleanup(trackList,cart,oldVars); // Subtrack settings must be removed when composite/view settings are updated getSearchTrixFile(database, trixFile, sizeof(trixFile)); trix = trixOpen(trixFile); slSort(&groupList, gCmpGroup); for (group = groupList; group != NULL; group = group->next) { groupTrackListAddSuper(cart, group); if (group->trackList != NULL) { groups[numGroups] = cloneString(group->name); labels[numGroups] = cloneString(group->label); numGroups++; if (numGroups >= ArraySize(groups)) internalErr(); } } safef(buf, sizeof(buf),"Search for Tracks in the %s %s Assembly", organism, hFreezeFromDb(database)); webStartWrapperDetailedNoArgs(cart, database, "", buf, FALSE, FALSE, FALSE, FALSE); hPrintf("
"); hPrintf("
\n\n", hgTracksName(),TRACK_SEARCH_FORM,TRACK_SEARCH_FORM); cartSaveSession(cart); // Creates hidden var of hgsid to avoid bad voodoo safef(buf, sizeof(buf), "%lu", clock1()); cgiMakeHiddenVar("hgt_", buf); // timestamps page to avoid browser cache hPrintf("\n", database); hPrintf("\n", TRACK_SEARCH_CURRENT_TAB, currentTab); hPrintf("\n",TRACK_SEARCH_DEL_ROW); hPrintf("\n",TRACK_SEARCH_ADD_ROW); hPrintf("\n",TRACK_SEARCH_PAGER); hPrintf("\n"); hPrintf("
\n"); hPrintf("
"); // Restricts to max-width:1000px; cgiDown(0.8); if (measureTiming) measureTime("Rendered tabs"); if(descSearch != NULL && !strlen(descSearch)) descSearch = NULL; if(groupSearch != NULL && sameString(groupSearch, ANYLABEL)) groupSearch = NULL; if(typeSearch != NULL && sameString(typeSearch, ANYLABEL)) typeSearch = NULL; if(!isEmpty(descSearch)) { char *tmp = cloneString(descSearch); char *val = nextWord(&tmp); struct slName *el, *descList = NULL; int i; while (val != NULL) { slNameAddTail(&descList, val); descWordCount++; val = nextWord(&tmp); } descWords = needMem(sizeof(char *) * descWordCount); for(i = 0, el = descList; el != NULL; i++, el = el->next) descWords[i] = strLower(el->name); } if (doSearch && selectedTab==simpleTab && descWordCount <= 0) doSearch = FALSE; if(doSearch) { // Now search struct slRef *tracks = NULL; if(selectedTab==simpleTab) tracks = simpleSearchForTracksstruct(trix,descWords,descWordCount); else if(selectedTab==advancedTab) tracks = advancedSearchForTracks(conn,groupList,descWords,descWordCount,nameSearch,typeSearch,descSearch,groupSearch,mdbSelects); if (measureTiming) measureTime("Searched for tracks"); // Sort and Print results if(selectedTab!=filesTab) { enum sortBy sortBy = cartUsualInt(cart,TRACK_SEARCH_SORT,sbRelevance); tracksFound = slCount(tracks); if(tracksFound > 1) findTracksSort(&tracks,sortBy); displayFoundTracks(cart,tracks,tracksFound,sortBy); if (measureTiming) measureTime("Displayed found tracks"); } slPairFreeList(&mdbSelects); } hFreeConn(&conn); webNewSection("About Track Search"); if(metaDbExists) hPrintf("

Search for terms in track names, descriptions, groups, and ENCODE " "metadata. If multiple terms are entered, only tracks with all terms " "will be part of the results."); else hPrintf("

Search for terms in track descriptions, groups, and names. " "If multiple terms are entered, only tracks with all terms " "will be part of the results."); hPrintf("
more help

\n"); // NOTE: Could use ajax and dynamically popup the html file in a box: //hPrintf("
more help

\n"); // NOTE: OR by declaring a div and passing it to retrieveHtml, the html file could be embedded in the div. // However, this is not desired here because of the titles. //hPrintf("
more help

\n"); //hPrintf("
\n"); webEndSectionTables(); }