37cca719b0da877b05e1e45f63f511646e837423 larrym Thu Oct 21 15:44:00 2010 -0700 remove metaDbExists parameter from printMdbSelects call diff --git src/hg/hgTracks/searchTracks.c src/hg/hgTracks/searchTracks.c index 583bb5f..c9c50e7 100644 --- src/hg/hgTracks/searchTracks.c +++ src/hg/hgTracks/searchTracks.c @@ -1,927 +1,926 @@ /* Track search code used by hgTracks CGI */ #include "common.h" #include "searchTracks.h" #include "hCommon.h" #include "memalloc.h" #include "obscure.h" #include "dystring.h" #include "hash.h" #include "cheapcgi.h" #include "hPrint.h" #include "htmshell.h" #include "cart.h" #include "hgTracks.h" #include "web.h" #include "jksql.h" #include "hdb.h" #include "mdb.h" #include "trix.h" #include "jsHelper.h" #include "imageV2.h" static char const rcsid[] = "$Id: searchTracks.c,v 1.11 2010/06/11 18:21:40 larrym Exp $"; #define ANYLABEL "Any" #define METADATA_NAME_PREFIX "hgt.metadataName" #define METADATA_VALUE_PREFIX "hgt.metadataValue" static int gCmpGroup(const void *va, const void *vb) /* Compare groups based on label. */ { const struct group *a = *((struct group **)va); const struct group *b = *((struct group **)vb); return strcmp(a->label, b->label); } // Would like to do a radio button choice ofsorts #define SORT_BY_VAR "hgt_sortFound" enum sortBy { sbRelevance=0, sbAbc =1, sbHierarchy=2, }; static int gCmpTrackHierarchy(const void *va, const void *vb) /* Compare tracks based on longLabel. */ { const struct slRef *aa = *((struct slRef **)va); const struct slRef *bb = *((struct slRef **)vb); const struct track *a = ((struct track *) aa->val); const struct track *b = ((struct track *) bb->val); if ( tdbIsFolder(a->tdb) && !tdbIsFolder(b->tdb)) return -1; else if (!tdbIsFolder(a->tdb) && tdbIsFolder(b->tdb)) return 1; if ( tdbIsContainer(a->tdb) && !tdbIsContainer(b->tdb)) return -1; else if (!tdbIsContainer(a->tdb) && tdbIsContainer(b->tdb)) return 1; if (!tdbIsContainerChild(a->tdb) && tdbIsContainerChild(b->tdb)) return -1; else if ( tdbIsContainerChild(a->tdb) && !tdbIsContainerChild(b->tdb)) return 1; return strcasecmp(a->longLabel, b->longLabel); } static int gCmpTrack(const void *va, const void *vb) /* Compare tracks based on longLabel. */ { const struct slRef *aa = *((struct slRef **)va); const struct slRef *bb = *((struct slRef **)vb); const struct track *a = ((struct track *) aa->val); const struct track *b = ((struct track *) bb->val); return strcasecmp(a->longLabel, b->longLabel); } static void findTracksSort(struct slRef **pTrack, boolean simpleSearch, enum sortBy sortBy) { if (sortBy == sbHierarchy) slSort(pTrack, gCmpTrackHierarchy); else if (sortBy == sbAbc) slSort(pTrack, gCmpTrack); else slReverse(pTrack); } // XXXX make a matchString function to support "contains", "is" etc. and wildcards in contains // ((sameString(op, "is") && !strcasecmp(track->shortLabel, str)) || static boolean isNameMatch(struct track *track, char *str, char *op) { return str && strlen(str) && ((sameString(op, "is") && !strcasecmp(track->shortLabel, str)) || (sameString(op, "is") && !strcasecmp(track->longLabel, str)) || (sameString(op, "contains") && containsStringNoCase(track->shortLabel, str) != NULL) || (sameString(op, "contains") && containsStringNoCase(track->longLabel, str) != NULL)); } static boolean isDescriptionMatch(struct track *track, char **words, int wordCount) // We parse str and look for every word at the start of any word in track description (i.e. google style). { if(words) { // We do NOT lookup up parent hierarchy for html descriptions. char *html = track->tdb->html; if(!isEmpty(html)) { /* This probably could be made more efficient by parsing the html into some kind of b-tree, but I am assuming that the inner html loop while only happen for 1-2 words for vast majority of the tracks. */ int i, numMatches = 0; html = stripRegEx(html, "<[^>]*>", REG_ICASE); for(i = 0; i < wordCount; i++) { char *needle = words[i]; char *haystack, *tmp = cloneString(html); boolean found = FALSE; while((haystack = nextWord(&tmp))) { char *ptr = strstrNoCase(haystack, needle); if(ptr != NULL && ptr == haystack) { found = TRUE; break; } } if(found) numMatches++; else break; } if(numMatches == wordCount) return TRUE; } } return FALSE; } static int getTermArray(struct sqlConnection *conn, char ***pLabels, char ***pTerms, char *type) // Pull out all term fields from ra entries with given type // Returns count of items found and items via the terms argument. { int ix = 0, count = 0; char **labels; char **values; struct slPair *pairs = mdbValLabelSearch(conn, type, MDB_VAL_STD_TRUNCATION, TRUE, FALSE); // Tables not files count = slCount(pairs) + 1; // make room for "Any" AllocArray(labels, count); AllocArray(values, count); labels[ix] = cloneString(ANYLABEL); values[ix] = cloneString(ANYLABEL); struct slPair *pair = NULL; while((pair = slPopHead(&pairs)) != NULL) { ix++; labels[ix] = pair->name; values[ix] = pair->val; freeMem(pair); } *pLabels = labels; *pTerms = values; return count; } static int metaDbVars(struct sqlConnection *conn, char *** metaVars, char *** metaLabels) // Search the assemblies metaDb table; If name == NULL, we search every metadata field. { char query[256]; #define WHITE_LIST_COUNT 35 #ifdef WHITE_LIST_COUNT #define WHITE_LIST_VAR 0 #define WHITE_LIST_LABEL 1 char *whiteList[WHITE_LIST_COUNT][2] = { {"age", "Age of experimental organism"}, {"antibody", "Antibody or target protein"}, {"origAssembly", "Assembly originally mapped to"}, {"cell", "Cell, tissue or DNA sample"}, {"localization", "Cell compartment"}, {"control", "Control or Input for ChIPseq"}, //{"controlId", "ControlId - explicit relationship"}, {"dataType", "Experiment type"}, {"dataVersion", "ENCODE release"}, //{"fragLength", "Fragment Length for ChIPseq"}, //{"freezeDate", "Gencode freeze date"}, //{"level", "Gencode level"}, //{"annotation", "Gencode annotation"}, {"geoSample", "GEO accession"}, {"growthProtocol", "Growth Protocol"}, {"lab", "Lab producing data"}, {"labVersion", "Lab specific details"}, {"labExpId", "Lab specific identifier"}, {"softwareVersion", "Lab specific informatics"}, {"protocol", "Library Protocol"}, {"mapAlgorithm", "Mapping algorithm"}, {"readType", "Paired/Single reads lengths"}, {"grant", "Principal Investigator"}, {"replicate", "Replicate number"}, //{"restrictionEnzyme","Restriction Enzyme used"}, //{"ripAntibody", "RIP Antibody"}, //{"ripTgtProtein", "RIP Target Protein"}, {"rnaExtract", "RNA Extract"}, {"seqPlatform", "Sequencing Platform"}, {"setType", "Experiment or Input"}, {"sex", "Sex of organism"}, {"strain", "Strain of organism"}, {"subId", "Submission Id"}, {"treatment", "Treatment"}, {"view", "View - Peaks or Signals"}, }; // FIXME: The whitelist should be a table or ra // FIXME: The whitelist should be in list order // FIXME: Should read in list, then verify that an mdb val exists. char **retVar = needMem(sizeof(char *) * WHITE_LIST_COUNT); char **retLab = needMem(sizeof(char *) * WHITE_LIST_COUNT); int ix,count; for(ix=0,count=0;ix 0) { retVar[count] = whiteList[ix][WHITE_LIST_VAR]; retLab[count] = whiteList[ix][WHITE_LIST_LABEL]; count++; } } if(count == 0) { freez(&retVar); freez(&retLab); } *metaVars = retVar; *metaLabels = retLab; return count; #else///ifndef WHITE_LIST_COUNT char **retVar; char **retLab; struct slName *el, *varList = NULL; struct sqlResult *sr = NULL; char **row = NULL; safef(query, sizeof(query), "select distinct var from metaDb order by var"); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) slNameAddHead(&varList, row[0]); sqlFreeResult(&sr); retVar = needMem(sizeof(char *) * slCount(varList)); retLab = needMem(sizeof(char *) * slCount(varList)); slReverse(&varList); //slNameSort(&varList); int count = 0; for (el = varList; el != NULL; el = el->next) { retVar[count] = el->name; retLab[count] = el->name; count++; } *metaVars = retVar; *whiteLabels = retLab; return count; #endif///ndef WHITE_LIST_COUNT } -static int printMdbSelects(struct sqlConnection *conn,struct cart *cart,boolean metaDbExists,boolean simpleSearch,char ***pMdbVar,char ***pMdbVal,int *numMetadataNonEmpty,int cols) +static int printMdbSelects(struct sqlConnection *conn,struct cart *cart,boolean simpleSearch,char ***pMdbVar,char ***pMdbVal,int *numMetadataNonEmpty,int cols) // Prints a table of mdb selects if appropriate and returns number of them { // figure out how many metadata selects are visible. int delSearchSelect = cartUsualInt(cart, "hgt.delRow", 0); // 1-based row to delete int addSearchSelect = cartUsualInt(cart, "hgt.addRow", 0); // 1-based row to insert after int numMetadataSelects = 0; char **mdbVar = NULL; char **mdbVal = NULL; +int i, count; +char **mdbVars = NULL; +char **mdbVarLabels = NULL; for(;;) { char buf[256]; safef(buf, sizeof(buf), "%s%d", METADATA_NAME_PREFIX, numMetadataSelects + 1); char *str = cartOptionalString(cart, buf); if(isEmpty(str)) break; else numMetadataSelects++; } if(delSearchSelect) numMetadataSelects--; if(addSearchSelect) numMetadataSelects++; if(numMetadataSelects) { mdbVar = needMem(sizeof(char *) * numMetadataSelects); mdbVal = needMem(sizeof(char *) * numMetadataSelects); *pMdbVar = mdbVar; *pMdbVal = mdbVal; int i; for(i = 0; i < numMetadataSelects; i++) { char buf[256]; int offset; // used to handle additions/deletions if(addSearchSelect > 0 && i >= addSearchSelect) offset = 0; // do nothing to offset (i.e. copy data from previous row) else if(delSearchSelect > 0 && i + 1 >= delSearchSelect) offset = 2; else offset = 1; safef(buf, sizeof(buf), "%s%d", METADATA_NAME_PREFIX, i + offset); mdbVar[i] = cloneString(cartOptionalString(cart, buf)); + // XXXX we need to make sure mdbVar[i] is valid in this assembly if(!simpleSearch) { safef(buf, sizeof(buf), "%s%d", METADATA_VALUE_PREFIX, i + offset); mdbVal[i] = cloneString(cartOptionalString(cart, buf)); if(sameString(mdbVal[i], ANYLABEL)) mdbVal[i] = NULL; if(!isEmpty(mdbVal[i])) (*numMetadataNonEmpty)++; } } if(delSearchSelect > 0) { char buf[255]; safef(buf, sizeof(buf), "%s%d", METADATA_NAME_PREFIX, numMetadataSelects + 1); cartRemove(cart, buf); safef(buf, sizeof(buf), "%s%d", METADATA_VALUE_PREFIX, numMetadataSelects + 1); cartRemove(cart, buf); } } else { // create defaults numMetadataSelects = 2; mdbVar = needMem(sizeof(char *) * numMetadataSelects); mdbVal = needMem(sizeof(char *) * numMetadataSelects); mdbVar[0] = "cell"; mdbVar[1] = "antibody"; mdbVal[0] = ANYLABEL; mdbVal[1] = ANYLABEL; } -if(metaDbExists) - { - int i; - char **mdbVars = NULL; - char **mdbVarLabels = NULL; - int count = metaDbVars(conn, &mdbVars,&mdbVarLabels); +count = metaDbVars(conn, &mdbVars,&mdbVarLabels); hPrintf("ENCODE terms", cols,COLOR_DARKGREY); for(i = 0; i < numMetadataSelects; i++) { char **terms = NULL, **labels = NULL; char buf[256]; int len; #define PRINT_BUTTON(name,value,msg,js) printf("", (name), (value), (msg), (js)); hPrintf("\n"); if(numMetadataSelects > 2 || i >= 2) { safef(buf, sizeof(buf), "return delSearchSelect(this, %d);", i + 1); PRINT_BUTTON(searchTracks, "-", "delete this row", buf); } else hPrintf(" "); hPrintf("\n"); safef(buf, sizeof(buf), "return addSearchSelect(this, %d);", i + 1); PRINT_BUTTON(searchTracks, "+", "add another row after this row", buf); hPrintf("and \n"); safef(buf, sizeof(buf), "%s%i", METADATA_NAME_PREFIX, i + 1); cgiDropDownWithTextValsAndExtra(buf, mdbVarLabels, mdbVars,count,mdbVar[i],"class='mdbVar' onchange='findTracksMdbVarChanged(this);'"); hPrintf("is\n"); len = getTermArray(conn, &labels, &terms, mdbVar[i]); safef(buf, sizeof(buf), "%s%i", METADATA_VALUE_PREFIX, i + 1); cgiMakeDropListFull(buf, labels, terms, len, mdbVal[i], "class='mdbVal' style='min-width:200px;' onchange='findTracksSearchButtonsEnable(true);'"); hPrintf("help\n", i + 1); hPrintf("\n"); } - } + hPrintf(" ", cols); //hPrintf(" ", cols); return numMetadataSelects; } static struct slRef *simpleSearchForTracksstruct(struct trix *trix,char **descWords,int descWordCount) // Performs the simple search and returns the found tracks. { struct slRef *tracks = NULL; struct trixSearchResult *tsList; for(tsList = trixSearch(trix, descWordCount, descWords, TRUE); tsList != NULL; tsList = tsList->next) { struct track *track = (struct track *) hashFindVal(trackHash, tsList->itemId); if (track != NULL) // It is expected that this is NULL (e.g. when the trix references trackDb tracks which have no tables) { refAdd(&tracks, track); } } return tracks; } static struct slRef *advancedSearchForTracks(struct sqlConnection *conn,struct group *groupList, char **descWords,int descWordCount, char *nameSearch, char *descSearch, char *groupSearch, int numMetadataNonEmpty,int numMetadataSelects,char **mdbVar,char **mdbVal) // Performs the advanced search and returns the found tracks. { int tracksFound = 0; struct slRef *tracks = NULL; if(!isEmpty(nameSearch) || descSearch != NULL || groupSearch != NULL || numMetadataNonEmpty) { // First do the metaDb searches, which can be done quickly for all tracks with db queries. struct hash *matchingTracks = newHash(0); struct slName *el, *metaTracks = NULL; int i; for(i = 0; i < numMetadataSelects; i++) { if(!isEmpty(mdbVal[i])) { struct slName *tmp = mdbObjSearch(conn, mdbVar[i], mdbVal[i], "is", MDB_VAL_STD_TRUNCATION, TRUE, FALSE); if(metaTracks == NULL) metaTracks = tmp; else metaTracks = slNameIntersection(metaTracks, tmp); } } for (el = metaTracks; el != NULL; el = el->next) hashAddInt(matchingTracks, el->name, 1); struct group *group; for (group = groupList; group != NULL; group = group->next) { if(groupSearch == NULL || sameString(group->name, groupSearch)) { if (group->trackList != NULL) { struct trackRef *tr; for (tr = group->trackList; tr != NULL; tr = tr->next) { struct track *track = tr->track; if((isEmpty(nameSearch) || isNameMatch(track, nameSearch, "contains")) && (isEmpty(descSearch) || isDescriptionMatch(track, descWords, descWordCount)) && (!numMetadataNonEmpty || hashLookup(matchingTracks, track->track) != NULL)) { if (track != NULL) { tracksFound++; refAdd(&tracks, track); } else warn("found group track is NULL."); } if (track->subtracks != NULL) { struct track *subTrack; for (subTrack = track->subtracks; subTrack != NULL; subTrack = subTrack->next) { if((isEmpty(nameSearch) || isNameMatch(subTrack, nameSearch, "contains")) && (isEmpty(descSearch) || isDescriptionMatch(subTrack, descWords, descWordCount)) && (!numMetadataNonEmpty || hashLookup(matchingTracks, subTrack->track) != NULL)) { // XXXX to parent hash. - use tdb->parent instead. //hashAdd(parents, subTrack->track, track); if (track != NULL) { tracksFound++; refAdd(&tracks, subTrack); } else warn("found subtrack is NULL."); } } } } } } } } return tracks; } #define MAX_FOUND_TRACKS 100 #define FOUND_TRACKS_PAGING "hgt_startFrom" static void findTracksPageLinks(int tracksFound, int startFrom) { if (tracksFound <= MAX_FOUND_TRACKS) return; // Opener int curPage = (startFrom/MAX_FOUND_TRACKS) + 1; int endAt = startFrom+MAX_FOUND_TRACKS; if (endAt > tracksFound) endAt = tracksFound; hPrintf("Listing %d - %d of %d tracks   ",startFrom+1,endAt,tracksFound); // << and < hPrintf("<< ",FOUND_TRACKS_PAGING,0); if (startFrom >= MAX_FOUND_TRACKS) hPrintf(" < ",FOUND_TRACKS_PAGING,startFrom - MAX_FOUND_TRACKS); else hPrintf(" < "); // page number links int lastPage = (tracksFound/MAX_FOUND_TRACKS); if ((tracksFound % MAX_FOUND_TRACKS) > 0) lastPage++; int thisPage = curPage - 3; // Window of 3 pages above and below if (thisPage < 1) thisPage = 1; for (;thisPage <= lastPage && thisPage <= curPage + 3; thisPage++) { if (thisPage != curPage) { int willStartAt = ((thisPage - 1) * MAX_FOUND_TRACKS); endAt = willStartAt+ MAX_FOUND_TRACKS; if (endAt > tracksFound) endAt = tracksFound; hPrintf(" %d ", thisPage,willStartAt+1,endAt,FOUND_TRACKS_PAGING,((thisPage - 1) * MAX_FOUND_TRACKS),thisPage); } else hPrintf(" %d ",thisPage); } // > and >> if ((startFrom + MAX_FOUND_TRACKS) < tracksFound) hPrintf(" > ",FOUND_TRACKS_PAGING,(startFrom + MAX_FOUND_TRACKS)); else hPrintf(" > "); thisPage = tracksFound - (tracksFound % MAX_FOUND_TRACKS); if (thisPage == tracksFound) thisPage -= MAX_FOUND_TRACKS; hPrintf(" >>\n",FOUND_TRACKS_PAGING,thisPage); } static void displayFoundTracks(struct cart *cart, struct slRef *tracks, int tracksFound,enum sortBy sortBy) // Routine for displaying found tracks { hPrintf(""); // This div allows the clear button to empty it } void doSearchTracks(struct group *groupList) { struct group *group; char *groups[128]; char *labels[128]; int numGroups = 1; groups[0] = ANYLABEL; labels[0] = ANYLABEL; char *currentTab = cartUsualString(cart, "hgt.currentSearchTab", "simpleTab"); char *nameSearch = cartOptionalString(cart, "hgt.nameSearch"); char *descSearch; char *groupSearch = cartOptionalString(cart, "hgt.groupSearch"); boolean doSearch = sameString(cartOptionalString(cart, searchTracks), "Search") || cartUsualInt(cart, "hgt.forceSearch", 0) == 1; struct sqlConnection *conn = hAllocConn(database); boolean metaDbExists = sqlTableExists(conn, "metaDb"); int numMetadataSelects, tracksFound = 0; int numMetadataNonEmpty = 0; char **mdbVar = NULL; char **mdbVal = NULL; #ifdef ONE_FUNC struct hash *parents = newHash(4); #endif///def ONE_FUNC boolean simpleSearch; struct trix *trix; char trixFile[HDB_MAX_PATH_STRING]; char **descWords = NULL; int descWordCount = 0; boolean searchTermsExist = FALSE; int cols; char buf[512]; if(sameString(currentTab, "simpleTab")) { descSearch = cartOptionalString(cart, "hgt.simpleSearch"); simpleSearch = TRUE; freez(&nameSearch); freez(&groupSearch); } else { descSearch = cartOptionalString(cart, "hgt.descSearch"); simpleSearch = FALSE; } trackList = getTrackList(&groupList, -2); // global makeGlobalTrackHash(trackList); // NOTE: This is necessary when container cfg by '*' results in vis changes // This will handle composite/view override when subtrack specific vis exists, AND superTrack reshaping. #ifdef SUBTRACKS_HAVE_VIS parentChildCartCleanup(trackList,cart,oldVars); // Subtrack settings must be removed when composite/view settings are updated #endif///def SUBTRACKS_HAVE_VIS getSearchTrixFile(database, trixFile, sizeof(trixFile)); trix = trixOpen(trixFile); slSort(&groupList, gCmpGroup); for (group = groupList; group != NULL; group = group->next) { groupTrackListAddSuper(cart, group); if (group->trackList != NULL) { groups[numGroups] = cloneString(group->name); labels[numGroups] = cloneString(group->label); numGroups++; if (numGroups >= ArraySize(groups)) internalErr(); } } safef(buf, sizeof(buf),"Search for Tracks in the %s %s Assembly", organism, hFreezeFromDb(database)); webStartWrapperDetailedNoArgs(cart, database, "", buf, FALSE, FALSE, FALSE, FALSE); hPrintf("
"); hPrintf("
\n\n", hgTracksName()); cartSaveSession(cart); // Creates hidden var of hgsid to avoid bad voodoo safef(buf, sizeof(buf), "%lu", clock1()); cgiMakeHiddenVar("hgt_", buf); // timestamps page to avoid browser cache hPrintf("\n", database); hPrintf("\n", currentTab); hPrintf("\n"); hPrintf("\n"); hPrintf("\n"); hPrintf("\n"); hPrintf("
\n"); hPrintf("
"); // Restricts to max-width:1000px; if(descSearch != NULL && !strlen(descSearch)) descSearch = NULL; if(groupSearch != NULL && sameString(groupSearch, ANYLABEL)) groupSearch = NULL; if(!isEmpty(descSearch)) { char *tmp = cloneString(descSearch); char *val = nextWord(&tmp); struct slName *el, *descList = NULL; int i; while (val != NULL) { slNameAddTail(&descList, val); descWordCount++; val = nextWord(&tmp); } descWords = needMem(sizeof(char *) * descWordCount); for(i = 0, el = descList; el != NULL; i++, el = el->next) descWords[i] = strLower(el->name); } if (doSearch && simpleSearch && descWordCount <= 0) doSearch = FALSE; if(doSearch) { // Now search struct slRef *tracks = NULL; if(simpleSearch) tracks = simpleSearchForTracksstruct(trix,descWords,descWordCount); else tracks = advancedSearchForTracks(conn,groupList,descWords,descWordCount,nameSearch,descSearch,groupSearch,numMetadataNonEmpty,numMetadataSelects,mdbVar,mdbVal); // Sort and Print results enum sortBy sortBy = cartUsualInt(cart,SORT_BY_VAR,sbRelevance); tracksFound = slCount(tracks); if(tracksFound > 1) findTracksSort(&tracks,simpleSearch,sortBy); displayFoundTracks(cart,tracks,tracksFound,sortBy); } hFreeConn(&conn); webNewSection("About Track Search"); if(metaDbExists) hPrintf("

Search for terms in track descriptions, groups, names, and ENCODE " "metadata. If multiple terms are entered, only tracks with all terms " "will be part of the results."); else hPrintf("

Search for terms in track descriptions, groups, and names. " "If multiple terms are entered, only tracks with all terms " "will be part of the results."); hPrintf("
more help

\n"); webEndSectionTables(); }