f2c11911e71a48636cd19a36231f7b60b43a2764 tdreszer Wed Sep 22 11:38:29 2010 -0700 Bug in js caused many more cart vars to be set than desired. Also made view in browser button reflect whether changes have been made, rather than whether tracks are checked. diff --git src/hg/hgTracks/searchTracks.c src/hg/hgTracks/searchTracks.c index 4cde524..e5f6904 100644 --- src/hg/hgTracks/searchTracks.c +++ src/hg/hgTracks/searchTracks.c @@ -1,833 +1,833 @@ /* Track search code used by hgTracks CGI */ #include "common.h" #include "searchTracks.h" #include "hCommon.h" #include "memalloc.h" #include "obscure.h" #include "dystring.h" #include "hash.h" #include "cheapcgi.h" #include "hPrint.h" #include "htmshell.h" #include "cart.h" #include "hgTracks.h" #include "web.h" #include "jksql.h" #include "hdb.h" #include "trix.h" #include "jsHelper.h" #include "imageV2.h" static char const rcsid[] = "$Id: searchTracks.c,v 1.11 2010/06/11 18:21:40 larrym Exp $"; #define ANYLABEL "Any" #define METADATA_NAME_PREFIX "hgt.metadataName" #define METADATA_VALUE_PREFIX "hgt.metadataValue" static int gCmpGroup(const void *va, const void *vb) /* Compare groups based on label. */ { const struct group *a = *((struct group **)va); const struct group *b = *((struct group **)vb); return strcmp(a->label, b->label); } // Would like to do a radio button choice ofsorts #define SORT_BY_HIERARCHY #ifdef SORT_BY_HIERARCHY #define SORT_BY_VAR "findTracksSortBy" #define SORT_BY_ABC "abc" #define SORT_BY_HIER "hier" static int gCmpTrackHierarchy(const void *va, const void *vb) /* Compare tracks based on longLabel. */ { const struct slRef *aa = *((struct slRef **)va); const struct slRef *bb = *((struct slRef **)vb); const struct track *a = ((struct track *) aa->val); const struct track *b = ((struct track *) bb->val); if ( tdbIsSuperTrack(a->tdb) && !tdbIsSuperTrack(b->tdb)) return -1; else if (!tdbIsSuperTrack(a->tdb) && tdbIsSuperTrack(b->tdb)) return 1; if ( tdbIsComposite(a->tdb) && !tdbIsComposite(b->tdb)) return -1; else if (!tdbIsComposite(a->tdb) && tdbIsComposite(b->tdb)) return 1; if (!tdbIsCompositeChild(a->tdb) && tdbIsCompositeChild(b->tdb)) return -1; else if ( tdbIsCompositeChild(a->tdb) && !tdbIsCompositeChild(b->tdb)) return 1; return strcasecmp(a->longLabel, b->longLabel); } #endif///def SORT_BY_HIERARCHY static int gCmpTrack(const void *va, const void *vb) /* Compare tracks based on longLabel. */ { const struct slRef *aa = *((struct slRef **)va); const struct slRef *bb = *((struct slRef **)vb); const struct track *a = ((struct track *) aa->val); const struct track *b = ((struct track *) bb->val); return strcasecmp(a->longLabel, b->longLabel); } // XXXX make a matchString function to support "contains", "is" etc. and wildcards in contains // ((sameString(op, "is") && !strcasecmp(track->shortLabel, str)) || static boolean isNameMatch(struct track *track, char *str, char *op) { return str && strlen(str) && ((sameString(op, "is") && !strcasecmp(track->shortLabel, str)) || (sameString(op, "is") && !strcasecmp(track->longLabel, str)) || (sameString(op, "contains") && containsStringNoCase(track->shortLabel, str) != NULL) || (sameString(op, "contains") && containsStringNoCase(track->longLabel, str) != NULL)); } static boolean isDescriptionMatch(struct track *track, char **words, int wordCount) // We parse str and look for every word at the start of any word in track description (i.e. google style). { if(words) { // We do NOT lookup up parent hierarchy for html descriptions. char *html = track->tdb->html; if(!isEmpty(html)) { /* This probably could be made more efficient by parsing the html into some kind of b-tree, but I am assuming that the inner html loop while only happen for 1-2 words for vast majority of the tracks. */ int i, numMatches = 0; html = stripRegEx(html, "<[^>]*>", REG_ICASE); for(i = 0; i < wordCount; i++) { char *needle = words[i]; char *haystack, *tmp = cloneString(html); boolean found = FALSE; while((haystack = nextWord(&tmp))) { char *ptr = strstrNoCase(haystack, needle); if(ptr != NULL && ptr == haystack) { found = TRUE; break; } } if(found) numMatches++; else break; } if(numMatches == wordCount) return TRUE; } } return FALSE; } static int getTermArray(struct sqlConnection *conn, char ***terms, char *type) // Pull out all term fields from ra entries with given type // Returns count of items found and items via the terms argument. { struct sqlResult *sr = NULL; char **row = NULL; char query[256]; struct slName *termList = NULL; int i, count = 0; char **retVal; safef(query, sizeof(query), "select distinct val from metaDb where var = '%s'", type); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { slNameAddHead(&termList, row[0]); count++; } sqlFreeResult(&sr); slSort(&termList, slNameCmpCase); count++; // make room for "Any" AllocArray(retVal, count); retVal[0] = cloneString(ANYLABEL); for(i = 1; termList != NULL;termList = termList->next, i++) { retVal[i] = cloneString(termList->name); } *terms = retVal; return count; } static struct slName *metaDbSearch(struct sqlConnection *conn, char *name, char *val, char *op) // Search the assembly's metaDb table for var; If name == NULL, we search every metadata field. // Search is via mysql, so it's case-insensitive. { char query[256]; char *prefix = "select distinct obj from metaDb"; if(sameString(op, "contains")) if(name == NULL) safef(query, sizeof(query), "%s where val like '%%%s%%'", prefix, val); else safef(query, sizeof(query), "%s where var = '%s' and val like '%%%s%%'", prefix, name, val); else if(name == NULL) safef(query, sizeof(query), "%s where val = '%s'", prefix, val); else safef(query, sizeof(query), "%s where var = '%s' and val = '%s'", prefix, name, val); return sqlQuickList(conn, query); } static int metaDbVars(struct sqlConnection *conn, char *** metaVars, char *** metaLabels) // Search the assemblies metaDb table; If name == NULL, we search every metadata field. { char query[256]; #define WHITE_LIST_COUNT 35 #ifdef WHITE_LIST_COUNT #define WHITE_LIST_VAR 0 #define WHITE_LIST_LABEL 1 char *whiteList[WHITE_LIST_COUNT][2] = { {"age", "Age of experimental organism"}, {"antibody", "Antibody or target protein"}, {"origAssembly", "Assembly originally mapped to"}, {"cell", "Cell, tissue or DNA sample"}, {"localization", "Cell compartment"}, {"control", "Control or Input for ChIPseq"}, //{"controlId", "ControlId - explicit relationship"}, {"dataType", "Experiment type"}, {"dataVersion", "ENCODE release"}, //{"fragLength", "Fragment Length for ChIPseq"}, //{"freezeDate", "Gencode freeze date"}, //{"level", "Gencode level"}, //{"annotation", "Gencode annotation"}, {"accession", "GEO accession"}, {"growthProtocol", "Growth Protocol"}, {"lab", "Lab producing data"}, {"labVersion", "Lab specific details"}, {"labExpId", "Lab specific identifier"}, {"softwareVersion", "Lab specific informatics"}, {"protocol", "Library Protocol"}, {"mapAlgorithm", "Mapping algorithm"}, {"readType", "Paired/Single reads lengths"}, {"grant", "Prinipal Investigator"}, {"replicate", "Replicate number"}, //{"restrictionEnzyme","Restriction Enzyme used"}, //{"ripAntibody", "RIP Antibody"}, //{"ripTgtProtein", "RIP Target Protein"}, {"rnaExtract", "RNA Extract"}, {"seqPlatform", "Sequencing Platform"}, {"setType", "Experiment or Input"}, {"sex", "Sex of organism"}, {"strain", "Strain of organism"}, {"subId", "Submission Id"}, {"treatment", "Treatment"}, {"view", "View - Peaks or Signals"}, }; // FIXME: The whitelist should be a table or ra // FIXME: The whitelist should be in list order // FIXME: Should read in list, then verify that an mdb val exists. char **retVar = needMem(sizeof(char *) * WHITE_LIST_COUNT); char **retLab = needMem(sizeof(char *) * WHITE_LIST_COUNT); int ix,count; for(ix=0,count=0;ix 0) { retVar[count] = whiteList[ix][WHITE_LIST_VAR]; retLab[count] = whiteList[ix][WHITE_LIST_LABEL]; count++; } } if(count == 0) { freez(&retVar); freez(&retLab); } *metaVars = retVar; *metaLabels = retLab; return count; #else///ifndef WHITE_LIST_COUNT char **retVar; char **retLab; struct slName *el, *varList = NULL; struct sqlResult *sr = NULL; char **row = NULL; safef(query, sizeof(query), "select distinct var from metaDb order by var"); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) slNameAddHead(&varList, row[0]); sqlFreeResult(&sr); retVar = needMem(sizeof(char *) * slCount(varList)); retLab = needMem(sizeof(char *) * slCount(varList)); slReverse(&varList); //slNameSort(&varList); int count = 0; for (el = varList; el != NULL; el = el->next) { retVar[count] = el->name; retLab[count] = el->name; count++; } *metaVars = retVar; *whiteLabels = retLab; return count; #endif///ndef WHITE_LIST_COUNT } void doSearchTracks(struct group *groupList) { struct group *group; char *groups[128]; char *labels[128]; int numGroups = 1; groups[0] = ANYLABEL; labels[0] = ANYLABEL; char *currentTab = cartUsualString(cart, "hgt.currentSearchTab", "simpleTab"); char *nameSearch = cartOptionalString(cart, "hgt.nameSearch"); char *descSearch; char *groupSearch = cartOptionalString(cart, "hgt.groupSearch"); boolean doSearch = sameString(cartOptionalString(cart, searchTracks), "Search") || cartUsualInt(cart, "hgt.forceSearch", 0) == 1; struct sqlConnection *conn = hAllocConn(database); boolean metaDbExists = sqlTableExists(conn, "metaDb"); struct slRef *tracks = NULL; int numMetadataSelects, tracksFound = 0; int numMetadataNonEmpty = 0; char **mdbVar; char **mdbVal; struct hash *parents = newHash(4); boolean simpleSearch; struct trix *trix; char trixFile[HDB_MAX_PATH_STRING]; char **descWords = NULL; int descWordCount = 0; boolean searchTermsExist = FALSE; if(sameString(currentTab, "simpleTab")) { descSearch = cartOptionalString(cart, "hgt.simpleSearch"); simpleSearch = TRUE; freez(&nameSearch); freez(&groupSearch); } else { descSearch = cartOptionalString(cart, "hgt.descSearch"); simpleSearch = FALSE; } getSearchTrixFile(database, trixFile, sizeof(trixFile)); trix = trixOpen(trixFile); getTrackList(&groupList, -2); slSort(&groupList, gCmpGroup); for (group = groupList; group != NULL; group = group->next) { if (group->trackList != NULL) { groups[numGroups] = cloneString(group->name); labels[numGroups] = cloneString(group->label); numGroups++; if (numGroups >= ArraySize(groups)) internalErr(); } } webStartWrapperDetailedNoArgs(cart, database, "", "Search for Tracks", FALSE, FALSE, FALSE, FALSE); hPrintf("
"); hPrintf("
\n\n", hgTracksName()); cartSaveSession(cart); // Creates hidden var of hgsid to avoid bad voodoo hPrintf("\n", database); hPrintf("\n", currentTab); hPrintf("\n"); hPrintf("\n"); hPrintf("\n"); hPrintf("\n"); hPrintf("
\n"); hPrintf("next) descWords[i] = strLower(el->name); } #ifdef SORT_BY_HIERARCHY boolean sortByHierarchy = sameString(cartUsualString(cart,SORT_BY_VAR,SORT_BY_HIER),SORT_BY_HIER); #endif///def SORT_BY_HIERARCHY if(doSearch) { if(simpleSearch) { struct trixSearchResult *tsList; struct hash *trackHash = newHash(0); // Create a hash of tracks, so we can map the track name into a track struct. for (group = groupList; group != NULL; group = group->next) { struct trackRef *tr; for (tr = group->trackList; tr != NULL; tr = tr->next) { struct track *track = tr->track; hashAdd(trackHash, track->track, track); struct track *subTrack = track->subtracks; for (subTrack = track->subtracks; subTrack != NULL; subTrack = subTrack->next) hashAdd(trackHash, subTrack->track, subTrack); } } for(tsList = trixSearch(trix, descWordCount, descWords, TRUE); tsList != NULL; tsList = tsList->next) { struct track *track = (struct track *) hashFindVal(trackHash, tsList->itemId); if (track != NULL) { refAdd(&tracks, track); tracksFound++; } //else // FIXME: Should get to the bottom of why some of these are null // warn("found trix track is NULL."); } #ifdef SORT_BY_HIERARCHY slSort(&tracks, sortByHierarchy? gCmpTrackHierarchy:gCmpTrack); #else///ifndef SORT_BY_HIERARCHY slReverse(&tracks); #endif///ndef SORT_BY_HIERARCHY } else if(!isEmpty(nameSearch) || descSearch != NULL || groupSearch != NULL || numMetadataNonEmpty) { // First do the metaDb searches, which can be done quickly for all tracks with db queries. struct hash *matchingTracks = newHash(0); struct hash *trackMetadata = newHash(0); struct slName *el, *metaTracks = NULL; int i; for(i = 0; i < numMetadataSelects; i++) { if(!isEmpty(mdbVal[i])) { struct slName *tmp = metaDbSearch(conn, mdbVar[i], mdbVal[i], "is"); if(metaTracks == NULL) metaTracks = tmp; else metaTracks = slNameIntersection(metaTracks, tmp); } } for (el = metaTracks; el != NULL; el = el->next) hashAddInt(matchingTracks, el->name, 1); if(metaDbExists && !isEmpty(descSearch)) { // Load all metadata words for each track to facilitate metadata search. char query[256]; struct sqlResult *sr = NULL; char **row; safef(query, sizeof(query), "select obj, val from metaDb"); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { char *str = cloneString(row[1]); hashAdd(trackMetadata, row[0], str); } sqlFreeResult(&sr); } for (group = groupList; group != NULL; group = group->next) { if(groupSearch == NULL || sameString(group->name, groupSearch)) { if (group->trackList != NULL) { struct trackRef *tr; for (tr = group->trackList; tr != NULL; tr = tr->next) { struct track *track = tr->track; if((isEmpty(nameSearch) || isNameMatch(track, nameSearch, "contains")) && (isEmpty(descSearch) || isDescriptionMatch(track, descWords, descWordCount)) && (!numMetadataNonEmpty || hashLookup(matchingTracks, track->track) != NULL)) { if (track != NULL) { tracksFound++; refAdd(&tracks, track); } else warn("found group track is NULL."); } if (track->subtracks != NULL) { struct track *subTrack; for (subTrack = track->subtracks; subTrack != NULL; subTrack = subTrack->next) { if((isEmpty(nameSearch) || isNameMatch(subTrack, nameSearch, "contains")) && (isEmpty(descSearch) || isDescriptionMatch(subTrack, descWords, descWordCount)) && (!numMetadataNonEmpty || hashLookup(matchingTracks, subTrack->track) != NULL)) { // XXXX to parent hash. - use tdb->parent instead. hashAdd(parents, subTrack->track, track); if (track != NULL) { tracksFound++; refAdd(&tracks, subTrack); } else warn("found subtrack is NULL."); } } } } } } } #ifdef SORT_BY_HIERARCHY slSort(&tracks, sortByHierarchy? gCmpTrackHierarchy:gCmpTrack); #else///ifndef SORT_BY_HIERARCHY slSort(&tracks, gCmpTrack); #endif///ndef SORT_BY_HIERARCHY } } -hPrintf("