77dc06c4bdc5e10f5d2705a51300ec028a7d785f tdreszer Fri Oct 1 10:51:48 2010 -0700 FindTracks now finds superTracks (Advanced only) and hgTrackUi now does superTrack reshaping when children have vis changed diff --git src/hg/hgTracks/searchTracks.c.fxit src/hg/hgTracks/searchTracks.c.fxit new file mode 100644 index 0000000..eeff438 --- /dev/null +++ src/hg/hgTracks/searchTracks.c.fxit @@ -0,0 +1,861 @@ +/* Track search code used by hgTracks CGI */ + +#include "common.h" +#include "searchTracks.h" +#include "hCommon.h" +#include "memalloc.h" +#include "obscure.h" +#include "dystring.h" +#include "hash.h" +#include "cheapcgi.h" +#include "hPrint.h" +#include "htmshell.h" +#include "cart.h" +#include "hgTracks.h" +#include "web.h" +#include "jksql.h" +#include "hdb.h" +#include "mdb.h" +#include "trix.h" +#include "jsHelper.h" +#include "imageV2.h" + +static char const rcsid[] = "$Id: searchTracks.c,v 1.11 2010/06/11 18:21:40 larrym Exp $"; + +#define ANYLABEL "Any" +#define METADATA_NAME_PREFIX "hgt.metadataName" +#define METADATA_VALUE_PREFIX "hgt.metadataValue" + +static int gCmpGroup(const void *va, const void *vb) +/* Comparison callback for slSort(): order groups by label using a case-sensitive strcmp(). */ +{ +const struct group *a = *((struct group **)va); +const struct group *b = *((struct group **)vb); +return strcmp(a->label, b->label); +} + +// Would like to do a radio button choice of sorts +#define FINDTRACKS_SORT +#ifdef FINDTRACKS_SORT +#define SORT_BY_VAR "findTracksSortBy" +#define SORT_BY_ABC "abc" +#define SORT_BY_HIER "hier" +#define SORT_BY_REL "rel" +enum sortBy + { + sbRelevance=0, + sbAbc =1, + sbHierarchy=2, + }; +static int gCmpTrackHierarchy(const void *va, const void *vb) +/* Compare slRef-wrapped tracks by hierarchy: tracks for which tdbIsFolder() is true sort first, then those for which tdbIsContainer() is true, then tracks that are not container children ahead of those that are; ties fall back to a case-insensitive longLabel comparison. 
*/ +{ +const struct slRef *aa = *((struct slRef **)va); +const struct slRef *bb = *((struct slRef **)vb); +const struct track *a = ((struct track *) aa->val); +const struct track *b = ((struct track *) bb->val); +/* Precedence: folder status, then container status, then container-child status; longLabel (ignoring case) breaks ties. */ + if ( tdbIsFolder(a->tdb) && !tdbIsFolder(b->tdb)) + return -1; +else if (!tdbIsFolder(a->tdb) && tdbIsFolder(b->tdb)) + return 1; + if ( tdbIsContainer(a->tdb) && !tdbIsContainer(b->tdb)) + return -1; +else if (!tdbIsContainer(a->tdb) && tdbIsContainer(b->tdb)) + return 1; + if (!tdbIsContainerChild(a->tdb) && tdbIsContainerChild(b->tdb)) + return -1; +else if ( tdbIsContainerChild(a->tdb) && !tdbIsContainerChild(b->tdb)) + return 1; +return strcasecmp(a->longLabel, b->longLabel); +} + +//static int gCmpTrackReverseHierarchy(const void *va, const void *vb) +///* Compare tracks in reverse hierarchy order (container children first, then containers, then non-folders ahead of folders), then by longLabel ignoring case. */ +//{ +//const struct slRef *aa = *((struct slRef **)va); +//const struct slRef *bb = *((struct slRef **)vb); +//const struct track *a = ((struct track *) aa->val); +//const struct track *b = ((struct track *) bb->val); +// if ( tdbIsContainerChild(a->tdb) && !tdbIsContainerChild(b->tdb)) +// return -1; +//else if (!tdbIsContainerChild(a->tdb) && tdbIsContainerChild(b->tdb)) +// return 1; +// if ( tdbIsContainer(a->tdb) && !tdbIsContainer(b->tdb)) +// return -1; +//else if (!tdbIsContainer(a->tdb) && tdbIsContainer(b->tdb)) +// return 1; +// if (!tdbIsFolder(a->tdb) && tdbIsFolder(b->tdb)) +// return -1; +//else if ( tdbIsFolder(a->tdb) && !tdbIsFolder(b->tdb)) +// return 1; +//return strcasecmp(a->longLabel, b->longLabel); +//} +#endif///def FINDTRACKS_SORT + +static int gCmpTrack(const void *va, const void *vb) +/* Comparison callback for slSort(): order slRef-wrapped tracks by longLabel, ignoring case. 
*/ +{ +const struct slRef *aa = *((struct slRef **)va); +const struct slRef *bb = *((struct slRef **)vb); +const struct track *a = ((struct track *) aa->val); +const struct track *b = ((struct track *) bb->val); +return strcasecmp(a->longLabel, b->longLabel); +} + +static void findTracksSort(struct slRef **pTrack, boolean simpleSearch, enum sortBy sortBy) +/* Order the found-track list in place. With FINDTRACKS_SORT defined: sbHierarchy sorts with gCmpTrackHierarchy, sbAbc sorts with gCmpTrack, and any other value just reverses the list (simpleSearch is not consulted in that configuration). */ +{ +#ifdef FINDTRACKS_SORT +if (sortBy == sbHierarchy) + slSort(pTrack, gCmpTrackHierarchy); +else if (sortBy == sbAbc) + slSort(pTrack, gCmpTrack); +else //if(simpleSearch) + slReverse(pTrack); +//else +// slSort(pTrack, gCmpTrackReverseHierarchy); +#else///ifndef FINDTRACKS_SORT +if (simpleSearch) + slReverse(pTrack); +else + slSort(&tracks, gCmpTrack); +/* NOTE(review): 'tracks' is not declared in this function; pTrack looks intended. This branch (and the enum sortBy parameter type) only matters if FINDTRACKS_SORT is undefined - confirm before disabling it. */ +#endif///ndef FINDTRACKS_SORT +} + + +// XXXX make a matchString function to support "contains", "is" etc. and wildcards in contains + +// ((sameString(op, "is") && !strcasecmp(track->shortLabel, str)) || + +static boolean isNameMatch(struct track *track, char *str, char *op) +/* TRUE when str is non-empty and matches the track's shortLabel or longLabel: op "is" means case-insensitive equality, op "contains" means case-insensitive substring. Any other op never matches. */ +{ +return str && strlen(str) && + ((sameString(op, "is") && !strcasecmp(track->shortLabel, str)) || + (sameString(op, "is") && !strcasecmp(track->longLabel, str)) || + (sameString(op, "contains") && containsStringNoCase(track->shortLabel, str) != NULL) || + (sameString(op, "contains") && containsStringNoCase(track->longLabel, str) != NULL)); +} + +static boolean isDescriptionMatch(struct track *track, char **words, int wordCount) +// Returns TRUE when every entry of words is found at the start of some word in the track's description html (i.e. google style); returns FALSE when words is NULL or the track has no html. +{ +if(words) + { + // We do NOT look up the parent hierarchy for html descriptions. + char *html = track->tdb->html; + if(!isEmpty(html)) + { + /* This probably could be made more efficient by parsing the html into some kind of b-tree, but I am assuming + that the inner html loop will only happen for 1-2 words for the vast majority of the tracks. 
*/ + + int i, numMatches = 0; + html = stripRegEx(html, "<[^>]*>", REG_ICASE); + /* NOTE(review): no free of the stripRegEx() result or of the per-word cloneString() copy below is visible in this function - confirm that is acceptable for this CGI. */ + for(i = 0; i < wordCount; i++) + { + char *needle = words[i]; + char *haystack, *tmp = cloneString(html); + boolean found = FALSE; + while((haystack = nextWord(&tmp))) + { + char *ptr = strstrNoCase(haystack, needle); + /* Only a match at the very start of the word counts (prefix match). */ + if(ptr != NULL && ptr == haystack) + { + found = TRUE; + break; + } + } + if(found) + numMatches++; + else + break; + } + if(numMatches == wordCount) + return TRUE; + } + } +return FALSE; +} + +static int getTermArray(struct sqlConnection *conn, char ***terms, char *type) +// Pull out all term fields from ra entries with given type +// Returns the array length (terms found plus one for the leading "Any" entry); the array itself is returned via the terms argument. +{ +int i, count = 0; +char **retVal; +struct slName *termList = mdbValSearch(conn, type, MDB_VAL_STD_TRUNCATION, TRUE, FALSE); // Tables not files +count = slCount(termList) + 1; // make room for "Any" +AllocArray(retVal, count); +retVal[0] = cloneString(ANYLABEL); +/* NOTE(review): the loop walks termList itself to NULL and no free of the list is visible here. */ +for(i = 1; termList != NULL;termList = termList->next, i++) + { + retVal[i] = cloneString(termList->name); + } +*terms = retVal; +return count; +} + +static int metaDbVars(struct sqlConnection *conn, char *** metaVars, char *** metaLabels) +// Returns, via metaVars and metaLabels, parallel arrays of metadata variable names and their display labels; the return value is the number of entries. NOTE(review): the previous wording mentioned a 'name' argument that this function does not take. 
+{ +char query[256]; +#define WHITE_LIST_COUNT 35 +#ifdef WHITE_LIST_COUNT +#define WHITE_LIST_VAR 0 +#define WHITE_LIST_LABEL 1 +char *whiteList[WHITE_LIST_COUNT][2] = { + {"age", "Age of experimental organism"}, + {"antibody", "Antibody or target protein"}, + {"origAssembly", "Assembly originally mapped to"}, + {"cell", "Cell, tissue or DNA sample"}, + {"localization", "Cell compartment"}, + {"control", "Control or Input for ChIPseq"}, + //{"controlId", "ControlId - explicit relationship"}, + {"dataType", "Experiment type"}, + {"dataVersion", "ENCODE release"}, + //{"fragLength", "Fragment Length for ChIPseq"}, + //{"freezeDate", "Gencode freeze date"}, + //{"level", "Gencode level"}, + //{"annotation", "Gencode annotation"}, + {"accession", "GEO accession"}, + {"growthProtocol", "Growth Protocol"}, + {"lab", "Lab producing data"}, + {"labVersion", "Lab specific details"}, + {"labExpId", "Lab specific identifier"}, + {"softwareVersion", "Lab specific informatics"}, + {"protocol", "Library Protocol"}, + {"mapAlgorithm", "Mapping algorithm"}, + {"readType", "Paired/Single reads lengths"}, + {"grant", "Principal Investigator"}, + {"replicate", "Replicate number"}, + //{"restrictionEnzyme","Restriction Enzyme used"}, + //{"ripAntibody", "RIP Antibody"}, + //{"ripTgtProtein", "RIP Target Protein"}, + {"rnaExtract", "RNA Extract"}, + {"seqPlatform", "Sequencing Platform"}, + {"setType", "Experiment or Input"}, + {"sex", "Sex of organism"}, + {"strain", "Strain of organism"}, + {"subId", "Submission Id"}, + {"treatment", "Treatment"}, + {"view", "View - Peaks or Signals"}, +}; +// FIXME: The whitelist should be a table or ra +// FIXME: The whitelist should be in list order +// FIXME: Should read in list, then verify that an mdb val exists. +/* NOTE(review): WHITE_LIST_COUNT is 35 but only 27 rows are active above (8 are commented out), so the last 8 rows of whiteList are NULL-initialized; confirm that any loop bounded by WHITE_LIST_COUNT tolerates NULL entries, or size the table from its initializer. */ 
+ +char **retVar = needMem(sizeof(char *) * WHITE_LIST_COUNT); +char **retLab = needMem(sizeof(char *) * WHITE_LIST_COUNT); +int ix,count; +for(ix=0,count=0;ix 0) + { + retVar[count] = whiteList[ix][WHITE_LIST_VAR]; + retLab[count] = whiteList[ix][WHITE_LIST_LABEL]; + count++; + } + } +if(count == 0) + { + freez(&retVar); + freez(&retLab); + } +*metaVars = retVar; +*metaLabels = retLab; +return count; + +#else///ifndef WHITE_LIST_COUNT + +char **retVar; +char **retLab; +struct slName *el, *varList = NULL; +struct sqlResult *sr = NULL; +char **row = NULL; + +safef(query, sizeof(query), "select distinct var from metaDb order by var"); +sr = sqlGetResult(conn, query); +while ((row = sqlNextRow(sr)) != NULL) + slNameAddHead(&varList, row[0]); +sqlFreeResult(&sr); +retVar = needMem(sizeof(char *) * slCount(varList)); +retLab = needMem(sizeof(char *) * slCount(varList)); +slReverse(&varList); +//slNameSort(&varList); +int count = 0; +for (el = varList; el != NULL; el = el->next) + { + retVar[count] = el->name; + retLab[count] = el->name; + count++; + } +*metaVars = retVar; +*whiteLabels = retLab; +/* NOTE(review): 'whiteLabels' is not declared in this function; the metaLabels parameter looks intended. This branch is only compiled when WHITE_LIST_COUNT is undefined. */ +return count; +#endif///ndef WHITE_LIST_COUNT +} + +void doSearchTracks(struct group *groupList) +{ +struct group *group; +char *groups[128]; +char *labels[128]; +int numGroups = 1; +groups[0] = ANYLABEL; +labels[0] = ANYLABEL; +char *currentTab = cartUsualString(cart, "hgt.currentSearchTab", "simpleTab"); +char *nameSearch = cartOptionalString(cart, "hgt.nameSearch"); +char *descSearch; +char *groupSearch = cartOptionalString(cart, "hgt.groupSearch"); +boolean doSearch = sameString(cartOptionalString(cart, searchTracks), "Search") || cartUsualInt(cart, "hgt.forceSearch", 0) == 1; +struct sqlConnection *conn = hAllocConn(database); +boolean metaDbExists = sqlTableExists(conn, "metaDb"); +struct slRef *tracks = NULL; +int numMetadataSelects, tracksFound = 0; +int numMetadataNonEmpty = 0; +char **mdbVar; +char **mdbVal; +struct hash *parents = newHash(4); +boolean simpleSearch; 
+struct trix *trix; +char trixFile[HDB_MAX_PATH_STRING]; +char **descWords = NULL; +int descWordCount = 0; +boolean searchTermsExist = FALSE; +int cols; + +if(sameString(currentTab, "simpleTab")) + { + descSearch = cartOptionalString(cart, "hgt.simpleSearch"); + simpleSearch = TRUE; + freez(&nameSearch); + freez(&groupSearch); + } +else + { + descSearch = cartOptionalString(cart, "hgt.descSearch"); + simpleSearch = FALSE; + } + +getSearchTrixFile(database, trixFile, sizeof(trixFile)); +trix = trixOpen(trixFile); +//#define DO_RESHAPING +// Do shaping was meant to handle shaping supertracks after * configuring contained composite. +// HOWEVER, this will not work since supertrack reshaping relies upon the temporary "_sel" cart var. +// What to do? Alter hgTrackUi to set "_sel" when necessary? Probably. Then doReshaping will be needed. +// Possibly need doReshaping anyway, if it relies upon old/new cart! +#ifdef DO_RESHAPING +struct track *trackList = getTrackList(&groupList, -2); +#else///ifndef DO_RESHAPING +(void)getTrackList(&groupList, -2); +#endif///ndef DO_RESHAPING +slSort(&groupList, gCmpGroup); +for (group = groupList; group != NULL; group = group->next) + { +#define FIND_SUPERS_TOO +#ifdef FIND_SUPERS_TOO + groupTrackListAddSuper(cart, group); +#endif///def FIND_SUPERS_TOO + if (group->trackList != NULL) + { + groups[numGroups] = cloneString(group->name); + labels[numGroups] = cloneString(group->label); + numGroups++; + if (numGroups >= ArraySize(groups)) + internalErr(); + } + } +#ifdef DO_RESHAPING +// NOTE: Is this buying me anything?? It should buy composite/view override when subtrack specific vis exists. +// FIXME: Crashes when superTrackChild "_sel" is found +parentChildCartCleanup(trackList,cart,oldVars); // Subtrack settings must be removed when composite/view settings are updated +#endif///def DO_RESHAPING + +webStartWrapperDetailedNoArgs(cart, database, "", "Search for Tracks", FALSE, FALSE, FALSE, FALSE); + +hPrintf("
"); +hPrintf("
\n\n", hgTracksName()); +cartSaveSession(cart); // Creates hidden var of hgsid to avoid bad voodoo + +hPrintf("\n", database); +hPrintf("\n", currentTab); +hPrintf("\n"); +hPrintf("\n"); +hPrintf("\n"); + +hPrintf("\n"); +hPrintf("
\n"); +hPrintf("next) + descWords[i] = strLower(el->name); + } +if (doSearch && simpleSearch && descWordCount <= 0) + doSearch = FALSE; + +#ifdef FINDTRACKS_SORT +enum sortBy sortBy = cartUsualInt(cart,SORT_BY_VAR,sbRelevance); +//boolean sortByHierarchy = sameString(cartUsualString(cart,SORT_BY_VAR,SORT_BY_HIER),SORT_BY_HIER); +#endif///def FINDTRACKS_SORT +if(doSearch) + { + if(simpleSearch) + { + struct trixSearchResult *tsList; + struct hash *trackHash = newHash(0); + + // Create a hash of tracks, so we can map the track name into a track struct. + for (group = groupList; group != NULL; group = group->next) + { + struct trackRef *tr; + for (tr = group->trackList; tr != NULL; tr = tr->next) + { + struct track *track = tr->track; + hashAdd(trackHash, track->track, track); + struct track *subTrack = track->subtracks; + for (subTrack = track->subtracks; subTrack != NULL; subTrack = subTrack->next) + hashAdd(trackHash, subTrack->track, subTrack); + } + } + for(tsList = trixSearch(trix, descWordCount, descWords, TRUE); tsList != NULL; tsList = tsList->next) + { + struct track *track = (struct track *) hashFindVal(trackHash, tsList->itemId); + if (track != NULL) + { + refAdd(&tracks, track); + tracksFound++; + } + //else // FIXME: Should get to the bottom of why some of these are null + // warn("found trix track is NULL."); + } + } + else if(!isEmpty(nameSearch) || descSearch != NULL || groupSearch != NULL || numMetadataNonEmpty) + { + // First do the metaDb searches, which can be done quickly for all tracks with db queries. 
+ struct hash *matchingTracks = newHash(0); + struct hash *trackMetadata = newHash(0); + struct slName *el, *metaTracks = NULL; + int i; + + for(i = 0; i < numMetadataSelects; i++) + { + if(!isEmpty(mdbVal[i])) + { + struct slName *tmp = mdbObjSearch(conn, mdbVar[i], mdbVal[i], "is", MDB_VAL_STD_TRUNCATION, TRUE, FALSE); + if(metaTracks == NULL) + metaTracks = tmp; + else + metaTracks = slNameIntersection(metaTracks, tmp); + } + } + for (el = metaTracks; el != NULL; el = el->next) + hashAddInt(matchingTracks, el->name, 1); + + if(metaDbExists && !isEmpty(descSearch)) + { + // Load all metadata words for each track to facilitate metadata search. + char query[256]; + struct sqlResult *sr = NULL; + char **row; + safef(query, sizeof(query), "select obj, val from metaDb"); + sr = sqlGetResult(conn, query); + while ((row = sqlNextRow(sr)) != NULL) + { + char *str = cloneString(row[1]); + hashAdd(trackMetadata, row[0], str); + } + sqlFreeResult(&sr); + } + + for (group = groupList; group != NULL; group = group->next) + { + if(groupSearch == NULL || sameString(group->name, groupSearch)) + { + if (group->trackList != NULL) + { + struct trackRef *tr; + for (tr = group->trackList; tr != NULL; tr = tr->next) + { + struct track *track = tr->track; + if((isEmpty(nameSearch) || isNameMatch(track, nameSearch, "contains")) && + (isEmpty(descSearch) || isDescriptionMatch(track, descWords, descWordCount)) && + (!numMetadataNonEmpty || hashLookup(matchingTracks, track->track) != NULL)) + { + if (track != NULL) + { + tracksFound++; + refAdd(&tracks, track); + } + else + warn("found group track is NULL."); + } + if (track->subtracks != NULL) + { + struct track *subTrack; + for (subTrack = track->subtracks; subTrack != NULL; subTrack = subTrack->next) + { + if((isEmpty(nameSearch) || isNameMatch(subTrack, nameSearch, "contains")) && + (isEmpty(descSearch) || isDescriptionMatch(subTrack, descWords, descWordCount)) && + (!numMetadataNonEmpty || hashLookup(matchingTracks, subTrack->track) 
!= NULL)) + { + // XXXX to parent hash. - use tdb->parent instead. + hashAdd(parents, subTrack->track, track); + if (track != NULL) + { + tracksFound++; + refAdd(&tracks, subTrack); + } + else + warn("found subtrack is NULL."); + } + } + } + } + } + } + } + } + if(tracksFound > 1) + findTracksSort(&tracks,simpleSearch,sortBy); + } + +hPrintf("