7544fd2a03bf573a9a12195d86a2fc082a88de78 tdreszer Tue Apr 5 15:17:56 2011 -0700 Tightened the screws on searching by mdb vars where the search is either for files or tables diff --git src/hg/lib/mdb.c src/hg/lib/mdb.c index 6011719..11ea4f4 100644 --- src/hg/lib/mdb.c +++ src/hg/lib/mdb.c @@ -3169,45 +3169,160 @@ { // Note: This proves faster than getting mdbObjs then converting to slNames struct mdbObj *mdbObjs = mdbObjSearch(conn,var,val,op,limit); // May only be interested in tables or files: if (tables || files) { struct mdbObj *mdbObjsDropped = mdbObjsFilterTablesOrFiles(&mdbObjs,tables,files); mdbObjsFree(&mdbObjsDropped); } struct slName *mdbNames = mdbObjToSlName(mdbObjs); mdbObjsFree(&mdbObjs); return mdbNames; } -struct slName *mdbValSearch(struct sqlConnection *conn, char *var, int limit, boolean tables, boolean files) +static void mdbSearchableQueryRestictForTablesOrFiles(struct dyString *dyQuery,char *tableName, char letter,boolean hasTableName, boolean hasFileName) +// Append table and file restrictions onto an mdb query. +// letter (e.g. 'A') should be used in original query to alias table name: "select A.val from metaDb A where A.val = 'fred'". +{ +// A note about tables and files: objType=table may have fileNames associated, but objType=file will not have tableNames +// While objType=table should have a var=tableName, this is redundant because the obj=tableName +// So the lopsided 'exists' queries below are meant to be the most flexible/efficent + +assert(isalpha(letter) && isalpha(letter + 2)); // will need one or two sub-queries. +char nextLtr = letter + 1; + +// We are only searching for objects that are of objType table or file. objType=composite are not search targets! +if (hasTableName && !hasFileName) // objType=table may have fileNames associated, but objType=file will not have tableNames + dyStringPrintf(dyQuery," and exists (select %c.obj from %s %c where %c.obj = %c.obj and %c.var='objType' and %c.val = '%s')", + nextLtr,tableName,nextLtr,nextLtr,letter,nextLtr,nextLtr,MDB_OBJ_TYPE_TABLE); +else // tables OR files (but not objType=composite) + dyStringPrintf(dyQuery," and exists (select %c.obj from %s %c where %c.obj = %c.obj and %c.var='objType' and %c.val in ('%s','%s'))", + nextLtr,tableName,nextLtr,nextLtr,letter,nextLtr,nextLtr,MDB_OBJ_TYPE_TABLE,MDB_OBJ_TYPE_FILE); + +nextLtr++; + +if (!hasTableName && hasFileName) // last of 3 possibilites objType either table or file but must have fileName var + dyStringPrintf(dyQuery," and exists (select %c.obj from %s %c where %c.obj = %c.obj and %c.var in ('%s','%s'))", + nextLtr,tableName,nextLtr,nextLtr,letter,nextLtr,MDB_VAR_FILENAME,MDB_VAR_FILEINDEX); +} + +struct slName *mdbValSearch(struct sqlConnection *conn, char *var, int limit, boolean hasTableName, boolean hasFileName) // Search the metaDb table for vals by var. Can impose (non-zero) limit on returned string size of val // Search is via mysql, so it's case-insensitive. Return is sorted on val. +// Searchable vars are only for table or file objects. Further restrict to vars associated with tableName, fileName or both. { // TODO: Change this to use normal mdb struct routines? struct slName *retVal; -if (!tables && !files) - errAbort("mdbValSearch requests values for neither table nor file objects.\n"); +if (!hasTableName && !hasFileName) + errAbort("mdbValSearch requests vals associated with neither table nor files.\n"); char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first +char letter = 'A'; struct dyString *dyQuery = dyStringNew(512); if (limit > 0) - dyStringPrintf(dyQuery,"select distinct LEFT(val,%d)",limit); + dyStringPrintf(dyQuery,"select distinct LEFT(%c.val,%d)",letter,limit); else - dyStringPrintf(dyQuery,"select distinct val"); + dyStringPrintf(dyQuery,"select distinct %c.val",letter); + +dyStringPrintf(dyQuery," from %s %c where %c.var='%s'",tableName,letter,letter,var); -dyStringPrintf(dyQuery," from %s l1 where l1.var='%s' ",tableName,var); +mdbSearchableQueryRestictForTablesOrFiles(dyQuery,tableName, letter, hasTableName, hasFileName); -if (!tables || !files) - dyStringPrintf(dyQuery,"and exists (select l2.obj from %s l2 where l2.obj = l1.obj and l2.var='objType' and l2.val='%s')", - tableName,tables?MDB_OBJ_TYPE_TABLE:MDB_OBJ_TYPE_FILE); -dyStringAppend(dyQuery," order by val"); +dyStringPrintf(dyQuery," order by %c.val",letter); retVal = sqlQuickList(conn, dyStringCannibalize(&dyQuery)); slNameSortCase(&retVal); return retVal; } -// ------------ CONTROLLED VOCABULARY APIs -------------- +struct slPair *mdbValLabelSearch(struct sqlConnection *conn, char *var, int limit, boolean tags, boolean hasTableName, boolean hasFileName) +// Search the metaDb table for vals by var and returns val (as pair->name) and controlled vocabulary (cv) label +// (if it exists) (as pair->val). Can impose (non-zero) limit on returned string size of name. +// Searchable vars are only for table or file objects. Further restrict to vars associated with tableName, fileName or both. +// Return is case insensitive sorted on label (cv label or else val). If requested, return cv tag instead of mdb val. +{ // TODO: Change this to use normal mdb struct routines? +if (!hasTableName && !hasFileName) + errAbort("mdbValLabelSearch requests vals associated with neither table nor files.\n"); + +char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first + +char letter = 'A'; +struct dyString *dyQuery = dyStringNew(512); +if (limit > 0) + dyStringPrintf(dyQuery,"select distinct LEFT(%c.val,%d)",letter,limit); +else + dyStringPrintf(dyQuery,"select distinct %c.val",letter); + +dyStringPrintf(dyQuery," from %s %c where %c.var='%s'",tableName,letter,letter,var); + +mdbSearchableQueryRestictForTablesOrFiles(dyQuery,tableName,letter, hasTableName, hasFileName); +//warn("%s",dyStringContents(dyQuery)); + +struct hash *varHash = (struct hash *)cvTermHash(var); + +struct slPair *pairs = NULL; +struct sqlResult *sr = sqlGetResult(conn, dyStringContents(dyQuery)); +dyStringFree(&dyQuery); +char **row; +while ((row = sqlNextRow(sr)) != NULL) + { + char *val = row[0]; + char *label = NULL; + if (varHash != NULL) + { + struct hash *valHash = hashFindVal(varHash,val); + if (valHash != NULL) + { + label = cloneString(hashOptionalVal(valHash,CV_LABEL,row[0])); + if (tags) + { + char *tag = hashFindVal(valHash,CV_TAG); + if (tag != NULL) + val = tag; + } + } + } + if (label == NULL); + label = cloneString(row[0]); + label = strSwapChar(label,'_',' '); // vestigial _ meaning space + slPairAdd(&pairs,val,label); + } +sqlFreeResult(&sr); +slPairValSortCase(&pairs); +return pairs; +} + +struct slPair *mdbVarsSearchable(struct sqlConnection *conn, boolean hasTableName, boolean hasFileName) +// returns a white list of mdb vars that actually exist in the current DB. +// Searchable vars are only for table or file objects. Further restrict to vars associated with tableName, fileName or both. +{ +if (!hasTableName && !hasFileName) + errAbort("mdbVarsSearchable requests vals associated with neither table nor files.\n"); + +char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first + +char letter = 'A'; +struct slPair *cvApproved = cvWhiteList(TRUE,FALSE); +struct slPair *relevant = NULL; +struct dyString *dyQuery = dyStringNew(256); +while(cvApproved != NULL) + { + struct slPair *oneVar = slPopHead(&cvApproved); + dyStringClear(dyQuery); + dyStringPrintf(dyQuery, "select count(DISTINCT %c.val) from %s %c where %c.var = '%s'",letter,tableName,letter,letter,oneVar->name); + + mdbSearchableQueryRestictForTablesOrFiles(dyQuery,tableName,letter, hasTableName, hasFileName); + int count = sqlQuickNum(conn,dyStringContents(dyQuery)); + //warn("%d %s",count,dyStringContents(dyQuery)); + + if(count > 1) // If there is only one value then searching on that variable is useless! + slAddHead(&relevant, oneVar); + else + slPairFree(&oneVar); + } +dyStringFree(&dyQuery); +slReverse(&relevant); +return relevant; +}