d3e338b79532a1ba0415e2a65ef1e069ba84caf1 tdreszer Tue Jan 11 13:20:47 2011 -0800 Added suppport for file searching diff --git src/hg/lib/mdb.c src/hg/lib/mdb.c index 35954e7..f23dc30 100644 --- src/hg/lib/mdb.c +++ src/hg/lib/mdb.c @@ -1775,30 +1775,47 @@ return 0; } void mdbObjsSortOnVars(struct mdbObj **mdbObjs, char *vars) // Sorts on var,val pairs vars lists: fwd case-sensitive. Assumes all objs' vars are in identical order. // Optionally give list of vars "cell antibody treatment" to sort on (bringing to front of vars lists). { // NOTE: assumes all var pairs match (e.g. every obj has cell,treatment,antibody,... and missing treatment messes up sort) if(vars != NULL) mdbObjReorderVars(*mdbObjs,vars,FALSE); slSort(mdbObjs, mdbObjVarCmp); } +void mdbObjsSortOnVarPairs(struct mdbObj **mdbObjs,struct slPair *varValPairs) +// Sorts on var,val pairs vars lists: fwd case-sensitive. Assumes all objs' vars are in identical order. +// This method will use mdbObjsSortOnVars() +{ +if (varValPairs == NULL) + return; + +struct slPair *onePair = varValPairs; +struct dyString *dyTerms = dyStringNew(256); +dyStringAppend(dyTerms,onePair->name); +onePair = onePair->next; +for(; onePair != NULL; onePair = onePair->next) + dyStringPrintf(dyTerms,",%s",onePair->name); +mdbObjsSortOnVars(mdbObjs,dyStringContents(dyTerms)); +dyStringFree(&dyTerms); +} + void mdbObjRemoveVars(struct mdbObj *mdbObjs, char *vars) // Prunes list of vars for an object, freeing the memory. Doesn't touch DB. { char *cloneLine = NULL; int count = 0; char **words = NULL; if(vars != NULL) { cloneLine = cloneString(vars); count = chopByWhite(cloneLine,NULL,0); words = needMem(sizeof(char *) * count); count = chopByWhite(cloneLine,words,count); } struct mdbObj *mdbObj = NULL; for( mdbObj=mdbObjs; mdbObj!=NULL; mdbObj=mdbObj->next ) @@ -1868,30 +1885,131 @@ for( mdbObj=mdbObjs; mdbObj!=NULL; mdbObj=mdbObj->next ) { mdbObj->deleteThis = deleteThis; if(mdbObj->varHash != NULL) hashFree(&mdbObj->varHash); struct mdbVar *mdbVar = NULL; while((mdbVar = slPopHead(&(mdbObj->vars))) != NULL) mdbVarFree(&mdbVar); mdbObjAddVarPairs(mdbObj,vars); } } +struct mdbObj *mdbObjsFilter(struct mdbObj **pMdbObjs, char *var, char *val,boolean exclude) +// Filters mdb objects to only those that include/exclude vars. Optionally checks val too. +// Returns removed objects +{ +struct mdbObj *mdbObjsDropped = NULL; +struct mdbObj *mdbObj=*pMdbObjs; +struct mdbObj *mdbLastObj=NULL; +while (mdbObj!=NULL) + { + boolean drop = FALSE; + char *foundVal = mdbObjFindValue(mdbObj,var); + if (val == NULL) + drop = (!foundVal && !exclude) || (foundVal && exclude); + else if (foundVal) + drop = (sameWord(foundVal,val) ? exclude : !exclude); // case-insensitive + else + drop = !exclude; + if (drop) + { + if (mdbLastObj==NULL) + *pMdbObjs = mdbObj->next; + else + mdbLastObj->next = mdbObj->next; + mdbObj->next = NULL; + slAddHead(&mdbObjsDropped,mdbObj); + if (mdbLastObj==NULL) + { + mdbObj = *pMdbObjs; + continue; + } + } + else + mdbLastObj=mdbObj; + mdbObj = mdbLastObj->next; + } +return mdbObjsDropped; +} + +struct mdbObj *mdbObjsFilterTablesOrFiles(struct mdbObj **pMdbObjs,boolean tables, boolean files) +// Filters mdb objects to only those that have associated tables or files. Returns removed non-table/file objects +// Note: Since table/file objects overlap, there are 3 possibilites: tables, files, table && files +{ +assert(tables || files); // Cant exclude both + +struct mdbObj *mdbObjs = *pMdbObjs; +struct mdbObj *mdbObjsDropped = NULL; +if (tables) + mdbObjsDropped = mdbObjsFilter(&mdbObjs,"tableName",NULL,FALSE); + +if (files) + { + struct mdbObj *mdbObjsNoFileName = mdbObjsDropped = mdbObjsFilter(&mdbObjs,"fileName",NULL,FALSE); + if (mdbObjsNoFileName) + { + struct mdbObj *mdbObjsNoFileIndex = mdbObjsFilter(&mdbObjsNoFileName,"fileIndex",NULL,FALSE); + if (mdbObjsNoFileIndex) + { + mdbObjs = slCat(mdbObjs,mdbObjsNoFileName); + mdbObjsDropped = slCat(mdbObjsDropped,mdbObjsNoFileIndex); + } + } + } +slSort(&mdbObjs, &mdbObjCmp); // Need to be returned to obj order +slSort(&mdbObjsDropped,&mdbObjCmp); +*pMdbObjs = mdbObjs; + +return mdbObjsDropped; +} + +struct mdbObj *mdbObjIntersection(struct mdbObj **pA, struct mdbObj *b) +// return duplicate objs from an intersection of two mdbObj lists. +// List b is untouched but pA will contain the resulting intersection +{ +struct mdbObj *mdbObj; +struct hash *hashB = newHash(0); +for (mdbObj = b; mdbObj != NULL; mdbObj = mdbObj->next) + { + hashAdd(hashB, mdbObj->obj, mdbObj); + } + +struct mdbObj *mdbObjsDropped = NULL; +struct mdbObj *mdbObjsIntersecting = NULL; +struct mdbObj *mdbObjs=*pA; +while (mdbObjs) + { + mdbObj = slPopHead(&mdbObjs); + if (hashLookup(hashB, mdbObj->obj) != NULL) + slAddHead(&mdbObjsIntersecting,mdbObj); + else + slAddHead(&mdbObjsDropped,mdbObj); + } +hashFree(&hashB); +if (mdbObjsIntersecting) + slReverse(&mdbObjsIntersecting); +*pA = mdbObjsIntersecting; +if (mdbObjsDropped) + slReverse(&mdbObjsDropped); + +return mdbObjsDropped; +} + void mdbObjTransformToUpdate(struct mdbObj *mdbObjs, char *var, char *varType,char *val,boolean deleteThis) // Turns one or more mdbObjs into the stucture needed to add/update or delete. { struct mdbObj *mdbObj = NULL; for( mdbObj=mdbObjs; mdbObj!=NULL; mdbObj=mdbObj->next ) { mdbObj->deleteThis = deleteThis; if(mdbObj->varHash != NULL) hashFree(&mdbObj->varHash); struct mdbVar *mdbVar = NULL; while((mdbVar = slPopHead(&(mdbObj->vars))) != NULL) mdbVarFree(&mdbVar); @@ -1932,30 +2050,43 @@ AllocVar(newVar); if(mdbVar->var != NULL) newVar->var = cloneString(mdbVar->var); if(mdbVar->val != NULL) newVar->val = cloneString(mdbVar->val); newVar->varType = mdbVar->varType; if(newVar->var != NULL && newVar->val != NULL) hashAdd(newObj->varHash, newVar->var, newVar); // pointer to struct to resolve type slAddHead(&(newObj->vars),newVar); } slReverse(&(newObj->vars)); } return newObj; } +struct slName *mdbObjToSlName(struct mdbObj *mdbObjs) +// Creates slNames list of mdbObjs->obj. mdbObjs remains untouched +{ +struct slName *mdbNames = NULL; +struct mdbObj *mdbObj = mdbObjs; +for( ;mdbObj!=NULL; mdbObj=mdbObj->next) + { + slAddHead(&mdbNames,slNameNew(mdbObj->obj)); //allocates memory + } +slReverse(&mdbNames); +return mdbNames; +} + // --------------- Free at last ---------------- void mdbObjsFree(struct mdbObj **mdbObjsPtr) // Frees one or more metadata objects and any contained mdbVars. Will free any hashes as well. { if(mdbObjsPtr != NULL && *mdbObjsPtr != NULL) { // free all roots struct mdbObj *mdbObj = NULL; while((mdbObj = slPopHead(mdbObjsPtr)) != NULL) { // Free hash first (shared memory) hashFree(&(mdbObj->varHash)); // free all leaves @@ -2062,63 +2193,142 @@ return mdbObj; } const char *metadataFindValue(struct trackDb *tdb, char *var) // Finds the val associated with the var or retruns NULL { struct mdbObj *mdbObj = tdbExtrasGetOrDefault(tdb, MDB_OBJ_KEY,NULL); if(mdbObj == MDB_NOT_FOUND) // Note, only we if already looked for mdb (which requires db) mdbObj = metadataForTableFromTdb(tdb); if (mdbObj == NULL || mdbObj == METADATA_NOT_FOUND) return NULL; return mdbObjFindValue(mdbObj,var); } -struct slName *mdbObjSearch(struct sqlConnection *conn, char *var, char *val, char *op, int limit, boolean tables, boolean files) + +struct mdbObj *mdbObjSearch(struct sqlConnection *conn, char *var, char *val, char *op, int limit) // Search the metaDb table for objs by var and val. Can restrict by op "is", "like", "in" and accept (non-zero) limited string size // Search is via mysql, so it's case-insensitive. Return is sorted on obj. -{ // TODO: Change this to use normal mdb struct routines? -if (!tables && !files) - errAbort("mdbObjSearch requests objects for neither tables or files.\n"); +{ +if (var == NULL && val == NULL) + errAbort("mdbObjSearch requests objects but provides no criteria.\n"); char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first +// Build a query string struct dyString *dyQuery = dyStringNew(512); -dyStringPrintf(dyQuery,"select distinct obj from %s l1 where ",tableName); -if (!tables || !files) - { - dyStringPrintf(dyQuery,"l1.var='objType' and l1.val='%s' ",tables?"table":"file"); - dyStringPrintf(dyQuery,"and exists (select l2.obj from %s l2 where l2.obj = l1.obj and ",tableName); - } +dyStringPrintf(dyQuery,"select l1.obj, l1.var, l1.varType, l1.val from %s l1",tableName); +if (var != NULL || val != NULL) + dyStringPrintf(dyQuery," where exists (select l2.obj from %s l2 where l2.obj = l1.obj and ",tableName); if(var != NULL) - dyStringPrintf(dyQuery,"l2.var = '%s' and l2.val ", var); + dyStringPrintf(dyQuery,"l2.var = '%s'", var); +if(var != NULL && val != NULL) + dyStringAppend(dyQuery," and "); +if(val != NULL) + { + dyStringAppend(dyQuery,"l2.val "); if(sameString(op, "in")) dyStringPrintf(dyQuery,"in (%s)", val); // Note, must be a formatted string already: 'a','b','c' or 1,2,3 else if(sameString(op, "contains") || sameString(op, "like")) dyStringPrintf(dyQuery,"like '%%%s%%'", val); -else if (limit > 0 && strlen(val) == limit) - dyStringPrintf(dyQuery,"like '%s%%'", val); + else if (limit > 0 && strlen(val) != limit) + dyStringPrintf(dyQuery,"like '%.*s%%'", limit, val); else dyStringPrintf(dyQuery,"= '%s'", val); - -if (!tables || !files) + } dyStringAppendC(dyQuery,')'); dyStringAppend(dyQuery," order by obj"); -return sqlQuickList(conn, dyStringCannibalize(&dyQuery)); +struct mdb *mdb = mdbLoadByQuery(conn, dyStringCannibalize(&dyQuery)); +verbose(3, "rows (vars) returned: %d\n",slCount(mdb)); +struct mdbObj *mdbObjs = mdbObjsLoadFromMemory(&mdb,TRUE); + +return mdbObjs; +} + +struct mdbObj *mdbObjRepeatedSearch(struct sqlConnection *conn,struct slPair *varValPairs,boolean tables,boolean files) +// Search the metaDb table for objs by var,val pairs. Uses mdbCvSearchMethod() if available. +// This method will use mdbObjsQueryByVars() +{ +struct slPair *onePair; +struct dyString *dyTerms = dyStringNew(256); +// Build list of terms as "var1=val1 var2=val2a,val2b,val2c var3=%val3%" +for(onePair = varValPairs; onePair != NULL; onePair = onePair->next) + { + enum mdbCvSearchable searchBy = mdbCvSearchMethod(onePair->name); + // If select is by free text then like + if (searchBy == cvsSearchByMultiSelect) + { + // TO BE IMPLEMENTED + warn("mdb search by multi-select is not yet implemented."); + // The mdbVal[1] will hve to be filled cartOptionalSlNameList(cart,???) + struct slName *choices = (struct slName *)onePair->val; + if (slCount(choices) == 1) + dyStringPrintf(dyTerms,"%s=%s ",onePair->name,choices->name); + else if(choices != NULL) + { + // Then slNames will need to be assembled into a string in the form of a,b,c + dyStringPrintf(dyTerms,"%s=%s",onePair->name,choices->name); + struct slName *choice = choices->next; + for(;choice!=NULL;choice=choice->next) + dyStringPrintf(dyTerms,",%s",choice->name); + dyStringAppendC(dyTerms,' '); + } + } + else if (searchBy == cvsSearchBySingleSelect) + dyStringPrintf(dyTerms,"%s=%s ",onePair->name,(char *)onePair->val); + else if (searchBy == cvsSearchByFreeText) + dyStringPrintf(dyTerms,"%s=%%%s%% ",onePair->name,(char *)onePair->val); + else if (searchBy == cvsSearchByDateRange || searchBy == cvsSearchByIntegerRange) + { + // TO BE IMPLEMENTED + // Requires new mdbObjSearch API and more than one (char *)onePair->val + warn("mdb search by date is not yet implemented."); + } + } +// Be sure to include table of file in selections +if (tables) + dyStringAppend(dyTerms,"tableName=? "); +if (files) + dyStringAppend(dyTerms,"fileName=? "); + +// Build the mdbByVals struct and then select all mdbObjs in one query +struct mdbByVar *mdbByVars = mdbByVarsLineParse(dyStringContents(dyTerms)); +dyStringClear(dyTerms); +struct mdbObj *mdbObjs = mdbObjsQueryByVars(conn,NULL,mdbByVars); // Uses master table metaDb not sandbox versions + +return mdbObjs; +} + +struct slName *mdbObjNameSearch(struct sqlConnection *conn, char *var, char *val, char *op, int limit, boolean tables, boolean files) +// Search the metaDb table for objs by var and val. Can restrict by op "is", "like", "in" and accept (non-zero) limited string size +// Search is via mysql, so it's case-insensitive. Return is sorted on obj. +{ // Note: This proves faster than getting mdbObjs then converting to slNames +struct mdbObj *mdbObjs = mdbObjSearch(conn,var,val,op,limit); + +// May only be interested in tables or files: +if (tables || files) + { + struct mdbObj *mdbObjsDropped = mdbObjsFilterTablesOrFiles(&mdbObjs,tables,files); + mdbObjsFree(&mdbObjsDropped); + } + +struct slName *mdbNames = mdbObjToSlName(mdbObjs); +mdbObjsFree(&mdbObjs); +return mdbNames; } struct slName *mdbValSearch(struct sqlConnection *conn, char *var, int limit, boolean tables, boolean files) // Search the metaDb table for vals by var. Can impose (non-zero) limit on returned string size of val // Search is via mysql, so it's case-insensitive. Return is sorted on val. { // TODO: Change this to use normal mdb struct routines? struct slName *retVal; if (!tables && !files) errAbort("mdbValSearch requests values for neither table nor file objects.\n"); char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first struct dyString *dyQuery = dyStringNew(512); if (limit > 0)