d3e338b79532a1ba0415e2a65ef1e069ba84caf1
tdreszer
  Tue Jan 11 13:20:47 2011 -0800
Added support for file searching
diff --git src/hg/lib/mdb.c src/hg/lib/mdb.c
index 35954e7..f23dc30 100644
--- src/hg/lib/mdb.c
+++ src/hg/lib/mdb.c
@@ -1775,30 +1775,47 @@
 return 0;
 }
 
 
 
 void mdbObjsSortOnVars(struct mdbObj **mdbObjs, char *vars)
 // Sorts on var,val pairs vars lists: fwd case-sensitive.  Assumes all objs' vars are in identical order.
 // Optionally give list of vars "cell antibody treatment" to sort on (bringing to front of vars lists).
 {  // NOTE: assumes all var pairs match (e.g. every obj has cell,treatment,antibody,... and missing treatment messes up sort)
 if(vars != NULL)
     mdbObjReorderVars(*mdbObjs,vars,FALSE);
 
 slSort(mdbObjs, mdbObjVarCmp);
 }
 
+void mdbObjsSortOnVarPairs(struct mdbObj **mdbObjs,struct slPair *varValPairs)
+// Sorts objs on the vars named in varValPairs (only each pair's name is used; vals are ignored).
+// Builds a var list from the names and hands it to mdbObjsSortOnVars().  No-op when varValPairs is NULL.
+{
+if (varValPairs == NULL)
+    return;
+
+// Names are joined with a blank: mdbObjsSortOnVars() documents a blank delimited list ("cell antibody treatment").
+struct dyString *dyTerms = dyStringNew(256);
+struct slPair *onePair = varValPairs;
+dyStringAppend(dyTerms,onePair->name);
+for (onePair = onePair->next; onePair != NULL; onePair = onePair->next)
+    dyStringPrintf(dyTerms," %s",onePair->name);
+mdbObjsSortOnVars(mdbObjs,dyStringContents(dyTerms));
+dyStringFree(&dyTerms);
+}
+
 void mdbObjRemoveVars(struct mdbObj *mdbObjs, char *vars)
 // Prunes list of vars for an object, freeing the memory.  Doesn't touch DB.
 {
 char *cloneLine = NULL;
 int count = 0;
 char **words = NULL;
 if(vars != NULL)
     {
     cloneLine = cloneString(vars);
     count = chopByWhite(cloneLine,NULL,0);
     words = needMem(sizeof(char *) * count);
     count = chopByWhite(cloneLine,words,count);
     }
 struct mdbObj *mdbObj = NULL;
 for( mdbObj=mdbObjs; mdbObj!=NULL; mdbObj=mdbObj->next )
@@ -1868,30 +1885,131 @@
 for( mdbObj=mdbObjs; mdbObj!=NULL; mdbObj=mdbObj->next )
     {
     mdbObj->deleteThis = deleteThis;
 
     if(mdbObj->varHash != NULL)
         hashFree(&mdbObj->varHash);
 
     struct mdbVar *mdbVar = NULL;
     while((mdbVar = slPopHead(&(mdbObj->vars))) != NULL)
         mdbVarFree(&mdbVar);
 
     mdbObjAddVarPairs(mdbObj,vars);
     }
 }
 
+struct mdbObj *mdbObjsFilter(struct mdbObj **pMdbObjs, char *var, char *val,boolean exclude)
+// Splits the list at *pMdbObjs in two, based on whether each obj "matches":
+//   an obj matches when it has var and, if val is not NULL, its value for var equals val (ignoring case).
+// exclude FALSE: matching objs are kept and all others are removed.
+// exclude TRUE:  matching objs are removed and all others are kept.
+// Kept objs remain in *pMdbObjs in their original order.  Removed objs are unlinked (not freed) and
+// returned as a separate list with the most recently removed obj first.
+{
+struct mdbObj *removed = NULL;
+
+// pLink is the address of whichever pointer currently leads to the obj under test: first the list head
+// itself, later the next field of the last obj kept.  Unlinking is then the same in both cases.
+struct mdbObj **pLink = pMdbObjs;
+while (*pLink != NULL)
+    {
+    struct mdbObj *candidate = *pLink;
+    char *foundVal = mdbObjFindValue(candidate,var);
+
+    // A missing var never matches; a present var matches any val unless a specific val was asked for
+    boolean matches = FALSE;
+    if (foundVal != NULL)
+        matches = (val == NULL || sameWord(foundVal,val)); // case-insensitive
+
+    boolean drop = (matches ? exclude : !exclude);
+    if (drop)
+        {
+        // Splice the obj out; pLink stays where it is and now leads to the obj's successor
+        *pLink = candidate->next;
+        candidate->next = NULL;
+        slAddHead(&removed,candidate);
+        }
+    else
+        pLink = &(candidate->next); // keep it and move on
+    }
+
+return removed;
+}
+
+struct mdbObj *mdbObjsFilterTablesOrFiles(struct mdbObj **pMdbObjs,boolean tables, boolean files)
+// Filters mdb objects to only those that have associated tables and/or files.  Returns the removed objects.
+//   tables: an obj is kept only if it has a tableName var.
+//   files:  an obj is kept only if it has a fileName var or, failing that, a fileIndex var.
+// When both are requested an obj must pass both tests.  Each obj passed in ends up in exactly one of
+// the two lists (*pMdbObjs or the returned list); both are sorted with mdbObjCmp.
+{
+assert(tables || files); // Cant exclude both
+
+struct mdbObj *mdbObjs = *pMdbObjs;
+struct mdbObj *mdbObjsDropped  = NULL;
+if (tables)
+    mdbObjsDropped = mdbObjsFilter(&mdbObjs,"tableName",NULL,FALSE);
+
+if (files)
+    {
+    // Objs without fileName are split again: those with fileIndex stay behind in mdbObjsNoFileName
+    // and rejoin the keepers, while those with neither var join the drops.
+    struct mdbObj *mdbObjsNoFileName  = mdbObjsFilter(&mdbObjs,"fileName",NULL,FALSE);
+    struct mdbObj *mdbObjsNoFileIndex = mdbObjsFilter(&mdbObjsNoFileName,"fileIndex",NULL,FALSE);
+    mdbObjs        = slCat(mdbObjs,mdbObjsNoFileName);
+    // Append rather than assign, so objs already dropped by the tables test are not lost
+    mdbObjsDropped = slCat(mdbObjsDropped,mdbObjsNoFileIndex);
+    }
+slSort(&mdbObjs,       &mdbObjCmp); // Need to be returned to obj order
+slSort(&mdbObjsDropped,&mdbObjCmp);
+*pMdbObjs = mdbObjs;
+
+return mdbObjsDropped;
+}
+
+struct mdbObj *mdbObjIntersection(struct mdbObj **pA, struct mdbObj *b)
+// Intersects list *pA with list b, comparing objs by their obj name.
+// On return *pA holds only the objs whose name also occurs in b, and the objs of *pA that were NOT
+// found in b are returned as a separate list.  Both lists keep the original order of *pA.  List b is untouched.
+{
+// Index b by obj name so that each obj of *pA costs a single lookup
+struct hash *namesInB = newHash(0);
+struct mdbObj *cur = NULL;
+for (cur = b; cur != NULL; cur = cur->next)
+    hashAdd(namesInB, cur->obj, cur);
+
+struct mdbObj *shared = NULL;     // objs of *pA also named in b
+struct mdbObj *onlyInA = NULL;    // objs of *pA not named in b
+struct mdbObj *remaining = *pA;
+while ((cur = slPopHead(&remaining)) != NULL)
+    {
+    if (hashLookup(namesInB, cur->obj) == NULL)
+        slAddHead(&onlyInA,cur);
+    else
+        slAddHead(&shared,cur);
+    }
+hashFree(&namesInB);
+
+// slAddHead() built both lists back to front, so turn them around
+if (shared != NULL)
+    slReverse(&shared);
+if (onlyInA != NULL)
+    slReverse(&onlyInA);
+*pA = shared;
+return onlyInA;
+}
+
 void mdbObjTransformToUpdate(struct mdbObj *mdbObjs, char *var, char *varType,char *val,boolean deleteThis)
 // Turns one or more mdbObjs into the stucture needed to add/update or delete.
 {
 struct mdbObj *mdbObj = NULL;
 for( mdbObj=mdbObjs; mdbObj!=NULL; mdbObj=mdbObj->next )
     {
     mdbObj->deleteThis = deleteThis;
 
     if(mdbObj->varHash != NULL)
         hashFree(&mdbObj->varHash);
 
     struct mdbVar *mdbVar = NULL;
     while((mdbVar = slPopHead(&(mdbObj->vars))) != NULL)
         mdbVarFree(&mdbVar);
 
@@ -1932,30 +2050,43 @@
         AllocVar(newVar);
         if(mdbVar->var != NULL)
             newVar->var = cloneString(mdbVar->var);
         if(mdbVar->val != NULL)
             newVar->val = cloneString(mdbVar->val);
         newVar->varType    = mdbVar->varType;
         if(newVar->var != NULL && newVar->val != NULL)
             hashAdd(newObj->varHash, newVar->var, newVar); // pointer to struct to resolve type
         slAddHead(&(newObj->vars),newVar);
         }
     slReverse(&(newObj->vars));
     }
 return newObj;
 }
 
+struct slName *mdbObjToSlName(struct mdbObj *mdbObjs)
+// Returns a new slName list holding each obj's name, in the same order as mdbObjs (which is only read).
+{
+struct slName *names = NULL;
+for ( ; mdbObjs != NULL; mdbObjs = mdbObjs->next)  // advances the local copy of the pointer only
+    {
+    struct slName *one = slNameNew(mdbObjs->obj); // allocates memory
+    slAddHead(&names,one);
+    }
+slReverse(&names);  // slAddHead() built the list back to front
+return names;
+}
+
 // --------------- Free at last ----------------
 void mdbObjsFree(struct mdbObj **mdbObjsPtr)
 // Frees one or more metadata objects and any contained mdbVars.  Will free any hashes as well.
 {
 
 if(mdbObjsPtr != NULL && *mdbObjsPtr != NULL)
     {
     // free all roots
     struct mdbObj *mdbObj = NULL;
     while((mdbObj = slPopHead(mdbObjsPtr)) != NULL)
         {
         // Free hash first (shared memory)
         hashFree(&(mdbObj->varHash));
 
         // free all leaves
@@ -2062,63 +2193,142 @@
 return mdbObj;
 }
 
 const char *metadataFindValue(struct trackDb *tdb, char *var)
 // Finds the val associated with the var or retruns NULL
 {
 struct mdbObj *mdbObj = tdbExtrasGetOrDefault(tdb, MDB_OBJ_KEY,NULL);
 if(mdbObj == MDB_NOT_FOUND) // Note, only we if already looked for mdb (which requires db)
     mdbObj = metadataForTableFromTdb(tdb);
 if (mdbObj == NULL || mdbObj == METADATA_NOT_FOUND)
     return NULL;
 
 return mdbObjFindValue(mdbObj,var);
 }
 
-struct slName *mdbObjSearch(struct sqlConnection *conn, char *var, char *val, char *op, int limit, boolean tables, boolean files)
+
+struct mdbObj *mdbObjSearch(struct sqlConnection *conn, char *var, char *val, char *op, int limit)
 // Search the metaDb table for objs by var and val.  Can restrict by op "is", "like", "in" and accept (non-zero) limited string size
 // Search is via mysql, so it's case-insensitive.  Return is sorted on obj.
-{  // TODO: Change this to use normal mdb struct routines?
-if (!tables && !files)
-    errAbort("mdbObjSearch requests objects for neither tables or files.\n");
+{  // Returns whole objs (every var row of each matching obj), not just obj names.  At least one of var, val is required.
+if (var == NULL && val == NULL)
+    errAbort("mdbObjSearch requests objects but provides no criteria.\n");
 
 char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first
 
+// Build a query string: select all rows (l1) of any obj that has at least one row (l2) meeting the var/val criteria
 struct dyString *dyQuery = dyStringNew(512);
-dyStringPrintf(dyQuery,"select distinct obj from %s l1 where ",tableName);
-if (!tables || !files)
-    {
-    dyStringPrintf(dyQuery,"l1.var='objType' and l1.val='%s' ",tables?"table":"file");
-    dyStringPrintf(dyQuery,"and exists (select l2.obj from %s l2 where l2.obj = l1.obj and ",tableName);
-    }
+dyStringPrintf(dyQuery,"select l1.obj, l1.var, l1.varType, l1.val from %s l1",tableName);
 
+if (var != NULL || val != NULL) // always true here, given the errAbort above
+    dyStringPrintf(dyQuery," where exists (select l2.obj from %s l2 where l2.obj = l1.obj and ",tableName);
 if(var != NULL)
-    dyStringPrintf(dyQuery,"l2.var = '%s' and l2.val ", var);
+    dyStringPrintf(dyQuery,"l2.var = '%s'", var);
+if(var != NULL && val != NULL)
+    dyStringAppend(dyQuery," and ");
+if(val != NULL)
+    {
+    dyStringAppend(dyQuery,"l2.val "); // the comparison chosen below (by op and limit) completes this clause
 if(sameString(op, "in"))
     dyStringPrintf(dyQuery,"in (%s)", val); // Note, must be a formatted string already: 'a','b','c' or  1,2,3
 else if(sameString(op, "contains") || sameString(op, "like"))
     dyStringPrintf(dyQuery,"like '%%%s%%'", val);
-else if (limit > 0 && strlen(val) == limit)
-    dyStringPrintf(dyQuery,"like '%s%%'", val);
+    else if (limit > 0 && strlen(val) != limit)
+        dyStringPrintf(dyQuery,"like '%.*s%%'", limit, val); // prefix match on at most limit chars of val
 else
     dyStringPrintf(dyQuery,"= '%s'", val);
-
-if (!tables || !files)
+    }
     dyStringAppendC(dyQuery,')');
 dyStringAppend(dyQuery," order by obj");
 
-return sqlQuickList(conn, dyStringCannibalize(&dyQuery));
+struct mdb *mdb = mdbLoadByQuery(conn, dyStringCannibalize(&dyQuery));
+verbose(3, "rows (vars) returned: %d\n",slCount(mdb));
+struct mdbObj *mdbObjs = mdbObjsLoadFromMemory(&mdb,TRUE); // NOTE(review): presumably groups the rows into objs and consumes mdb - confirm
+
+return mdbObjs;
+}
+
+struct mdbObj *mdbObjRepeatedSearch(struct sqlConnection *conn,struct slPair *varValPairs,boolean tables,boolean files)
+// Search the metaDb table for objs by var,val pairs.  How each val is searched depends on mdbCvSearchMethod(var).
+// All terms are gathered into one line and the objs are selected with a single mdbObjsQueryByVars() call.
+{
+struct slPair *onePair;
+struct dyString *dyTerms = dyStringNew(256);
+// Build list of terms as "var1=val1 var2=val2a,val2b,val2c var3=%val3%"
+for(onePair = varValPairs; onePair != NULL; onePair = onePair->next)
+    {
+    enum mdbCvSearchable searchBy = mdbCvSearchMethod(onePair->name);
+    // The search method decides how onePair->val is read (slName list or string) and how the term is written
+    if (searchBy == cvsSearchByMultiSelect)
+        {
+        // TO BE IMPLEMENTED
+        warn("mdb search by multi-select is not yet implemented.");
+        // The mdbVal[1] will have to be filled cartOptionalSlNameList(cart,???)
+        struct slName *choices = (struct slName *)onePair->val;
+        if (slCount(choices) == 1)
+            dyStringPrintf(dyTerms,"%s=%s ",onePair->name,choices->name);
+        else if(choices != NULL)
+            {
+            // Then slNames will need to be assembled into a string in the form of a,b,c
+            dyStringPrintf(dyTerms,"%s=%s",onePair->name,choices->name);
+            struct slName *choice = choices->next;
+            for(;choice!=NULL;choice=choice->next)
+                dyStringPrintf(dyTerms,",%s",choice->name);
+            dyStringAppendC(dyTerms,' ');
+            }
+        }
+    else if (searchBy == cvsSearchBySingleSelect)
+        dyStringPrintf(dyTerms,"%s=%s ",onePair->name,(char *)onePair->val);
+    else if (searchBy == cvsSearchByFreeText)
+        dyStringPrintf(dyTerms,"%s=%%%s%% ",onePair->name,(char *)onePair->val);
+    else if (searchBy == cvsSearchByDateRange || searchBy == cvsSearchByIntegerRange)
+        {
+        // TO BE IMPLEMENTED
+        // Requires new mdbObjSearch API and more than one (char *)onePair->val
+        warn("mdb search by date is not yet implemented.");
+        }
+    }
+// Be sure to include table or file in selections
+if (tables)
+    dyStringAppend(dyTerms,"tableName=? ");
+if (files)
+    dyStringAppend(dyTerms,"fileName=? ");
+
+// Build the mdbByVals struct and then select all mdbObjs in one query
+struct mdbByVar *mdbByVars = mdbByVarsLineParse(dyStringContents(dyTerms));
+struct mdbObj *mdbObjs = mdbObjsQueryByVars(conn,NULL,mdbByVars); // Uses master table metaDb not sandbox versions
+dyStringFree(&dyTerms); // Release the term buffer (merely clearing it would leak the dyString)
+// NOTE(review): mdbByVars is not released here - confirm whether mdbObjsQueryByVars() takes ownership
+return mdbObjs;
+}
+
+struct slName *mdbObjNameSearch(struct sqlConnection *conn, char *var, char *val, char *op, int limit, boolean tables, boolean files)
+// Returns the names of the objs found by mdbObjSearch(conn,var,val,op,limit).  When tables and/or files
+// is set, the objs are first narrowed with mdbObjsFilterTablesOrFiles(); when neither is set, no narrowing.
+{
+struct mdbObj *found = mdbObjSearch(conn,var,val,op,limit);
+if (tables || files)
+    {
+    // Narrow to objs with tables/files; the objs filtered out are not wanted, so free them right away
+    struct mdbObj *unwanted = mdbObjsFilterTablesOrFiles(&found,tables,files);
+    mdbObjsFree(&unwanted);
+    }
+
+// Only the names are returned, so the objs themselves are released before leaving
+struct slName *names = mdbObjToSlName(found);
+mdbObjsFree(&found);
+return names;
 }
 
 struct slName *mdbValSearch(struct sqlConnection *conn, char *var, int limit, boolean tables, boolean files)
 // Search the metaDb table for vals by var.  Can impose (non-zero) limit on returned string size of val
 // Search is via mysql, so it's case-insensitive.  Return is sorted on val.
 {  // TODO: Change this to use normal mdb struct routines?
 struct slName *retVal;
 
 if (!tables && !files)
     errAbort("mdbValSearch requests values for neither table nor file objects.\n");
 
 char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first
 
 struct dyString *dyQuery = dyStringNew(512);
 if (limit > 0)