9b16243c795990dca302c0f3307af02c20678c37
tdreszer
  Thu Aug 8 15:33:00 2013 -0700
Further changes to streamline queries due to mysql 5.6 inefficiencies. Redmine #11155.  This has been tested on hgwdev-tdreszer.sdsc.edu browser, which is 5.6.  However, RR way still be a problem, given cache interference.
diff --git src/hg/lib/mdb.c src/hg/lib/mdb.c
index 47bc910..b1f2892 100644
--- src/hg/lib/mdb.c
+++ src/hg/lib/mdb.c
@@ -3452,98 +3452,132 @@
 
 char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first
 
 char letter = 'A';
 struct dyString *dyQuery = dyStringNew(512);
 if (limit > 0)
     sqlDyStringPrintf(dyQuery,"select distinct LEFT(%c.val,%d)",letter,limit);
 else
     sqlDyStringPrintf(dyQuery,"select distinct %c.val",letter);
 
 sqlDyStringPrintf(dyQuery," from %s %c",tableName,letter);
 
 mdbSearchableQueryRestictForTablesOrFiles(dyQuery,tableName, letter, hasTableName, hasFileName);
 
 // Need to append 'where' AFTER qualifying joins!
-sqlDyStringPrintf(dyQuery, " where %c.var='%s' order by %c.val", letter,var,letter);
+sqlDyStringPrintf(dyQuery, " where %c.var='%s'", letter,var);
 
 retVal = sqlQuickList(conn, dyStringCannibalize(&dyQuery));
 slNameSortCase(&retVal);
 return retVal;
 }
 
 struct slPair *mdbValLabelSearch(struct sqlConnection *conn, char *var, int limit, boolean tags,
                                  boolean hasTableName, boolean hasFileName)
 // Search the metaDb table for vals by var and returns val (as pair->name)
 // and controlled vocabulary (cv) label (if it exists) (as pair->val).
 // Can impose (non-zero) limit on returned string size of name.
 // Searchable vars are only for table or file objects.
 // Further restrict to vars associated with tableName, fileName or both.
 // Return is case insensitive sorted on label (cv label or else val).
 // If requested, return cv tag instead of mdb val.
-{  // TODO: Change this to use normal mdb struct routines?
+{
 
 if (!hasTableName && !hasFileName)
     errAbort("mdbValLabelSearch requests vals associated with neither table nor files.\n");
 
 char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first
+struct hash *varHash = (struct hash *)cvTermHash(var);
+boolean isCvDefined = cvTermIsCvDefined(var);
 
-char letter = 'A';
+struct slName *vals = NULL;
 struct dyString *dyQuery = dyStringNew(512);
-if (limit > 0)
-    sqlDyStringPrintf(dyQuery,"select distinct LEFT(%c.val,%d)",letter,limit);
-else
-    sqlDyStringPrintf(dyQuery,"select distinct %c.val",letter);
 
-sqlDyStringPrintf(dyQuery," from %s %c",tableName,letter);
+// If term is cv defined, no need to query for a list
+if (isCvDefined)
+    {
+    assert(varHash != NULL);
+    struct hashCookie varCookie = hashFirst(varHash);
+    char *val;
+    while ((val = hashNextName(&varCookie)) != NULL)
+        slNameAddHead(&vals,val);
+    }
+else
+    {
+    // simple query of vals which doesn't enforce table/file restrictions
+    sqlDyStringPrintf(dyQuery,"select val from %s where var='%s'",tableName,var);
+    vals = sqlQuickList(conn, dyStringContents(dyQuery));
+    }
+slNameSort(&vals);
 
+// Will filter results to enforce table/file restrictions
+// New mysql 5.6 is inefficient if the filtering is applied to the query above
+char letter = 'A';
+dyStringClear(dyQuery);
+sqlDyStringPrintf(dyQuery,"select 1 from %s %c",tableName,letter);
 mdbSearchableQueryRestictForTablesOrFiles(dyQuery,tableName,letter, hasTableName, hasFileName);
-
-// Need to append 'where' AFTER qualifying joins!
-sqlDyStringPrintf(dyQuery, " where %c.var='%s'",letter,var);
-//warn("%s",dyStringContents(dyQuery));
-
-struct hash *varHash = (struct hash *)cvTermHash(var);
+sqlDyStringPrintf(dyQuery, " where %c.var='%s' and %c.val = ",letter,var,letter);
+int queryLengthBeforeVal = dyStringLen(dyQuery);
 
 struct slPair *pairs = NULL;
-struct sqlResult *sr = sqlGetResult(conn, dyStringContents(dyQuery));
-dyStringFree(&dyQuery);
-char **row;
-while ((row = sqlNextRow(sr)) != NULL)
+char *lastVal = "";
+struct slName *oneVal = vals;
+for ( ; oneVal != NULL; oneVal = oneVal->next)
     {
-    char *val = row[0];
+    char *val = oneVal->name;
+    if (limit > 0)
+        {
+        if (sameStringN(val,lastVal,limit))
+            continue;
+        }
+    else if (!isCvDefined && sameString(val,lastVal))// query 'distinct' is slower in mysql 5.6
+        continue;
+    lastVal = val;
+
+    // Filter to enforce file/table restrictions
+    dyStringResize(dyQuery,queryLengthBeforeVal);
+    sqlDyStringPrintf(dyQuery, "'%s' limit 1", val);
+    if (sqlQuickNum(conn, dyStringContents(dyQuery)) != 1)
+        continue;
+
     char *label = NULL;
     if (varHash != NULL)
         {
         struct hash *valHash = hashFindVal(varHash,val);
         if (valHash != NULL)
             {
-            label = cloneString(hashOptionalVal(valHash,CV_LABEL,row[0]));
+            label = cloneString(hashOptionalVal(valHash,CV_LABEL,val));
             if (tags)
                 {
                 char *tag = hashFindVal(valHash,CV_TAG);
                 if (tag != NULL)
                     val = tag;
                 }
             }
         }
     if (label == NULL)
-        label = cloneString(row[0]);
+        {
+        if (limit > 0)
+            label = cloneStringZ(val,limit);
+        else
+            label = cloneString(val);
+        }
     label = strSwapChar(label,'_',' ');  // vestigial _ meaning space
     slPairAdd(&pairs,val,label);
     }
-sqlFreeResult(&sr);
+slNameFreeList(&vals);
+dyStringFree(&dyQuery);
 if (slCount(pairs) > 0)
     {
     // should have a list sorted on the label
     enum cvDataType eCvDataType = cvDataType(var);
     if (eCvDataType == cvInteger)
         slPairValAtoiSort(&pairs);
     else
         slPairValSortCase(&pairs);
     }
 return pairs;
 }
 
 struct slPair *mdbVarsSearchable(struct sqlConnection *conn, boolean hasTableName,
                                  boolean hasFileName)
 // returns a white list of mdb vars that actually exist in the current DB.
@@ -3552,48 +3586,38 @@
 {
 if (!hasTableName && !hasFileName)
     errAbort("mdbVarsSearchable requests vals associated with neither table nor files.\n");
 
 char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first
 
 // Start with what cv says is searchable
 struct slPair *cvApproved = cvWhiteList(TRUE,FALSE);
 assert(cvApproved != NULL);
 
 // Now construct single select to find all vars in the mdb
 // (belonging to table and/or file objs)
 // Note the second selected column which is only to there to make sqlQuckHash happy
 char letter = 'A';
 struct dyString *dyQuery = dyStringNew(256);
-sqlDyStringPrintf(dyQuery, "select distinct %c.var,'.' from %s %c",letter,tableName,letter);
-
+sqlDyStringPrintf(dyQuery, "select 1 from %s %c",tableName,letter);
 mdbSearchableQueryRestictForTablesOrFiles(dyQuery,tableName, letter, hasTableName, hasFileName);
 
 // Need to append 'where' AFTER qualifying joins!
-// On the RR it is more efficient when selecting only the whitelisted vars.
-struct slPair *oneVar = cvApproved;
-sqlDyStringPrintf(dyQuery, " where %c.var in (", letter);
-for ( ; oneVar != NULL; oneVar = oneVar->next)
-    sqlDyStringPrintf(dyQuery, "'%s'%c", oneVar->name,(oneVar->next != NULL? ',':')'));
-//sqlDyStringPrintf(dyQuery, " group by %c.var",letter); // redundant, less efficient?
-//warn("%s",dyStringContents(dyQuery));
-
-// Which vars are in the mdb at this time?
-struct hash *inTable = sqlQuickHash(conn,dyStringContents(dyQuery)); // could use sqlQuickList
-dyStringFree(&dyQuery);
-if (inTable == NULL)
-    return NULL;
+sqlDyStringPrintf(dyQuery, " where %c.var = ", letter);
+int queryLengthBeforeVar = dyStringLen(dyQuery);
 
 // Filter cv approved by eliminating those vars not currently in the mdb.
 struct slPair *relevant = NULL;
 while (cvApproved != NULL)
     {
     struct slPair *oneVar = slPopHead(&cvApproved);
-    if (hashFindVal(inTable, oneVar->name) != NULL)
+    dyStringResize(dyQuery,queryLengthBeforeVar);
+    sqlDyStringPrintf(dyQuery, "'%s' limit 1", oneVar->name);
+    if (sqlQuickNum(conn, dyStringContents(dyQuery)) == 1)
         slAddHead(&relevant, oneVar);
     else
         slPairFree(&oneVar);
     }
-hashFree(&inTable);
+dyStringFree(&dyQuery);
 slReverse(&relevant);
 return relevant;
 }