9b16243c795990dca302c0f3307af02c20678c37 tdreszer Thu Aug 8 15:33:00 2013 -0700 Further changes to streamline queries due to mysql 5.6 inefficiencies. Redmine #11155. This has been tested on hgwdev-tdreszer.sdsc.edu browser, which is 5.6. However, RR way still be a problem, given cache interference. diff --git src/hg/lib/mdb.c src/hg/lib/mdb.c index 47bc910..b1f2892 100644 --- src/hg/lib/mdb.c +++ src/hg/lib/mdb.c @@ -3452,98 +3452,132 @@ char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first char letter = 'A'; struct dyString *dyQuery = dyStringNew(512); if (limit > 0) sqlDyStringPrintf(dyQuery,"select distinct LEFT(%c.val,%d)",letter,limit); else sqlDyStringPrintf(dyQuery,"select distinct %c.val",letter); sqlDyStringPrintf(dyQuery," from %s %c",tableName,letter); mdbSearchableQueryRestictForTablesOrFiles(dyQuery,tableName, letter, hasTableName, hasFileName); // Need to append 'where' AFTER qualifying joins! -sqlDyStringPrintf(dyQuery, " where %c.var='%s' order by %c.val", letter,var,letter); +sqlDyStringPrintf(dyQuery, " where %c.var='%s'", letter,var); retVal = sqlQuickList(conn, dyStringCannibalize(&dyQuery)); slNameSortCase(&retVal); return retVal; } struct slPair *mdbValLabelSearch(struct sqlConnection *conn, char *var, int limit, boolean tags, boolean hasTableName, boolean hasFileName) // Search the metaDb table for vals by var and returns val (as pair->name) // and controlled vocabulary (cv) label (if it exists) (as pair->val). // Can impose (non-zero) limit on returned string size of name. // Searchable vars are only for table or file objects. // Further restrict to vars associated with tableName, fileName or both. // Return is case insensitive sorted on label (cv label or else val). // If requested, return cv tag instead of mdb val. -{ // TODO: Change this to use normal mdb struct routines? +{ if (!hasTableName && !hasFileName) errAbort("mdbValLabelSearch requests vals associated with neither table nor files.\n"); char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first +struct hash *varHash = (struct hash *)cvTermHash(var); +boolean isCvDefined = cvTermIsCvDefined(var); -char letter = 'A'; +struct slName *vals = NULL; struct dyString *dyQuery = dyStringNew(512); -if (limit > 0) - sqlDyStringPrintf(dyQuery,"select distinct LEFT(%c.val,%d)",letter,limit); -else - sqlDyStringPrintf(dyQuery,"select distinct %c.val",letter); -sqlDyStringPrintf(dyQuery," from %s %c",tableName,letter); +// If term is cv defined, no need to query for a list +if (isCvDefined) + { + assert(varHash != NULL); + struct hashCookie varCookie = hashFirst(varHash); + char *val; + while ((val = hashNextName(&varCookie)) != NULL) + slNameAddHead(&vals,val); + } +else + { + // simple query of vals which doesn't enforce table/file restrictions + sqlDyStringPrintf(dyQuery,"select val from %s where var='%s'",tableName,var); + vals = sqlQuickList(conn, dyStringContents(dyQuery)); + } +slNameSort(&vals); +// Will filter results to enforce table/file restrictions +// New mysql 5.6 is inefficient if the filtering is applied to the query above +char letter = 'A'; +dyStringClear(dyQuery); +sqlDyStringPrintf(dyQuery,"select 1 from %s %c",tableName,letter); mdbSearchableQueryRestictForTablesOrFiles(dyQuery,tableName,letter, hasTableName, hasFileName); - -// Need to append 'where' AFTER qualifying joins! -sqlDyStringPrintf(dyQuery, " where %c.var='%s'",letter,var); -//warn("%s",dyStringContents(dyQuery)); - -struct hash *varHash = (struct hash *)cvTermHash(var); +sqlDyStringPrintf(dyQuery, " where %c.var='%s' and %c.val = ",letter,var,letter); +int queryLengthBeforeVal = dyStringLen(dyQuery); struct slPair *pairs = NULL; -struct sqlResult *sr = sqlGetResult(conn, dyStringContents(dyQuery)); -dyStringFree(&dyQuery); -char **row; -while ((row = sqlNextRow(sr)) != NULL) +char *lastVal = ""; +struct slName *oneVal = vals; +for ( ; oneVal != NULL; oneVal = oneVal->next) { - char *val = row[0]; + char *val = oneVal->name; + if (limit > 0) + { + if (sameStringN(val,lastVal,limit)) + continue; + } + else if (!isCvDefined && sameString(val,lastVal))// query 'distinct' is slower in mysql 5.6 + continue; + lastVal = val; + + // Filter to enforce file/table restrictions + dyStringResize(dyQuery,queryLengthBeforeVal); + sqlDyStringPrintf(dyQuery, "'%s' limit 1", val); + if (sqlQuickNum(conn, dyStringContents(dyQuery)) != 1) + continue; + char *label = NULL; if (varHash != NULL) { struct hash *valHash = hashFindVal(varHash,val); if (valHash != NULL) { - label = cloneString(hashOptionalVal(valHash,CV_LABEL,row[0])); + label = cloneString(hashOptionalVal(valHash,CV_LABEL,val)); if (tags) { char *tag = hashFindVal(valHash,CV_TAG); if (tag != NULL) val = tag; } } } if (label == NULL) - label = cloneString(row[0]); + { + if (limit > 0) + label = cloneStringZ(val,limit); + else + label = cloneString(val); + } label = strSwapChar(label,'_',' '); // vestigial _ meaning space slPairAdd(&pairs,val,label); } -sqlFreeResult(&sr); +slNameFreeList(&vals); +dyStringFree(&dyQuery); if (slCount(pairs) > 0) { // should have a list sorted on the label enum cvDataType eCvDataType = cvDataType(var); if (eCvDataType == cvInteger) slPairValAtoiSort(&pairs); else slPairValSortCase(&pairs); } return pairs; } struct slPair *mdbVarsSearchable(struct sqlConnection *conn, boolean hasTableName, boolean hasFileName) // returns a white list of mdb vars that actually exist in the current DB. @@ -3552,48 +3586,38 @@ { if (!hasTableName && !hasFileName) errAbort("mdbVarsSearchable requests vals associated with neither table nor files.\n"); char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first // Start with what cv says is searchable struct slPair *cvApproved = cvWhiteList(TRUE,FALSE); assert(cvApproved != NULL); // Now construct single select to find all vars in the mdb // (belonging to table and/or file objs) // Note the second selected column which is only to there to make sqlQuckHash happy char letter = 'A'; struct dyString *dyQuery = dyStringNew(256); -sqlDyStringPrintf(dyQuery, "select distinct %c.var,'.' from %s %c",letter,tableName,letter); - +sqlDyStringPrintf(dyQuery, "select 1 from %s %c",tableName,letter); mdbSearchableQueryRestictForTablesOrFiles(dyQuery,tableName, letter, hasTableName, hasFileName); // Need to append 'where' AFTER qualifying joins! -// On the RR it is more efficient when selecting only the whitelisted vars. -struct slPair *oneVar = cvApproved; -sqlDyStringPrintf(dyQuery, " where %c.var in (", letter); -for ( ; oneVar != NULL; oneVar = oneVar->next) - sqlDyStringPrintf(dyQuery, "'%s'%c", oneVar->name,(oneVar->next != NULL? ',':')')); -//sqlDyStringPrintf(dyQuery, " group by %c.var",letter); // redundant, less efficient? -//warn("%s",dyStringContents(dyQuery)); - -// Which vars are in the mdb at this time? -struct hash *inTable = sqlQuickHash(conn,dyStringContents(dyQuery)); // could use sqlQuickList -dyStringFree(&dyQuery); -if (inTable == NULL) - return NULL; +sqlDyStringPrintf(dyQuery, " where %c.var = ", letter); +int queryLengthBeforeVar = dyStringLen(dyQuery); // Filter cv approved by eliminating those vars not currently in the mdb. struct slPair *relevant = NULL; while (cvApproved != NULL) { struct slPair *oneVar = slPopHead(&cvApproved); - if (hashFindVal(inTable, oneVar->name) != NULL) + dyStringResize(dyQuery,queryLengthBeforeVar); + sqlDyStringPrintf(dyQuery, "'%s' limit 1", oneVar->name); + if (sqlQuickNum(conn, dyStringContents(dyQuery)) == 1) slAddHead(&relevant, oneVar); else slPairFree(&oneVar); } -hashFree(&inTable); +dyStringFree(&dyQuery); slReverse(&relevant); return relevant; }