f54f526071669fd7d44e52871eb7ba48ebbaa7df tdreszer Fri Jul 2 13:23:49 2010 -0700 Rearranged code and comments as per Jims suggestion. Made selection by var=val pairs more full proof diff --git src/hg/lib/mdb.c src/hg/lib/mdb.c index ffc8c7e..b122e6f 100644 --- src/hg/lib/mdb.c +++ src/hg/lib/mdb.c @@ -665,42 +665,62 @@ for(thisWord=0;thisWord<count;thisWord++) { if(strchr(words[thisWord], '=') == NULL) - { errAbort("Expected 'var=val' but found '%s'. This is not properly formatted metadata:\n\t%s\n",words[thisWord],line); - //mdbObjsFree(&mdbObj); - //return NULL; - } + + // Set up var struct from 1st half of pair AllocVar(rootVar); rootVar->var = cloneNextWordByDelimiter(&(words[thisWord]),'='); rootVar->notEqual = (rootVar->var[strlen(rootVar->var)-1] == '!'); // requested not equal if(rootVar->notEqual) rootVar->var[strlen(rootVar->var)-1] = 0; - char *val = cloneString(words[thisWord]); - if(sameWord(val,"?")) // "var=?" or "var=" will query by var name only - freez(&val); + char *val = NULL; + if (words[thisWord][0] != '\0' && words[thisWord][0] != '?') // "var=?" or "var=" will query by var name only + val = cloneString(words[thisWord]); + // Make sure this isn't a repeat struct mdbByVar *oldVar = (struct mdbByVar *)hashFindVal(varHash, rootVar->var); - if(oldVar) - { // FIXME: Could build this for 'or' queries! - verbose(1, "The same variable appears twice: %s=%s and %s=%s. Ignoring second value.\n", + if (oldVar && (oldVar->notEqual == rootVar->notEqual)) + { // This is very powerful: "cell=GM% cell!=GM12878" + if (val != NULL) + { + verbose(2, "The same variable appears twice: %s=%s and %s=%s. Adding second value.\n", oldVar->var,oldVar->vals->val,rootVar->var,val); - freeMem(rootVar->var); - freeMem(rootVar); - freeMem(val); + AllocVar(limbVal); + limbVal->val = val; + slAddTail(&oldVar->vals,limbVal); + } + mdbByVarsFree(&rootVar); + continue; + } + + // Fill in the val(s) from second half of pair + if (val != NULL) + { + // handle comma separated list of vals (if unquoted) + if (val[0] != '\'' && val[0] != '"' && strchr(val,',') != NULL) + { + char * aVal = NULL; + while((aVal = cloneNextWordByDelimiter(&val,',')) != NULL) + { + AllocVar(limbVal); + limbVal->val = aVal; + slAddTail(&rootVar->vals,limbVal); + } } else { AllocVar(limbVal); limbVal->val = val; rootVar->vals = limbVal; + } + } hashAdd(varHash, rootVar->var, rootVar); slAddHead(&mdbByVars,rootVar); } - } freeMem(words); slReverse(&mdbByVars); verbose(3, "mdbByVarsLineParse() parsed:%d first: %s%s='%s'.\n", - slCount(mdbByVars->vals),mdbByVars->var,(mdbByVars->notEqual?"!":""),mdbByVars->vals->val); + slCount(mdbByVars),mdbByVars->var,(mdbByVars->notEqual?"!":""),(mdbByVars->vals?mdbByVars->vals->val:"")); return mdbByVars; } @@ -1233,7 +1253,22 @@ // Query the metadata table by one or more var=val pairs to find the distinct set of objs that satisfy ALL conditions. // Returns new mdbObj struct fully populated and sorted in obj,var order. { -// select obj,var,val where (var= [and val in (val1,val2)]) or (var= [and val in (val1,val2)]) order by obj,var +// MOST POPULAR WAY TO QUERY MDB. Building example queries like: +// "cell=GM12878" or "cell!=GM12878" +// SELECT T1.obj,T1.var,T1.varType,T1.val FROM metaDb T1 WHERE EXISTS (SELECT T2.obj FROM metaDb T2 WHERE T2.obj = T1.obj AND T2.var = 'cell' AND T2.val = 'GM12878') ORDER BY T1.obj, T1.var; +// SELECT T1.obj,T1.var,T1.varType,T1.val FROM metaDb T1 WHERE EXISTS (SELECT T2.obj FROM metaDb T2 WHERE T2.obj = T1.obj AND T2.var = 'cell' AND T2.val != 'GM12878') ORDER BY T1.obj, T1.var; +// "cell=GM%" or "cell!=GM%" +// SELECT T1.obj,T1.var,T1.varType,T1.val FROM metaDb T1 WHERE EXISTS (SELECT T2.obj FROM metaDb T2 WHERE T2.obj = T1.obj AND T2.var = 'cell' AND T2.val LIKE 'GM%') ORDER BY T1.obj, T1.var; +// SELECT T1.obj,T1.var,T1.varType,T1.val FROM metaDb T1 WHERE EXISTS (SELECT T2.obj FROM metaDb T2 WHERE T2.obj = T1.obj AND T2.var = 'cell' AND T2.val NOT LIKE 'GM%') ORDER BY T1.obj, T1.var; +// "cell=" or "cell!=" +// SELECT T1.obj,T1.var,T1.varType,T1.val FROM metaDb T1 WHERE EXISTS (SELECT T2.obj FROM metaDb T2 WHERE T2.obj = T1.obj AND T2.var = 'cell') ORDER BY T1.obj, T1.var; +// SELECT T1.obj,T1.var,T1.varType,T1.val FROM metaDb T1 WHERE NOT EXISTS (SELECT T2.obj FROM metaDb T2 WHERE T2.obj = T1.obj AND T2.var = 'cell') ORDER BY T1.obj, T1.var; +// "cell=GM12878,K562" or "cell!=GM12878,K562" +// SELECT T1.obj,T1.var,T1.varType,T1.val FROM metaDb T1 WHERE EXISTS (SELECT T2.obj FROM metaDb T2 WHERE T2.obj = T1.obj AND T2.var = 'cell' AND T2.val IN ('GM12878','K562')) ORDER BY T1.obj, T1.var; +// SELECT T1.obj,T1.var,T1.varType,T1.val FROM metaDb T1 WHERE EXISTS (SELECT T2.obj FROM metaDb T2 WHERE T2.obj = T1.obj AND T2.var = 'cell' AND T2.val NOT IN ('K562','GM12878')) ORDER BY T1.obj, T1.var; +// "cell=GM% cell!=GM12878" (very powerful) +// SELECT T1.obj,T1.var,T1.varType,T1.val FROM metaDb T1 WHERE EXISTS (SELECT T2.obj FROM metaDb T2 WHERE T2.obj = T1.obj AND T2.var = 'cell' AND T2.val LIKE 'GM%') +// AND EXISTS (SELECT T3.obj FROM metaDb T3 WHERE T3.obj = T1.obj AND T3.var = 'cell' AND T3.val != 'GM12878') ORDER BY T1.obj, T1.var; if(table == NULL) table = MDB_DEFAULT_NAME; @@ -1248,8 +1283,8 @@ int tix; for(rootVar=mdbByVars,tix=2;rootVar!=NULL;rootVar=rootVar->next,tix++) { -/////////////// - boolean hasVal = (rootVar->vals != NULL && rootVar->vals->val != NULL && strlen(rootVar->vals->val) > 0); + boolean hasVal = (rootVar->vals != NULL); + //boolean hasVal = (rootVar->vals != NULL && rootVar->vals->val != NULL && strlen(rootVar->vals->val) > 0); if(!gotVar) { dyStringPrintf(dy, " WHERE "); @@ -1270,21 +1305,6 @@ dyStringPrintf(dy, "%s '%s'", (strchr(rootVar->var,'%')?"LIKE":"="), rootVar->var); -/////////////// -// if(!gotVar) -// { -// dyStringPrintf(dy, " where t1.obj in "); -// gotVar=TRUE; -// } -// else -// dyStringPrintf(dy, " AND t1.obj in "); -// dyStringPrintf(dy, "(select t%d.obj from %s t%d where t%d.obj = t1.obj and t%d.var ",tix,table,tix,tix,tix); -// -// if(rootVar->notEqual && rootVar->vals == NULL) -// dyStringPrintf(dy, "%s",strchr(rootVar->var,'%')?"NOT ":"!"); -// -// dyStringPrintf(dy, "%s '%s'", -// (strchr(rootVar->var,'%')?"like":"="), rootVar->var); struct mdbLimbVal *limbVal; boolean multiVals = FALSE; @@ -1297,7 +1317,7 @@ { dyStringPrintf(dy, " AND T%d.val ",tix); if(rootVar->notEqual) - dyStringPrintf(dy, "%s",strchr(limbVal->val,'%')?"NOT ":"!"); + dyStringPrintf(dy, "%s",(strchr(limbVal->val,'%') || limbVal->next)?"NOT ":"!"); if(limbVal->next == NULL) // only one val { dyStringPrintf(dy, "%s '%s'", @@ -1505,176 +1525,6 @@ return count; } -void mdbObjPrintUpdateLines(struct mdbObj **mdbObjs,char *dbToUpdate,char *tableToUpdate, char *varsToSelect,char *varsToSet) -// prints mdbUpdate lines to allow taking vars from one db to another (sorts mdbObjs so pass pointer) -{ -if(dbToUpdate == NULL || tableToUpdate == NULL || varsToSelect == NULL || varsToSet == NULL) - errAbort("mdbObjPrintUpdateLines is missing important parameter.\n"); - -int selCount = 0; -char **selectVars = NULL; -if(differentWord(varsToSelect,"obj")) - { - // Sort objs to avoid duplicate mdbUpdate statements - mdbObjsSortOnVars(mdbObjs, varsToSelect); - - // Parse list of selcting vars (could be simply expId or expId,replicate,view) - selCount = chopByChar(varsToSelect,',',NULL,0); - if(selCount <= 0) - errAbort("mdbObjPrintUpdateLines is missing experiment defining variables.\n"); - selectVars = needMem(sizeof(char *) * selCount); - selCount = chopByChar(varsToSelect,',',selectVars,selCount); - } -// Parse list of vars to update -int updCount = chopByChar(varsToSet,',',NULL,0); -if(updCount <= 0) - errAbort("mdbObjPrintUpdateLines is missing variables to set.\n"); -char **updateVars = needMem(sizeof(char *) * updCount); -updCount = chopByChar(varsToSet,',',updateVars,updCount); -int ix=0; -boolean updExpId = (updCount == 1 && startsWithWordByDelimiter("expId",'=',updateVars[0])); -int startingId=0; -if(updExpId) - { - startingId = sqlSigned(skipBeyondDelimit(updateVars[0],'=')); - updateVars[0][strlen("expId")] = '\0'; - } - -struct mdbObj *mdbObj = NULL; -struct dyString *thisSelection = newDyString(256); -struct dyString *lastSelection = newDyString(256); -for(mdbObj=*mdbObjs;mdbObj!=NULL;mdbObj=mdbObj->next) - { - if(mdbObj->obj == NULL || mdbObj->deleteThis) - continue; - - // Build this selection string - dyStringClear(thisSelection); - if(sameWord(varsToSelect,"obj")) - { - dyStringPrintf(thisSelection,"-obj=%s",mdbObj->obj); - } - else - { - dyStringPrintf(thisSelection,"-vars=\""); - for(ix = 0;ix < selCount; ix++) - { - char *val = mdbObjFindValue(mdbObj,selectVars[ix]); - if(val != NULL) // TODO what to do for NULLS? - { - if(strchr(val, ' ') != NULL) // Has blanks - dyStringPrintf(thisSelection,"%s='%s' ",selectVars[ix],val);// FIXME: Need to make single quotes work since already within double quotes! - else - dyStringPrintf(thisSelection,"%s=%s ",selectVars[ix],val); - } - } - dyStringPrintf(thisSelection,"\""); - } - - // Don't bother making another mdpUpdate line if selection is the same. - if(sameString(dyStringContents(lastSelection),dyStringContents(thisSelection))) - continue; - dyStringClear(lastSelection); - dyStringAppend(lastSelection,dyStringContents(thisSelection)); - - printf("mdbUpdate %s table=%s %s",dbToUpdate,tableToUpdate,dyStringContents(thisSelection)); - - // Now look up the value of each var to update - printf(" -setVars=\""); - for(ix = 0;ix < updCount; ix++) - { - if(updExpId) - printf("expId=%u",startingId++);// FIXME: Need to make single quotes work since already within double quotes! - else - { - char *val = mdbObjFindValue(mdbObj,updateVars[ix]); - if(val != NULL) // What to do for NULLS? Ignore - { - printf("%s=",updateVars[ix]); - if(strchr(val, ' ') != NULL) // Has blanks - printf("'%s' ",val);// FIXME: Need to make single quotes work since already within double quotes! - else - printf("%s ",val); - } - } - } - printf("\" -test\n"); // Always test first - } -dyStringFree(&thisSelection); -dyStringFree(&lastSelection); -} - -void mdbObjPrintInsertToExperimentsTable(struct mdbObj **mdbObjs,char *expTableName, char *expDefiningVars) -// prints insert statments for the experiments taable to backfile experiments submitted before the experiments table existed -{ -if(expTableName == NULL || expDefiningVars == NULL) - errAbort("mdbObjPrintInsertToExpTbl is missing important parameter.\n"); - -int varCount = 0; -char **expVars = NULL; -if(sameWord(expDefiningVars,"obj")) - errAbort("mdbObjPrintInsertToExpTbl 'obj' is an invalid experiment defining variable.\n"); - -// Sort objs to avoid duplicate mdbUpdate statements -mdbObjsSortOnVars(mdbObjs, expDefiningVars); - -// Parse list of selcting vars (could be simply expId or expId,replicate,view) -varCount = chopByChar(expDefiningVars,',',NULL,0); -if(varCount <= 0) - errAbort("mdbObjPrintInsertToExpTbl is missing experiment defining variables.\n"); -expVars = needMem(sizeof(char *) * varCount); -varCount = chopByChar(expDefiningVars,',',expVars,varCount); -int ix=0; - -struct mdbObj *mdbObj = NULL; -struct dyString *varNames = newDyString(256); -struct dyString *varVals = newDyString(256); -struct dyString *lastVals = newDyString(256); -for(mdbObj=*mdbObjs;mdbObj!=NULL;mdbObj=mdbObj->next) - { - if(mdbObj->obj == NULL || mdbObj->deleteThis) - continue; - - // Build this selection string - dyStringClear(varNames); - dyStringClear(varVals); - - boolean first=TRUE; - // "insert into expTable (cell,antibody) values ('GM12878','CTCF'); - for(ix = 0;ix < varCount; ix++) - { - char *val = mdbObjFindValue(mdbObj,expVars[ix]); - if(val != NULL) // TODO what to do for NULLS? - { - if(first) - first=FALSE; - else - { - dyStringPrintf(varNames,","); - dyStringPrintf(varVals,","); - } - dyStringPrintf(varNames,"%s", expVars[ix]); - if(countLeadingDigits(val) == strlen(val)) - dyStringPrintf(varVals,"%s",val); - else - dyStringPrintf(varVals,"'%s'",val); - } - } - - // Don't bother making another mdpUpdate line if selection is the same. - if(sameString(dyStringContents(lastVals),dyStringContents(varVals))) - continue; - dyStringClear(lastVals); - dyStringAppend(lastVals,dyStringContents(varVals)); - - printf("INSERT INTO %s (%s) VALUES (%s);\n",expTableName,dyStringContents(varNames),dyStringContents(varVals)); - - } -dyStringFree(&varNames); -dyStringFree(&varVals); -dyStringFree(&lastVals); -} - // ----------------- Utilities ----------------- char *mdbObjFindValue(struct mdbObj *mdbObj, char *var)