4898794edd81be5285ea6e544acbedeaeb31bf78 max Tue Nov 23 08:10:57 2021 -0800 Fixing pointers to README file for license in all source code files. refs #27614 diff --git src/hg/lib/mdb.c src/hg/lib/mdb.c index 250b3d3..dcd7f7a 100644 --- src/hg/lib/mdb.c +++ src/hg/lib/mdb.c @@ -1,3660 +1,3660 @@ /* mdb.c was originally generated by the autoSql program, which also * generated mdb.h and mdb.sql. This module links the database and * the RAM representation of objects. */ /* Copyright (C) 2014 The Regents of the University of California - * See README in this or parent directory for licensing information. */ + * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "linefile.h" #include "dystring.h" #include "jksql.h" #include "hdb.h" #include "cv.h" #include "mdb.h" #include "encode/encodeExp.h" #include "trackHub.h" #include "hubConnect.h" void mdbStaticLoad(char **row, struct mdb *ret) /* Load a row from mdb table into ret. The contents of ret will * be replaced at the next call to this function. */ { ret->obj = row[0]; ret->var = row[1]; ret->val = row[2]; } struct mdb *mdbLoadByQuery(struct sqlConnection *conn, char *query) /* Load all mdb from table that satisfy the query given. * Where query is of the form 'select * from example where something=something' * or 'select example.* from example, anotherTable where example.something = * anotherTable.something'. * Dispose of this with mdbFreeList(). */ { struct mdb *list = NULL, *el; struct sqlResult *sr; char **row; sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { el = mdbLoad(row); slAddHead(&list, el); } slReverse(&list); sqlFreeResult(&sr); return list; } void mdbSaveToDb(struct sqlConnection *conn, struct mdb *el, char *tableName, int updateSize) /* Save mdb as a row to the table specified by tableName. * As blob fields may be arbitrary size updateSize specifies the approx size * of a string that would contain the entire query. 
Arrays of native types are * converted to comma separated strings and loaded as such, User defined types are * inserted as NULL. Strings are automatically escaped to allow insertion into the database. */ { struct dyString *update = newDyString(updateSize); sqlDyStringPrintf(update, "insert into %s set obj='%s', var='%s', val='%s'", tableName, el->obj, el->var, el->val); sqlUpdate(conn, update->string); freeDyString(&update); } struct mdb *mdbLoad(char **row) /* Load a mdb from row fetched with select * from mdb * from database. Dispose of this with mdbFree(). */ { struct mdb *ret; AllocVar(ret); ret->obj = cloneString(row[0]); ret->var = cloneString(row[1]); ret->val = cloneString(row[2]); return ret; } struct mdb *mdbLoadAll(char *fileName) /* Load all mdb from a whitespace-separated file. * Dispose of this with mdbFreeList(). */ { struct mdb *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[4]; while (lineFileRow(lf, row)) { el = mdbLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct mdb *mdbLoadAllByChar(char *fileName, char chopper) /* Load all mdb from a chopper separated file. * Dispose of this with mdbFreeList(). */ { struct mdb *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[4]; while (lineFileNextCharRow(lf, chopper, row, ArraySize(row))) { el = mdbLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct mdb *mdbCommaIn(char **pS, struct mdb *ret) /* Create a mdb out of a comma separated string. * This will fill in ret if non-null, otherwise will * return a new mdb */ { char *s = *pS; if (ret == NULL) AllocVar(ret); ret->obj = sqlStringComma(&s); ret->var = sqlStringComma(&s); ret->val = sqlStringComma(&s); *pS = s; return ret; } void mdbFree(struct mdb **pEl) /* Free a single dynamically allocated mdb such as created * with mdbLoad(). 
*/
{
struct mdb *el;

if ((el = *pEl) == NULL) return;
freeMem(el->obj);
freeMem(el->var);
freeMem(el->val);
freez(pEl);
}

void mdbFreeList(struct mdb **pList)
/* Free a list of dynamically allocated mdb's */
{
struct mdb *el, *next;

for (el = *pList; el != NULL; el = next)
    {
    next = el->next;    // grab the link before the element is freed
    mdbFree(&el);
    }
*pList = NULL;
}

void mdbOutput(struct mdb *el, FILE *f, char sep, char lastSep)
/* Print out mdb. Separate fields with sep. Follow last field with lastSep. */
{
// Fields are wrapped in double quotes only when the separator is a comma.
if (sep == ',') fputc('"',f);
fprintf(f, "%s", el->obj);
if (sep == ',') fputc('"',f);
fputc(sep,f);
if (sep == ',') fputc('"',f);
fprintf(f, "%s", el->var);
if (sep == ',') fputc('"',f);
fputc(sep,f);
if (sep == ',') fputc('"',f);
fprintf(f, "%s", el->val);
if (sep == ',') fputc('"',f);
fputc(lastSep,f);
}

void mdbJsonOutput(struct mdb *el, FILE *f)
/* Print out mdb in JSON format. */
{
// NOTE(review): values are written verbatim; no JSON escaping is done here.
fputc('{',f);
fputc('"',f);
fprintf(f,"obj");
fputc('"',f);
fputc(':',f);
fputc('"',f);
fprintf(f, "%s", el->obj);
fputc('"',f);
fputc(',',f);
fputc('"',f);
fprintf(f,"var");
fputc('"',f);
fputc(':',f);
fputc('"',f);
fprintf(f, "%s", el->var);
fputc('"',f);
fputc(',',f);
fputc('"',f);
fprintf(f,"val");
fputc('"',f);
fputc(':',f);
fputc('"',f);
fprintf(f, "%s", el->val);
fputc('"',f);
fputc('}',f);
}

/* -------------------------------- End autoSql Generated Code -------------------------------- */

#include "ra.h"
#include "hgConfig.h"
#include "obscure.h"

#define MDB_METADATA_KEY "metadata"
#define MDB_METAOBJ_RAKEY "metaObject"
#define MDB_METAVAR_RAKEY "metaVariable"

// ------- Sorts for low level mdb structs -------
int mdbCmp(const void *va, const void *vb)
// Compare mdb to sort on obj,var, case-sensitive.
// Needed to override ORDER_BY performance hit
{
const struct mdb *a = *((struct mdb **)va);
const struct mdb *b = *((struct mdb **)vb);
int ret = strcmp(a->obj, b->obj);
if (ret == 0)
    ret = strcmp(a->var, b->var);
return ret;
}

int mdbVarValCmp(const void *va, const void *vb)
// Compare mdb to sort on var,val case-sensitive. Needed to override ORDER_BY performance hit
// Ties on var and val are broken by obj.
{
const struct mdb *a = *((struct mdb **)va);
const struct mdb *b = *((struct mdb **)vb);
int ret = strcmp(a->var, b->var);
if (ret == 0)
    ret = strcmp(a->val, b->val);
if (ret == 0)
    ret = strcmp(a->obj, b->obj);
return ret;
}

// ------- (static) convert from autoSql -------
static void mdbLeafObjFree(struct mdbLeafObj **leafObjPtr)
// Frees a single mdbLeafObj struct (its obj string, then the struct itself)
{
freeMem((*leafObjPtr)->obj);
freez(leafObjPtr);
}

static void mdbLimbValFree(struct mdbLimbVal **limbValPtr)
// Frees a single mdbLimbVal struct, including its objHash and all of its leaf objs
{
struct mdbLimbVal *limbVal = *limbValPtr;

// Free hash first (shared memory)
hashFree(&(limbVal->objHash));

struct mdbLeafObj *leafObj = NULL;
while ((leafObj = slPopHead(&(limbVal->objs))) != NULL)
    mdbLeafObjFree(&leafObj);

freeMem(limbVal->val);
freez(limbValPtr);
}

static struct mdbVar *mdbVarNew(char *var, void *val)
// Creates a new, unlinked mdbVar holding clones of var and val
{
struct mdbVar *mdbVar;
AllocVar(mdbVar);
mdbVar->var = cloneString(var);
mdbVar->val = cloneString(val);
return mdbVar;
}

static struct mdbVar *mdbVarAdd(struct mdbVar **pMdbVars, char *var, void *val)
// Creates a new mdbVar and adds it onto the head of the list
{
struct mdbVar *mdbVar = mdbVarNew(var,val);
slAddHead(pMdbVars, mdbVar);
return mdbVar;
}

static struct mdbObj *mdbObjsLoadFromMemory(struct mdb **mdbPtr,boolean buildHashes)
// Load all mdbObjs from in memory mdb struct, cannibalize strings. Expects sorted order.
{
struct mdbObj *mdbObj = NULL;
struct mdbObj *mdbObjs = NULL;
struct mdbVar *mdbVar;
struct mdb *thisRow;
while ((thisRow = slPopHead(mdbPtr)) != NULL)
    {
    if (mdbObj == NULL || differentString(thisRow->obj,mdbObj->obj) )
        {
        // Finish last object before starting next!
        // (mdbObjs is the head of the list, i.e. the object just completed)
        if (mdbObj!= NULL)
            slReverse(&(mdbObjs->vars));
        // Start new object
        AllocVar(mdbObj);
        mdbObj->obj = thisRow->obj;     // string ownership moves from the row to the object
        if ( buildHashes )
            mdbObj->varHash = hashNew(8);
        slAddHead(&mdbObjs,mdbObj);
        }
    else
        {
        freeMem(thisRow->obj); // Already got this from prev row
        }

    AllocVar(mdbVar);
    mdbVar->var = thisRow->var;         // cannibalized, not cloned
    mdbVar->val = thisRow->val;
    slAddHead(&(mdbObj->vars),mdbVar);
    if ( buildHashes )
        hashAddUnique(mdbObj->varHash, mdbVar->var, mdbVar); // pointer to struct to resolve type

    freeMem(thisRow);                   // only the shell; its strings now live elsewhere
    }

// Finish very last object
if (mdbObjs && mdbObjs->vars)
    slReverse(&(mdbObjs->vars));
if (mdbObjs)
    slReverse(&mdbObjs);

return mdbObjs;
}

static struct mdbByVar *mdbByVarsLoadFromMemory(struct mdb **mdbPtr,boolean buildHashes)
// Load all mdbVars from in-memory mdb struct, cannibalize strings. Expects sorted order.
// Builds a three level tree: var (root) -> val (limb) -> obj (leaf).
{
struct mdbByVar *rootVars = NULL;
struct mdbByVar *rootVar = NULL;
struct mdbLimbVal *limbVal = NULL;
struct mdbLeafObj *leafObj;
struct mdb *thisRow;
while ((thisRow = slPopHead(mdbPtr)) != NULL)
    {
    // Start at root
    if (rootVar == NULL || differentString(thisRow->var,rootVar->var) )
        {
        // Finish last var before starting next!
        if (rootVars && rootVars->vals && rootVars->vals->objs)
            slReverse(&(rootVars->vals->objs));
        if (rootVars && rootVars->vals)
            slReverse(&(rootVars->vals));
        // Start new var
        AllocVar(rootVar);
        limbVal = NULL; // Very important!
        rootVar->var = thisRow->var;
        if ( buildHashes )
            rootVar->valHash = hashNew(8);
        slAddHead(&rootVars,rootVar);
        }
    else
        {
        freeMem(thisRow->var); // Already got this from prev row
        }

    // Continue with limb
    if (limbVal == NULL || differentString(thisRow->val,limbVal->val) )
        {
        // Finish last val before starting next!
        if (limbVal != NULL && limbVal->objs != NULL)
            slReverse(&(limbVal->objs));
        // Start new val
        AllocVar(limbVal);
        limbVal->val = thisRow->val;
        if ( buildHashes )
            {
            // Pointer to struct to get to objHash
            hashAddUnique(rootVar->valHash, limbVal->val, limbVal);
            limbVal->objHash = hashNew(10);
            }
        slAddHead(&(rootVar->vals),limbVal);
        }
    else
        freeMem(thisRow->val); // Already got this from prev row

    // End with leaf
    AllocVar(leafObj);
    leafObj->obj = thisRow->obj;
    if ( buildHashes )
        hashAddUnique(limbVal->objHash, leafObj->obj, leafObj);// Pointer to struct to resolve type
    slAddHead(&(limbVal->objs),leafObj);

    freeMem(thisRow);
    }

// Finish very last object
if (rootVars && rootVars->vals && rootVars->vals->objs)
    slReverse(&(rootVars->vals->objs));
if (rootVars && rootVars->vals)
    slReverse(&(rootVars->vals));
if (rootVars && rootVars->vals)
    slReverse(&rootVars);

return rootVars;
}

static int mdbObjCRC(struct mdbObj *mdbObjs)
// returns a sum of all individual CRC values of all mdbObj strings (obj, var and val)
{
int crc = 0;
struct mdbObj *mdbObj = NULL;
for (mdbObj=mdbObjs;mdbObj!=NULL;mdbObj=mdbObj->next)
    {
    if (mdbObj->obj != NULL)
        crc += hashCrc(mdbObj->obj);

    struct mdbVar *mdbVar = NULL;
    for (mdbVar=mdbObj->vars;mdbVar!=NULL;mdbVar=mdbVar->next)
        {
        if (mdbVar->var != NULL)
            crc += hashCrc(mdbVar->var);
        if (mdbVar->val != NULL)
            crc += hashCrc(mdbVar->val);
        }
    }

return crc;
}

// -------------- Sort primitives --------------
int mdbObjCmp(const void *va, const void *vb)
// Compare mdbObj to sort on obj name, case-insensitive.
{
const struct mdbObj *a = *((struct mdbObj **)va);
const struct mdbObj *b = *((struct mdbObj **)vb);
return strcasecmp(a->obj, b->obj);
}

int mdbVarCmp(const void *va, const void *vb)
// Compare mdbVar to sort on var name, case-insensitive.
{ const struct mdbVar *a = *((struct mdbVar **)va); const struct mdbVar *b = *((struct mdbVar **)vb); return strcasecmp(a->var, b->var); } // ------ Parsing lines ------ struct mdbObj *mdbObjAddVarPairs(struct mdbObj *oldObj,char *varPairs) // Parses line of var=val pairs adding to a mdbObj. Creates mdbObj if NULL { struct mdbObj *mdbObj = oldObj; struct mdbVar *mdbVar; char *cloneVars = cloneString(varPairs); // initial chop and determine if this looks like metadata int count = chopByWhiteRespectDoubleQuotes(cloneVars,NULL,0); char **words = needMem(sizeof(char *) * count); count = chopByWhiteRespectDoubleQuotes(cloneVars,words,count); if (count < 1 || words[0] == NULL) { errAbort("This is not formatted var=val pairs:\n\t%s\n",varPairs); } verbose(3, "mdbObjAddVarPairs() word count:%d\n\t%s\n",count,varPairs); if (mdbObj == NULL) AllocVar(mdbObj); if (mdbObj->varHash == NULL) mdbObj->varHash = hashNew(8); int ix; for (ix = 0;ix<count;ix++) { if (*words[ix] == '#') break; AllocVar(mdbVar); if (strchr(words[ix], '=') == NULL) // treat this the same as "var=" { mdbVar->var = cloneString(words[ix]); mdbVar->val = NULL; } else { mdbVar->var = cloneNextWordByDelimiter(&(words[ix]),'='); mdbVar->val = cloneString(words[ix]); } verbose(3, "mdbObjAddVarPairs() var=val: %s=%s\n",mdbVar->var,mdbVar->val); struct mdbVar *oldVar = (struct mdbVar *)hashFindVal(mdbObj->varHash, mdbVar->var); if (oldVar) { verbose(1,"The same variable appears twice: %s=%s and %s=%s. 
" "Ignoring second value.\n\t%s\n", oldVar->var,oldVar->val,mdbVar->var,mdbVar->val,varPairs); mdbVarFree(&mdbVar); } else { hashAdd(mdbObj->varHash, mdbVar->var, mdbVar); // pointer to struct to resolve type slAddHead(&(mdbObj->vars),mdbVar); } } freeMem(words); freeMem(cloneVars); // Special for old style ENCODE metadata #define ENCODE_ALN "Alignments" #define ENCODE_RSIG "RawSignal" if (mdbObj->obj == NULL) { char * tableName = NULL; char * fileName = NULL; for (mdbVar = mdbObj->vars; mdbVar != NULL && (tableName == NULL || fileName == NULL); mdbVar = mdbVar->next) { if (sameString(mdbVar->var,MDB_VAR_TABLENAME)) tableName = mdbVar->val; else if (sameString(mdbVar->var,MDB_VAR_FILENAME)) fileName = mdbVar->val; } mdbVar = NULL; // assertably so, but this is conditioanally created below if (tableName != NULL) { verbose(3, "%s:%s\n",MDB_VAR_TABLENAME,tableName); if (fileName == NULL || startsWithWordByDelimiter(tableName,'.',fileName)) { mdbObj->obj = cloneString(tableName); AllocVar(mdbVar); mdbVar->var = cloneString(MDB_OBJ_TYPE); mdbVar->val = cloneString(MDB_OBJ_TYPE_TABLE); } else if (stringIn(ENCODE_ALN,fileName) // Messier case where the file has "Alignment" && stringIn(ENCODE_RSIG,tableName)) // but the table has "RawSignal" { char *tmpFilName = cloneString(fileName); strSwapStrs(tmpFilName, strlen(tmpFilName),ENCODE_ALN, ENCODE_RSIG); if (startsWithWordByDelimiter(tableName,'.',tmpFilName)) { mdbObj->obj = cloneString(tableName); AllocVar(mdbVar); mdbVar->var = cloneString(MDB_OBJ_TYPE); mdbVar->val = cloneString(MDB_OBJ_TYPE_TABLE); } freeMem(tmpFilName); } } else if (fileName != NULL) { verbose(3, "%s:%s\n",MDB_VAR_FILENAME,fileName); // NOTE: that the file object is the root of the name, // so both file.fastq.gz and file.fastq are same obj! 
mdbObj->obj = cloneFirstWordByDelimiter(fileName,'.'); AllocVar(mdbVar); mdbVar->var = cloneString(MDB_OBJ_TYPE); mdbVar->val = cloneString(MDB_OBJ_TYPE_FILE); } if (mdbVar != NULL) // Just determined an objType { verbose(3, "mdbObjAddVarPairs() var=val: %s=%s\n",mdbVar->var,mdbVar->val); struct mdbVar *oldVar = (struct mdbVar *)hashFindVal(mdbObj->varHash, mdbVar->var); if (oldVar) mdbVarFree(&mdbVar); else { hashAdd(mdbObj->varHash, mdbVar->var, mdbVar); // pointer to struct to resolve type slAddHead(&(mdbObj->vars),mdbVar); } } } if (mdbObj->obj == NULL) // NOTE: Should this be a hard error! errAbort("No obj found. This is not properly formatted metadata:\n\t%s\n",varPairs); //slReverse(&(mdbObj->vars)); Could have added vars so sort instead slSort(&(mdbObj->vars),&mdbVarCmp); // Should be in determined order mdbVar = (struct mdbVar *)hashFindVal(mdbObj->varHash, MDB_OBJ_TYPE); if (mdbVar == NULL) mdbVar = mdbObj->vars; verbose(3, "mdbObjAddVarPairs() obj=%s %s=%s\n", mdbObj->obj, mdbVar->var,mdbVar->val); return mdbObj; } struct mdbObj *metadataLineParse(char *line) /* Parses a single formatted metadata line into mdbObj for updates or queries. 
*/ { char *fromTheTop = line; char*nibbledWord = cloneNextWordByDelimiter(&line,' '); if (nibbledWord == NULL || differentWord(nibbledWord,MDB_METADATA_KEY)) errAbort("This is not a formatted metadata line:\n\t%s\n",fromTheTop); freeMem(nibbledWord); struct mdbObj *mdbObj = NULL; char*varPairs = line; nibbledWord = cloneNextWordByDelimiter(&line,' ');; if (nibbledWord == NULL) errAbort("This is not a formatted metadata line:\n\t%s\n",fromTheTop); if (strchr(nibbledWord, '=') == NULL) // If this is not a var=val then it should be obj { AllocVar(mdbObj); mdbObj->obj = nibbledWord; verbose(3, "metadataLineParse() %s=%s\n",MDB_OBJ,mdbObj->obj); varPairs = line; while (strlen(line) > 0) { nibbledWord = cloneNextWordByDelimiter(&line,' ');; if (nibbledWord == NULL) errAbort("This is not a formatted metadata line:\n\t%s\n",fromTheTop); if (*nibbledWord == '#' // IS comment || strchr(nibbledWord, '=') != NULL) // OR start of var=val pairs break; if (sameWord(nibbledWord,"delete")) mdbObj->deleteThis = TRUE; else errAbort("This is not a formatted metadata line:\n\t%s\n",fromTheTop); varPairs = line; freeMem(nibbledWord); } } if (varPairs != NULL && strlen(varPairs) > 0 && *varPairs != '#') mdbObj = mdbObjAddVarPairs(mdbObj,varPairs); else if (mdbObj->deleteThis == FALSE) errAbort("This is not a formatted metadata line:\n\t%s\n",fromTheTop); return mdbObj; } struct mdbByVar *mdbByVarsLineParse(char *line) /* Parses a line of "var1=val1 var2=val2 into a mdbByVar object for queries. 
*/ { int thisWord = 0; struct mdbByVar *mdbByVars = NULL; struct mdbByVar *rootVar = NULL; struct mdbLimbVal *limbVal = NULL; char *cloneLine = cloneString(line); // initial chop and determine if this looks like metadata int count = chopByWhiteRespectDoubleQuotes(cloneLine,NULL,0); char **words = needMem(sizeof(char *) * count); count = chopByWhiteRespectDoubleQuotes(cloneLine,words,count); verbose(3, "mdbByVarsLineParse() word count:%d\n\t%s\n",count,line); // All words are expected to be var=val pairs! for (thisWord=0; thisWord<count; thisWord++) { if (strchr(words[thisWord], '=') == NULL) errAbort("Expected '%s=%s' but found '%s'. This is not properly formatted metadata:\n" "\t%s\n",MDB_VAR,MDB_VAL,words[thisWord],line); // Set up var struct from 1st half of pair // NOTE: Do not try to combine repeated vars because "fob=a fob=b" is 'AND' // while "fob=a,b" is 'OR'. // Does this make sense? Yes: select * ... where fob like 'Fr%' and fob != 'Frunk' AllocVar(rootVar); rootVar->var = cloneNextWordByDelimiter(&(words[thisWord]),'='); rootVar->notEqual = (rootVar->var[strlen(rootVar->var)-1] == '!'); // requested not equal if (rootVar->notEqual) rootVar->var[strlen(rootVar->var)-1] = 0; // Fill in the val(s) from second half of pair char *val = NULL; if (words[thisWord][0] != '\0' && words[thisWord][0] != '?') // "var=?" or "var=" will query by var name only val = cloneString(words[thisWord]); if (val != NULL) { // Strip any single or double quotes first. 
char *end = val + strlen(val) - 1; if ((*val == '"' && *end == '"') || (*val == '\'' && *end == '\'')) { *end = '\0'; val++; } // handle comma separated list of vals (if not framed with widcards) if (strchr(val,',') != NULL && (*val != '%' || *(val + strlen(val) - 1) != '%')) { char * aVal = NULL; while ((aVal = cloneNextWordByDelimiter(&val,',')) != NULL) { AllocVar(limbVal); limbVal->val = aVal; slAddTail(&rootVar->vals,limbVal); } } else { AllocVar(limbVal); limbVal->val = val; rootVar->vals = limbVal; } } slAddHead(&mdbByVars,rootVar); } freeMem(words); slReverse(&mdbByVars); verbose(3, "mdbByVarsLineParse() parsed:%d first: %s%s='%s'.\n", slCount(mdbByVars),mdbByVars->var,(mdbByVars->notEqual?"!":""), (mdbByVars->vals?mdbByVars->vals->val:"")); return mdbByVars; } // ------ Loading from args, hashes and tdb ------ struct mdbByVar*mdbByVarCreate(char *var, char *val) /* Creates a singular var=val pair struct for metadata queries. */ { struct mdbByVar *mdbByVar = NULL; if (var == NULL) errAbort("Need variable to create mdbByVar query object.\n"); AllocVar(mdbByVar); mdbByVar->var = cloneString(var); if (val != NULL) { struct mdbLimbVal * limbVal; AllocVar(limbVal); limbVal->val = cloneString(val); mdbByVar->vals = limbVal; // Only one } return mdbByVar; } boolean mdbByVarAppend(struct mdbByVar *mdbByVars,char *var,char *val,boolean notEqual) /* Adds a another var to a list of mdbByVar pairs to be used in metadata queries. */ { // Does var already exist in mdbByVars? struct mdbByVar *mdbByVar = mdbByVars; for (;mdbByVar!=NULL;mdbByVar=mdbByVar->next) { if (sameString(mdbByVar->var,var) && mdbByVar->notEqual == notEqual) { struct mdbLimbVal * limbVal = mdbByVar->vals; for (;limbVal!=NULL;limbVal=limbVal->next) { if (sameString(limbVal->val,val)) return FALSE; // Nothing to do as this var is already there. 
} struct mdbLimbVal * newLimbVal; AllocVar(newLimbVal); newLimbVal->val = cloneString(val); slAddTail(&(mdbByVar->vals),newLimbVal); return TRUE; } } // Not found so add it struct mdbByVar *newVar = mdbByVarCreate(var, val); newVar->notEqual = notEqual; slAddTail(&mdbByVars,newVar); // Add to tail to avoid changing passed in pointer return TRUE; } struct mdbObj *mdbObjCreate(char *obj,char *var, char *val) /* Creates a singular mdbObj query object based on obj and all other optional params. */ { struct mdbObj *mdbObj = NULL; if (obj == NULL) errAbort("Need obj to create mdbObj object.\n"); AllocVar(mdbObj); mdbObj->obj = cloneString(obj); if (var != NULL) { struct mdbVar * mdbVar; AllocVar(mdbVar); mdbVar->var = cloneString(var); if (val != NULL) mdbVar->val = cloneString(val); mdbObj->vars = mdbVar; // Only one } return mdbObj; } struct mdbObj *mdbObjNew(char *obj,struct mdbVar *mdbVars) // Returns a new mdbObj with whatever was passed in. // An mdbObj requires and obj, so if one is not supplied it will be "[unknown]" { struct mdbObj *mdbObj = NULL; if (obj == NULL) errAbort("Need obj to create mdbObj object.\n"); if (mdbVars == NULL) { AllocVar(mdbObj); mdbObj->obj = cloneString(obj); return mdbObj; } else { mdbObj = mdbObjCreate(obj,mdbVars->var,mdbVars->val); mdbObj->varHash = hashNew(8); hashAddUnique(mdbObj->varHash, mdbVars->var, mdbObj->vars); // pointer to struct to resolve type struct mdbVar *var = mdbVars->next; for (;var != NULL;var = var->next) mdbObjSetVar(mdbObj, var->var,var->val); } return mdbObj; } struct mdbObj *mdbObjsLoadFromHashes(struct hash *objsHash) // Load all mdbObjs from a file containing metadata formatted lines { struct mdbObj *mdbObjs = NULL; struct hashEl* objEl = NULL; struct hashCookie objCookie = hashFirst(objsHash); while ((objEl = hashNext(&objCookie)) != NULL) { struct mdbObj *mdbObj; AllocVar(mdbObj); mdbObj->obj = cloneString(objEl->name); mdbObj->varHash = hashNew(8); struct hash *hashedVars = objEl->val; struct hashCookie 
varCookie = hashFirst(hashedVars); struct hashEl* varEl = NULL; while ((varEl = hashNext(&varCookie)) != NULL) { if (sameString(varEl->name,MDB_METAOBJ_RAKEY)) continue; struct mdbVar * mdbVar; AllocVar(mdbVar); mdbVar->var = cloneString(varEl->name); mdbVar->val = cloneString(varEl->val); hashAdd(mdbObj->varHash, mdbVar->var, mdbVar); // pointer to struct to resolve type slAddHead(&(mdbObj->vars),mdbVar); } slSort(&(mdbObj->vars),&mdbVarCmp); // Should be in determined order slAddHead(&mdbObjs,mdbObj); } slSort(&mdbObjs,&mdbObjCmp); // Should be in determined order return mdbObjs; } // ------ Loading from files ------ struct mdbObj *mdbObjsLoadFromFormattedFile(char *fileName,boolean *validated) // Load all mdbObjs from a file containing metadata formatted lines { struct mdbObj *mdbObjs = NULL; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line; while (lineFileNext(lf, &line,NULL)) { char *start = skipLeadingSpaces(line); if (start == NULL || *start == '#') continue; if (startsWithWord(MDB_METAOBJ_RAKEY,line)) { // This is the RA style file!! 
lineFileClose(&lf); return mdbObjsLoadFromRAFile(fileName,validated); } struct mdbObj *mdbObj = metadataLineParse(line); if (mdbObj == NULL) { mdbObjsFree(&mdbObjs); return NULL; } slAddHead(&mdbObjs,mdbObj); } lineFileClose(&lf); slReverse(&mdbObjs); // Go ahead and keep this in file order if (validated) *validated = FALSE; return mdbObjs; } #define MDB_MAGIC_PREFIX "# MAGIC: " struct mdbObj *mdbObjsLoadFromRAFile(char *fileName,boolean *validated) // Load all mdbObjs from a file containing RA formatted 'metaObjects' { struct hash *mdHash = raReadAll(fileName, MDB_METAOBJ_RAKEY); if (mdHash == NULL) { verbose(1,"Missing, empty or badly formated RA file:%s\n",fileName); return NULL; } struct mdbObj *mdbObjs = mdbObjsLoadFromHashes(mdHash); hashFree(&mdHash); // Try to validate file if (validated) { *validated = FALSE; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line = lineFileSkipToLineStartingWith(lf,MDB_MAGIC_PREFIX,1000000); if (line != NULL) { int fileMagic = atoi(line+strlen(MDB_MAGIC_PREFIX)); int objsMagic = mdbObjCRC(mdbObjs); verbose(3,"Objects magic: %d Files magic: %d (%s)\n", objsMagic,fileMagic,line+strlen(MDB_MAGIC_PREFIX)); *validated = (fileMagic == objsMagic); } else verbose(3,"Can't find magic number on this file.\n"); } return mdbObjs; } // ------ Table name and creation ------ void mdbReCreate(struct sqlConnection *conn,char *tblName,boolean testOnly) // Creates ore Recreates the named mdb. { char *sqlCreate = "# Contains metadata for a table, file or other objects.\n" "CREATE TABLE %s (\n" " obj varchar(255) not null, # Object name or ID.\n" " var varchar(255) not null, # Metadata variable name.\n" " val varchar(2048) not null, # Metadata value.\n" " #Indices\n" " PRIMARY KEY(obj,var),\n" " INDEX varKey (var,val(64))\n" ")"; if (sqlTableExists(conn,tblName)) verbose(2, "Table '%s' already exists. 
It will be recreated.\n",tblName); struct dyString *dy = newDyString(512); sqlDyStringPrintf(dy, sqlCreate, tblName); verbose(2, "Requesting table creation:\n%s;\n", dyStringContents(dy)); if (!testOnly) sqlRemakeTable(conn, tblName, dyStringContents(dy)); dyStringFree(&dy); } #define HG_CONF_SANDBOX_MDB "db.metaDb" #define HG_CONF_SANDBOX_TDB "db.trackDb" #define SANDBOX_TDB_ROOT "trackDb" static char*mdbTableNamePreferSandbox() // returns the mdb table name or NULL if conn supplied but the table doesn't exist { char *table = cfgOption(HG_CONF_SANDBOX_MDB); if (table != NULL) return cloneString(table); // Look for trackDb name to model char *name = cfgOption(HG_CONF_SANDBOX_TDB); if (name == NULL) return cloneString(MDB_DEFAULT_NAME); // Only take the last table of a list of tables! char delimit = ','; for (table = name; (name = skipBeyondDelimit(name,delimit)) != NULL;) table = name; name = skipLeadingSpaces(table); // Divide name into root and sandbox portion char *root = NULL; char *sand = NULL; delimit = '_'; if ((sand = strchr(name,delimit)) == NULL) { delimit = '-'; sand = strchr(name,delimit); } if (sand == NULL) // No sandbox portion return cloneString(MDB_DEFAULT_NAME); root = cloneNextWordByDelimiter(&name,delimit); sand = name; // Since db.trackDb was used, make sure to swap it if (startsWith(SANDBOX_TDB_ROOT,root)) { freeMem(root); root = cloneString(MDB_DEFAULT_NAME); } else // If discovered anything other than trackDb then give up as too obscure return cloneString(MDB_DEFAULT_NAME); // Finally ready to put it together int size = strlen(root) + strlen(sand) + 2; table = needMem(size); safef(table,size,"%s%c%s",root,delimit,sand); freeMem(root); return table; } char*mdbTableName(struct sqlConnection *conn,boolean mySandBox) // returns the mdb table name or NULL if conn supplied but the table doesn't exist { char *table = NULL; if (mySandBox) table = mdbTableNamePreferSandbox(); if (table == NULL) table = cloneString(MDB_DEFAULT_NAME); // Test for table 
if (conn != NULL && !sqlTableExists(conn,table)) { if (!mySandBox || sameWord(table,MDB_DEFAULT_NAME)) // Then try the root { freeMem(table); return NULL; } freeMem(table); table = cloneString(MDB_DEFAULT_NAME); if (!sqlTableExists(conn,table)) { freeMem(table); return NULL; } } return table; } // -------------- Updating the DB -------------- int mdbObjsSetToDb(struct sqlConnection *conn,char *tableName,struct mdbObj *mdbObjs, boolean replace,boolean testOnly) // Adds or updates metadata obj/var pairs into the named table. Returns total rows affected { char query[8192]; struct mdbObj *mdbObj; struct mdbVar *mdbVar; int count = 0; if (tableName == NULL) tableName = mdbTableName(conn,TRUE); // defaults to sandbox, if exists, else MDB_DEFAULT_NAME else if (!sqlTableExists(conn,tableName)) errAbort("mdbObjsSetToDb attempting to update non-existent table named '%s'.\n",tableName); // Table specific lock (over-cautious, since most work is done on sandbox tables) char lock[64]; safef(lock,sizeof lock,"lock_%s",tableName); sqlGetLock(conn, lock); for (mdbObj = mdbObjs;mdbObj != NULL; mdbObj = mdbObj->next) { // Handle delete requests first if (mdbObj->deleteThis) { if (mdbObj->vars == NULL) // deletes all { sqlSafefFrag(query, sizeof(query),"%s where obj = '%s'", tableName, mdbObj->obj); int delCnt = sqlRowCount(conn,query); if (delCnt>0) { sqlSafef(query, sizeof(query), "delete from %s where obj = '%s'",tableName,mdbObj->obj); verbose(2, "Requesting delete of %d rows:\n\t%s;\n",delCnt, query); if (!testOnly) sqlUpdate(conn, query); count += delCnt; } } else // deletes selected vars { for (mdbVar = mdbObj->vars;mdbVar != NULL; mdbVar = mdbVar->next) { sqlSafef(query, sizeof(query), "select obj from %s where obj = '%s' and var = '%s'", tableName,mdbObj->obj,mdbVar->var); if (sqlExists(conn,query)) { sqlSafef(query, sizeof(query), "delete from %s where obj = '%s' and var = '%s'", tableName,mdbObj->obj,mdbVar->var); verbose(2, "Requesting delete of 1 row:\n\t%s;\n",query); if 
(!testOnly) sqlUpdate(conn, query); count++; } } } continue; // Done with this mdbObj } else if (replace) // If replace then clear out deadwood before inserting new vars { sqlSafefFrag(query, sizeof(query),"%s where obj = '%s'", tableName, mdbObj->obj); int delCnt = sqlRowCount(conn,query); if (delCnt>0) { sqlSafef(query, sizeof(query), "delete from %s where obj = '%s'",tableName,mdbObj->obj); verbose(2, "Requesting replacement of %d rows:\n\t%s;\n",delCnt, query); if (!testOnly) sqlUpdate(conn, query); count += delCnt; } } // Now it is time for update or add! for (mdbVar = mdbObj->vars;mdbVar != NULL; mdbVar = mdbVar->next) { stripEnclosingDoubleQuotes(mdbVar->val); // Ensures values are stripped of enclosing quotes // Be sure to check for var existence first, then update if (!replace) { struct mdbObj *objExists = mdbObjQueryByObj(conn,tableName,mdbObj->obj,mdbVar->var); if (objExists) { if (differentString(mdbVar->val,objExists->vars->val)) { sqlSafef(query, sizeof(query), "update %s set val = '%s' where obj = '%s' and var = '%s'", tableName, mdbVar->val, mdbObj->obj, mdbVar->var); verbose(2, "Requesting update of 1 row:\n\t%s;\n",query); if (!testOnly) sqlUpdate(conn, query); count++; } mdbObjsFree(&objExists); continue; // The object was found/updated so done with it } } // Finally ready to insert new vars sqlSafef(query, sizeof(query), "insert into %s set obj='%s', var='%s', val='%s'", tableName,mdbObj->obj,mdbVar->var,mdbVar->val); verbose(2, "Requesting insert of one row:\n\t%s;\n",query); if (!testOnly) sqlUpdate(conn, query); count++; } } sqlReleaseLock(conn, lock); return count; } int mdbObjsLoadToDb(struct sqlConnection *conn,char *tableName,struct mdbObj *mdbObjs, boolean testOnly) // Adds mdb Objs with minimal error checking { int count = 0; verboseTime(2, "Start of mdbObjsLoadToDb %s", tableName); if (tableName == NULL) tableName = mdbTableName(conn,TRUE); // defaults to sandbox, if exists, else MDB_DEFAULT_NAME else if 
(!sqlTableExists(conn,tableName)) errAbort("mdbObjsLoadToDb attempting to load non-existent table named '%s'.\n",tableName); assert(mdbObjs != NULL); // If this is the case, then be vocal #define MDB_TEMPORARY_TAB_FILE "temporaryMdb.tab" long lastTime = 0; count = mdbObjPrintToTabFile(mdbObjs,MDB_TEMPORARY_TAB_FILE); verboseTime(2, "past mdbObjPrintToTabFile()"); // Disable keys in hopes of speeding things up. No danger since it only disables non-unique keys char query[8192]; sqlSafef(query, sizeof(query),"alter table %s disable keys",tableName); sqlUpdate(conn, query); // Quick? load sqlLoadTabFile(conn, MDB_TEMPORARY_TAB_FILE, tableName, SQL_TAB_FILE_WARN_ON_ERROR|SQL_TAB_FILE_WARN_ON_WARN); verboseTime(2, "past sqlLoadTabFile()"); // Enabling the keys again sqlSafef(query, sizeof(query),"alter table %s enable keys",tableName); sqlUpdate(conn, query); verboseTime(2, "Past alter table"); //unlink(MDB_TEMPORARY_TAB_FILE); verbose(0,"%04ldms - Done loading mdb with 'LOAD DATA INFILE' mysql command.\n", (clock1000() - lastTime)); return count; } // ------------------ Querys ------------------- struct mdbObj *mdbObjQuery(struct sqlConnection *conn,char *table,struct mdbObj *mdbObj) // Query the metadata table by obj and optional vars and vals in metaObj struct. // If mdbObj is NULL query all. Returns new mdbObj struct fully populated & sorted in obj,var order { // select obj,var,val where (var= [and val=]) or ([var= and] val=) order by obj,var boolean buildHash = TRUE; if (table == NULL) table = mdbTableName(conn,TRUE); // defaults to sandbox, if exists, else MDB_DEFAULT_NAME else if (!sqlTableExists(conn,table)) return NULL; struct dyString *dy = newDyString(4096); sqlDyStringPrintf(dy, "select obj,var,val from %s", table); if (mdbObj != NULL && mdbObj->obj != NULL) { sqlDyStringPrintf(dy, " where obj %-s '%s'", (strchr(mdbObj->obj,'%') ? 
"like" : "="), mdbObj->obj); struct mdbVar *mdbVar; for (mdbVar=mdbObj->vars;mdbVar!=NULL;mdbVar=mdbVar->next) { if (mdbVar==mdbObj->vars) dyStringPrintf(dy, " and ("); else dyStringPrintf(dy, " or "); if (mdbVar->var != NULL) { if (mdbVar->val != NULL) dyStringPrintf(dy, "("); sqlDyStringPrintf(dy, "var %-s '%s'", (strchr(mdbVar->var,'%') ? "like" : "="), mdbVar->var); } if (mdbVar->val != NULL) { if (mdbVar->var != NULL) dyStringPrintf(dy, " and "); sqlDyStringPrintf(dy, "val %-s '%s'", (strchr(mdbVar->val,'%') ? "like" : "="), mdbVar->val); if (mdbVar->var != NULL) dyStringPrintf(dy, ")"); } if (mdbVar->var == NULL && mdbVar->val) errAbort("mdbObjQuery has empty mdbVar struct.\n"); buildHash = FALSE; // too few variables } if (mdbObj->vars != NULL) dyStringPrintf(dy, ")"); } verbose(2, "Requesting mdbObjQuery query:\n\t%s;\n",dyStringContents(dy)); struct mdb *mdb = mdbLoadByQuery(conn, dyStringCannibalize(&dy)); slSort(&mdb,mdbCmp); // Use internal sort instead of ORDER BY because of mysql inefficiency struct mdbObj *mdbObjs = mdbObjsLoadFromMemory(&mdb,buildHash); verbose(3, "Returned %d object(s) with %d var(s).\n", mdbObjCount(mdbObjs,TRUE),mdbObjCount(mdbObjs,FALSE)); return mdbObjs; } struct mdbObj *mdbObjQueryByObj(struct sqlConnection *conn,char *table,char *obj,char *var) // Query a single metadata object and optional var from a table (default mdb). { if (obj == NULL) return mdbObjQuery(conn,table,NULL); struct mdbObj *queryObj = mdbObjCreate(obj,var,NULL); struct mdbObj *resultObj = mdbObjQuery(conn,table,queryObj); mdbObjsFree(&queryObj); return resultObj; } struct mdbByVar *mdbByVarsQuery(struct sqlConnection *conn,char *table,struct mdbByVar *mdbByVars) // Query the metadata table by one or more var=val pairs to find the distinct set of objs // that satisfy ANY conditions. // Returns new mdbByVar struct fully populated and sorted in var,val,obj order. 
{ // select obj,var,val where (var= [and val in (val1,val2)]) // or (var= [and val in (val1,val2)]) order by var,val,obj if (table == NULL) table = mdbTableName(conn,TRUE); // defaults to sandbox, if exists, else MDB_DEFAULT_NAME else if (!sqlTableExists(conn,table)) return NULL; struct dyString *dy = newDyString(4096); sqlDyStringPrintf(dy, "select obj,var,val from %s", table); struct mdbByVar *rootVar; for (rootVar=mdbByVars;rootVar!=NULL;rootVar=rootVar->next) { if (rootVar==mdbByVars) dyStringPrintf(dy, " where (var "); else dyStringPrintf(dy, " OR (var "); if (rootVar->notEqual && rootVar->vals == NULL) dyStringPrintf(dy, "%s", strchr(rootVar->var,'%') ? "NOT " : "!"); // one of: "NOT LIKE". "!=" or "NOT EXISTS" if (rootVar->vals != NULL && rootVar->vals->val != NULL && strlen(rootVar->vals->val) > 0) { sqlDyStringPrintf(dy, "%-s '%s'", (strchr(rootVar->var,'%') ? "like" : "="), rootVar->var); } else dyStringPrintf(dy, "EXISTS"); struct mdbLimbVal *limbVal; boolean multiVals = FALSE; for (limbVal=rootVar->vals;limbVal!=NULL;limbVal=limbVal->next) { if (limbVal->val == NULL || strlen(limbVal->val) < 1) continue; if (!multiVals) { dyStringPrintf(dy, " and val "); if (rootVar->notEqual) dyStringPrintf(dy, "%s", strchr(limbVal->val,'%') ? 
"NOT " : "!"); if (limbVal->next == NULL) // only one val { sqlDyStringPrintf(dy, "%-s '%s'", (strchr(limbVal->val,'%')?"like":"="), limbVal->val); break; } else dyStringPrintf(dy, "in ("); multiVals=TRUE; } else dyStringPrintf(dy, ","); sqlDyStringPrintf(dy, "'%s'", limbVal->val); } if (multiVals) dyStringPrintf(dy, ")"); dyStringPrintf(dy, ")"); } verbose(2, "Requesting mdbByVarsQuery query:\n\t%s;\n",dyStringContents(dy)); struct mdb *mdb = mdbLoadByQuery(conn, dyStringCannibalize(&dy)); verbose(3, "rows (vars) returned: %d\n",slCount(mdb)); slSort(&mdb,mdbVarValCmp); // Use internal sort instead of ORDER BY because of mysql inefficiency struct mdbByVar *mdbByVarsFromMem = mdbByVarsLoadFromMemory(&mdb,TRUE); verbose(3, "Returned %d vars(s) with %d val(s) with %d object(s).\n", mdbByVarCount(mdbByVarsFromMem,TRUE,FALSE), mdbByVarCount(mdbByVarsFromMem,FALSE,TRUE ), mdbByVarCount(mdbByVarsFromMem,FALSE,FALSE)); return mdbByVarsFromMem; } struct mdbByVar *mdbByVarQueryByVar(struct sqlConnection *conn,char *table,char *varName,char *val) // Query a single metadata variable & optional val from a table (default mdb) for searching val->obj { if (varName == NULL) return mdbByVarsQuery(conn,table,NULL); struct mdbByVar *queryVar = mdbByVarCreate(varName,val); struct mdbByVar *resultVar = mdbByVarsQuery(conn,table,queryVar); mdbByVarsFree(&queryVar); return resultVar; } struct mdbObj *mdbObjsQueryByVars(struct sqlConnection *conn,char *table,struct mdbByVar *mdbByVars) // Query the metadata table by one or more var=val pairs // to find the distinct set of objs that satisfy ALL conditions. // Returns new mdbObj struct fully populated and sorted in obj,var order. { // MOST POPULAR WAY TO QUERY MDB. 
Building example queries like: // "cell=GM12878" or "cell!=GM12878" // => SELECT T1.obj,T1.var,T1.val FROM metaDb T1 // JOIN metaDb T2 WHERE T1.obj = T2.obj AND T2.var = 'cell' AND T2.val = 'GM12878' // ORDER BY T1.obj, T1.var; // => SELECT T1.obj,T1.var,T1.val FROM metaDb T1 // JOIN metaDb T2 WHERE T1.obj = T2.obj AND T2.var = 'cell' AND T2.val != 'GM12878' // ORDER BY T1.obj, T1.var; // "cell=GM%" or "cell!=GM%" // => SELECT T1.obj,T1.var,T1.val FROM metaDb T1 // JOIN metaDb T2 WHERE T1.obj = T2.obj AND T2.var = 'cell' AND T2.val LIKE 'GM%' // ORDER BY T1.obj, T1.var; // => SELECT T1.obj,T1.var,T1.val FROM metaDb T1 // JOIN metaDb T2 WHERE T1.obj = T2.obj AND T2.var = 'cell' AND T2.val NOT LIKE 'GM%' // ORDER BY T1.obj, T1.var; // "cell=" or "cell!=" NOTE the tricky and inscrutable left join for cell!= // => SELECT T1.obj,T1.var,T1.val FROM metaDb T1 // JOIN metaDb T2 WHERE T1.obj = T2.obj AND T2.var = 'cell' // ORDER BY T1.obj, T1.var; // => SELECT T1.obj,T1.var,T1.val FROM metaDb T1 // LEFT JOIN metaDb T2 WHERE T1.obj = T2.obj AND T2.var = 'cell' WHERE T2.obj IS NULL // ORDER BY T1.obj, T1.var; // "cell=GM12878,K562" or "cell!=GM12878,K562" // => SELECT T1.obj,T1.var,T1.val FROM metaDb T1 // JOIN metaDb T2 WHERE T1.obj = T2.obj // AND T2.var = 'cell' AND T2.val IN ('GM12878','K562')' // ORDER BY T1.obj, T1.var; // => SELECT T1.obj,T1.var,T1.val FROM metaDb T1 // JOIN metaDb T2 WHERE T1.obj = T2.obj // AND T2.var = 'cell' AND T2.val NOT IN ('GM12878','K562')' // ORDER BY T1.obj, T1.var; // "cell=GM% cell!=GM12878" (very powerful) // SELECT T1.obj,T1.var,T1.val FROM metaDb T1 // JOIN metaDb T2 WHERE T1.obj = T2.obj AND T2.var = 'cell' AND T2.val LIKE 'GM%' // JOIN metaDb T3 WHERE T1.obj = T3.obj AND T3.var = 'cell' AND T3.val != 'GM12878' // ORDER BY T1.obj, T1.var; if (table == NULL) table = mdbTableName(conn,TRUE); // defaults to sandbox, if exists, else MDB_DEFAULT_NAME else if (!sqlTableExists(conn,table)) return NULL; struct dyString *dy = newDyString(4096); 
sqlDyStringPrintf(dy, "SELECT T1.obj,T1.var,T1.val FROM %s T1", table); struct mdbByVar *rootVar; int tix; for (rootVar=mdbByVars,tix=2;rootVar!=NULL;rootVar=rootVar->next,tix++) { boolean hasVal = (rootVar->vals != NULL); boolean varWild = (strchr(rootVar->var,'%') != NULL); // If you want objects where var='cell' does not exist, then we need the tricky and inscrutable // LEFT JOIN metaDb T2 ON T2.obj = T1.obj AND T2.var = 'cell' WHERE T2.obj is NULL if (!hasVal && rootVar->notEqual) dyStringAppend(dy, " LEFT"); sqlDyStringPrintf(dy, " JOIN %s T%d ON T%d.obj = T1.obj AND T%d.var ",table,tix,tix,tix); // var = 'x' || var != 'x' || var LIKE 'x%' || var NOT LIKE 'x%' if (hasVal && rootVar->notEqual && rootVar->vals == NULL) dyStringAppend(dy, (varWild ? "NOT " : "!")); sqlDyStringPrintf(dy, "%-s '%s'",(varWild ? "LIKE" : "="), rootVar->var); // Finish the tricky and inscrutable LEFT JOIN / WHERE NULL if (!hasVal && rootVar->notEqual) dyStringPrintf(dy, " WHERE T%d.obj IS NULL",tix); // Now 1 or more vals. First some booleans struct mdbLimbVal *limbVal; boolean multiVals = (rootVar->vals != NULL && rootVar->vals->next != NULL); boolean wilds = FALSE; for (limbVal=rootVar->vals;wilds == FALSE && limbVal!=NULL; limbVal=limbVal->next) wilds = (limbVal->val != NULL && strchr(limbVal->val,'%') != NULL); // breaks when true // Now walk through vals creating: // AND (val = 'x' or val = 'y') or val IN ('a','b','c') or val NOT LIKE 'd%' for (limbVal=rootVar->vals;limbVal!=NULL;limbVal=limbVal->next) { if (limbVal->val == NULL || strlen(limbVal->val) < 1) continue; if (limbVal==rootVar->vals) // First val { if (wilds && multiVals) dyStringAppend(dy, " AND ("); // starts AND (val LIKE 'd%' or ...) else dyStringAppend(dy, " AND "); // starts AND val = 'a' || AND val IN ('a','b'...) 
dyStringPrintf(dy, "T%d.val ",tix); } else // successive vals { if (wilds && multiVals) dyStringPrintf(dy, " or T%d.val ",tix); // continues LIKE 'd%' else dyStringAppend(dy, ","); // continues IN ('a' } if (limbVal==rootVar->vals // First val || (wilds && multiVals)) // and successive if wildcards { boolean valWild = (strchr(limbVal->val,'%') != NULL); if (rootVar->notEqual) dyStringAppend(dy, (valWild || limbVal->next)?"NOT ":"!"); dyStringAppend(dy, (valWild ? "LIKE " : (!multiVals || wilds ? "= " : "IN ("))); } sqlDyStringPrintf(dy, "'%s'", limbVal->val); } if (multiVals) dyStringPrintf(dy, ")"); // closes IN ('a','b','c') || AND (val LIKE 'd%' or val LIKE 'e%') } verbose(2, "Requesting mdbObjsQueryByVars query:\n\t%s;\n",dyStringContents(dy)); struct mdb *mdb = mdbLoadByQuery(conn, dyStringCannibalize(&dy)); verbose(3, "rows (vars) returned: %d\n",slCount(mdb)); slSort(&mdb,mdbCmp); // Use internal sort instead of ORDER BY because of mysql inefficiency struct mdbObj *mdbObjs = mdbObjsLoadFromMemory(&mdb,TRUE); verbose(3, "Returned %d object(s) with %d var(s).\n", mdbObjCount(mdbObjs,TRUE),mdbObjCount(mdbObjs,FALSE)); return mdbObjs; } struct mdbObj *mdbObjsQueryByVarValString(struct sqlConnection *conn,char *tableName,char *varVals) // returns mdbObjs matching varVals in form of: // [var1=val1 var2=val2a,val2b var3=v%3 var4="val 4" var5!=val5 var6=] // var2=val2a,val2b: matches asny of comma separated list // var3=v%3 : matches '%' and '?' wild cards. // var4="val 4" : matches simple double quoted strings. // var5!=val5 : matches not equal. // var6= : matches that var exists (same as var6=%). var6!= also works. { struct mdbByVar *mdbByVars = mdbByVarsLineParse(varVals); if (mdbByVars == NULL) return NULL; return mdbObjsQueryByVars(conn,tableName,mdbByVars); } struct mdbObj *mdbObjsQueryByVarPairs(struct sqlConnection *conn,char *tableName, struct slPair *varValPairs) // returns mdbObjs matching varValPairs provided. 
// The != logic of mdbObjsQueryByVarValString() is not possible, but other cases are supported: // as val may be NULL, a comma delimited list, double quoted string, containing wilds: % and ? { // Note: there is inefficiency in creating a string then tearing it down, but it streamlines code char *varValString = slPairListToString(varValPairs,TRUE); // quotes added when spaces found struct mdbObj *mdbObjs = mdbObjsQueryByVarValString(conn,tableName,varValString); freeMem(varValString); return mdbObjs; } struct mdbObj *mdbObjQueryCompositeObj(struct sqlConnection *conn,char *tableName, struct mdbObj *mdbObj) // returns NULL or the composite mdbObj associated with the object passed in. { char *objType = mdbObjFindValue(mdbObj,MDB_OBJ_TYPE); assert(objType != NULL); if (sameWord(objType,MDB_VAR_COMPOSITE)) return mdbObjClone(mdbObj); char *compName = mdbObjFindValue(mdbObj,MDB_VAR_COMPOSITE); if (compName == NULL) return NULL; return mdbObjQuery(conn,tableName,mdbObjCreate(compName,NULL,NULL)); } // ----------- Printing and Counting ----------- static void mdbVarValPrint(struct mdbVar *mdbVar,boolean raStyle, FILE *outF) { if (mdbVar != NULL && mdbVar->var != NULL) { if (raStyle) fprintf(outF, "\n%s ",mdbVar->var); else fprintf(outF, " %s=",mdbVar->var); if (mdbVar->val != NULL) { if (!raStyle && strchr(mdbVar->val, ' ') != NULL) // Has blanks fprintf(outF, "\"%s\"",mdbVar->val); else fprintf(outF, "%s",mdbVar->val); } } } void mdbObjPrintToStream(struct mdbObj *mdbObjs,boolean raStyle, FILE *outF ) // prints objs and var=val pairs as formatted metadata lines or ra style { // Single line: // metadata iLoveLucy table lucy=ricky ethyl=fred // ra style // metadata iLoveLucy table // lucy ricky // ethy fred // TODO: Expand for mutilple var types; strip quotes from vals on ra style struct mdbObj *mdbObj = NULL; for (mdbObj=mdbObjs;mdbObj!=NULL;mdbObj=mdbObj->next) { if (mdbObj->obj == NULL) continue; fprintf(outF, "%s 
%s",(raStyle?MDB_METAOBJ_RAKEY:MDB_METADATA_KEY),mdbObj->obj); if (mdbObj->deleteThis) fprintf(outF, " delete"); struct mdbVar *mdbVar = NULL; // If hash available, force objType to front if (mdbObj->varHash != NULL) { mdbVar = hashFindVal(mdbObj->varHash,MDB_OBJ_TYPE); mdbVarValPrint(mdbVar,raStyle, outF); } for (mdbVar=mdbObj->vars;mdbVar!=NULL;mdbVar=mdbVar->next) { if (mdbObj->varHash == NULL || !sameOk(MDB_OBJ_TYPE,mdbVar->var)) mdbVarValPrint(mdbVar,raStyle, outF); } fprintf(outF, "%s",(raStyle?"\n\n":"\n")); } if (raStyle) // NOTE: currently only supporting validation of RA files fprintf(outF, "%s%d\n",MDB_MAGIC_PREFIX,mdbObjCRC(mdbObjs)); } char *mdbObjVarValPairsAsLine(struct mdbObj *mdbObj,boolean objTypeExclude,boolean cvLabels) // returns NULL or a line for a single mdbObj as "var1=val1; var2=val2 ...". Must be freed. { if (mdbObj!=NULL) { struct dyString *dyLine = dyStringNew(128); struct mdbVar *mdbVar = NULL; // If hash available, force objType to front if (!objTypeExclude && mdbObj->varHash != NULL) { mdbVar = hashFindVal(mdbObj->varHash,MDB_OBJ_TYPE); dyStringPrintf(dyLine,"%s=%s; ",mdbVar->var,mdbVar->val); } for (mdbVar=mdbObj->vars;mdbVar!=NULL;mdbVar=mdbVar->next) { if (!sameOk(MDB_OBJ_TYPE,mdbVar->var) || (!objTypeExclude && mdbObj->varHash == NULL)) { if (cvLabels) { char *varLabel = (char *)cvLabel(NULL,mdbVar->var); char *valLabel = (char *)cvLabel(mdbVar->var,mdbVar->val); dyStringPrintf(dyLine,"%s=%s; ",varLabel,valLabel); } else dyStringPrintf(dyLine,"%s=%s; ",mdbVar->var,mdbVar->val); } } char *line = dyStringCannibalize(&dyLine); if (line) { int len = strlen(line); if (len == 0) { freeMem(line); return NULL; } if (line[len-1] == ' ') line[len-1] = '\0'; return line; } } return NULL; } void mdbObjPrint(struct mdbObj *mdbObjs,boolean raStyle) // prints objs and var=val pairs as formatted metadata lines or ra style { mdbObjPrintToStream(mdbObjs, raStyle, stdout); } void mdbObjPrintToFile(struct mdbObj *mdbObjs,boolean raStyle, char *file) 
// prints (to file) objs and var=val pairs as formatted metadata lines or ra style { FILE *f = mustOpen(file, "w"); mdbObjPrintToStream(mdbObjs, raStyle, f); fclose(f); } void mdbObjPrintOrderedToStream(FILE *outF,struct mdbObj **mdbObjs,char *order, char *separator, boolean header) // prints mdbObjs as a table, but only the vars listed in comma delimited order. // Examples of separator: " " "\t\t" or "<TD>", in which case this is an HTML table. // mdbObjs list will be reordered. Sort fails when vars are missing in objs. { if (separator == NULL) separator = " "; boolean html = FALSE; if (startsWith("<T",separator) || startsWith("<t",separator)) { if (!endsWith(separator,">")) errAbort("mdbObjPrintOrdered() separator is invalid HTML '%s'.\n",separator); html = TRUE; } if (!startsWithWordByDelimiter("obj" ,',',order) && !startsWithWordByDelimiter("objName" ,',',order) && !startsWithWordByDelimiter("metaObject",',',order)) mdbObjsSortOnVars(mdbObjs, order); struct slName *vars = slNameListFromString(order, ','); struct slName *var = NULL; if (html) fprintf(outF, "<table>"); if (header) { if (html) fprintf(outF, "<tr>"); for (var = vars;var != NULL; var = var->next) { if (html) fprintf(outF, "%s%s",separator,var->name); // <td> is first else fprintf(outF, "%s%s",var->name,separator); if (html) fprintf(outF, "</td>"); } if (html) fprintf(outF, "</tr>"); fprintf(outF, "\n"); } struct mdbObj *mdbObj = *mdbObjs; for (;mdbObj != NULL; mdbObj = mdbObj->next) { if (html) fprintf(outF, "<tr>"); for (var = vars;var != NULL; var = var->next) { char *val = mdbObjFindValue(mdbObj, var->name); if (val == NULL) { /*if (sameWord(var->name,"obj") || sameWord(var->name,"objName") || sameWord(var->name,"metaObject")) val = mdbObj->obj; else*/ if (html) val = " "; else val = " "; } if (html) fprintf(outF, "%s%s",separator,val); // <td> is first else fprintf(outF, "%s%s",val,separator); if (html) fprintf(outF, "</td>"); } if (html) fprintf(outF, "</tr>"); fprintf(outF, "\n"); } if (html) 
fprintf(outF, "</table>\n"); } int mdbObjPrintToTabFile(struct mdbObj *mdbObjs, char *file) // prints all objs as tab delimited obj var val into file for SQL LOAD DATA. Returns count. { FILE *tabFile = mustOpen(file, "w"); int count = 0; struct mdbObj *mdbObj = NULL; for (mdbObj=mdbObjs;mdbObj!=NULL;mdbObj=mdbObj->next) { if (mdbObj->obj == NULL) continue; struct mdbVar *mdbVar = NULL; for (mdbVar=mdbObj->vars;mdbVar!=NULL;mdbVar=mdbVar->next) { if (mdbVar->var == NULL || mdbVar->val == NULL) continue; fprintf(tabFile, "%s\t%s\t%s\n",mdbObj->obj,mdbVar->var,sqlEscapeString(mdbVar->val)); count++; } } fclose(tabFile); return count; } void mdbByVarPrint(struct mdbByVar *mdbByVars,boolean raStyle) // prints var=val pairs and objs that go with them single lines or ra style { // Single line: // mdbVariable lucy=ethyl bestFriends lifePartners // mdbVariable lucy=ricky iLoveLucy divorces // NOT QUITE ra style // metadata Fred wife=Ethyl // metadata Lucy wife=Ethyl // Results in: // mdbVariable wife Ethyl // metaObject Fred // metaObject Lucy struct mdbByVar *rootVar = NULL; for (rootVar=mdbByVars;rootVar!=NULL;rootVar=rootVar->next) { if (rootVar->var == NULL) continue; struct mdbLimbVal *limbVal = NULL; for (limbVal=rootVar->vals;limbVal!=NULL;limbVal=limbVal->next) { if (limbVal->val == NULL) continue; if (raStyle) printf("%s %s ",MDB_METAVAR_RAKEY,rootVar->var); else printf("%s %s=",MDB_METAVAR_RAKEY,rootVar->var); if (!raStyle && strchr(limbVal->val, ' ') != NULL) // Has blanks printf("\"%s\"",limbVal->val); else printf("%s",limbVal->val); struct mdbLeafObj *leafObj = NULL; for (leafObj=limbVal->objs;leafObj!=NULL;leafObj=leafObj->next) { if (leafObj->obj == NULL) continue; if (raStyle) printf("\n%s %s",MDB_METAOBJ_RAKEY,leafObj->obj); else printf(" %s",leafObj->obj); } printf("\n"); if (raStyle) printf("\n"); } } } int mdbObjCount(struct mdbObj *mdbObjs,boolean objs) // returns the count of vars belonging to this obj or objs; { int count = 0; struct mdbObj *mdbObj = 
NULL; for (mdbObj=mdbObjs;mdbObj!=NULL;mdbObj=mdbObj->next) { if (mdbObj->obj == NULL) continue; if (objs) count++; else { struct mdbVar *mdbVar = NULL; for (mdbVar=mdbObj->vars;mdbVar!=NULL;mdbVar=mdbVar->next) { if (mdbVar->var != NULL && mdbVar->val != NULL) count++; } } } return count; } int mdbByVarCount(struct mdbByVar *mdbByVars,boolean vars, boolean vals) // returns the count of objs belonging to this set of vars; { int count = 0; struct mdbByVar *rootVar = NULL; for (rootVar=mdbByVars;rootVar!=NULL;rootVar=rootVar->next) { if (rootVar->var == NULL) continue; if (vars) count++; else { struct mdbLimbVal *limbVal = NULL; for (limbVal=rootVar->vals;limbVal!=NULL;limbVal=limbVal->next) { if (limbVal->val == NULL) continue; if (vals) count++; else { struct mdbLeafObj *leafObj = NULL; for (leafObj=limbVal->objs;leafObj!=NULL;leafObj=leafObj->next) { if (leafObj->obj != NULL) count++; } } } } } return count; } // ----------------- Utilities ----------------- struct hash *mdbObjsHash(struct mdbObj *mdbObjs) // Returns a hash object for this set of mdbObjs, keyed on the obj. // WARNING: any changes to the members of the mdbObjs list may lead to invalid pointers in the hash { if (mdbObjs == NULL) return NULL; struct hash *objsHash = hashNew(8); struct mdbObj *mdbObj = mdbObjs; for (;mdbObj!=NULL;mdbObj=mdbObj->next) { hashAdd(objsHash, mdbObj->obj, mdbObj); } return objsHash; } struct mdbObj *mdbObjLookUp(struct hash *mdbObjsHash, char *obj) // Returns an mdbObj from the objsHash // WARNING: any changes to the members of the mdbObjs list used to make the mdbObjsHash // may lead to invalid pointers in the hash { if (mdbObjsHash == NULL || obj == NULL) return NULL; return hashFindVal(mdbObjsHash,obj); } void mdbObjsHashFree(struct hash **pMdbObjsHash) // Frees an mdbObjs hash. 
{ hashFree(pMdbObjsHash); } struct mdbVar *mdbObjFind(struct mdbObj *mdbObj, char *var) // Finds the mdbVar associated with the var or returns NULL { if (mdbObj == NULL) return NULL; struct mdbVar *mdbVar = NULL; if (mdbObj->varHash != NULL) mdbVar = hashFindVal(mdbObj->varHash,var); // case sensitive (unfortunately) else { for (mdbVar=mdbObj->vars;mdbVar!=NULL;mdbVar=mdbVar->next) { if (sameWord(var,mdbVar->var)) // case insensitive break; } } if (mdbVar == NULL) return NULL; return mdbVar; } char *mdbObjFindValue(struct mdbObj *mdbObj, char *var) // Finds the val associated with the var or retruns NULL { struct mdbVar *mdbVar = mdbObjFind(mdbObj, var); if(mdbVar == NULL) { if (sameWord(var,"obj") || sameWord(var,"objName") || sameWord(var,"metaObject")) return mdbObj->obj; return NULL; } return mdbVar->val; } struct slName *mdbObjsFindAllVals(struct mdbObj *mdbObjs, char *var, char *emptyToken) // Returns a list of all vals in mdbObjs for a requested var // Will add empty only if there is atleast one empty val and at least one val found { struct slName *vals = NULL; struct mdbObj *mdbObj = mdbObjs; boolean foundEmpty = FALSE; for (;mdbObj != NULL;mdbObj = mdbObj->next) { char *val = mdbObjFindValue(mdbObj,var); if (val != NULL) slNameStore(&vals, val); else foundEmpty = TRUE; } // Will add empty only if there is atleast one empty val and at least one val found if (foundEmpty && vals != NULL && (emptyToken != NULL)) slNameStore(&vals, emptyToken); return vals; } boolean mdbObjContains(struct mdbObj *mdbObj, char *var, char *val) // Returns TRUE if object contains var, val or both { if (mdbObj == NULL) return FALSE; if (var != NULL) { char *foundVal = mdbObjFindValue(mdbObj,var); if (foundVal == NULL) return FALSE; if (val == NULL) return TRUE; return sameOk(foundVal,val); } struct mdbVar *mdbVar = NULL; for (mdbVar=mdbObj->vars;mdbVar!=NULL;mdbVar=mdbVar->next) { if (differentStringNullOk(var,mdbVar->var) != 0) continue; if (differentStringNullOk(val,mdbVar->val) 
!= 0) continue; return TRUE; } return FALSE; } boolean mdbObjsContainAltleastOneMatchingVar(struct mdbObj *mdbObjs, char *var, char *val) // Returns TRUE if any object in set contains var { struct mdbObj *mdbObj = mdbObjs; for (;mdbObj!=NULL; mdbObj=mdbObj->next) { if (mdbObjContains(mdbObj, var, val)) return TRUE; } return FALSE; } #define MDB_COMMON_VARS_OBJ_SEARCH_LIMIT 10 struct mdbObj *mdbObjsCommonVars(struct mdbObj *mdbObjs) // Returns a new mdbObj with all vars that are contained in every obj passed in. // Note that the returnd mdbObj has a meaningles obj name and vals. { if (mdbObjs == NULL || mdbObjs->vars == NULL) return NULL; struct mdbObj *mdbObj = mdbObjs; struct mdbObj *commonVars = mdbObjClone(mdbObj); // Clone the first obj then prune it commonVars->next = NULL; mdbObj=mdbObj->next; // No need to include first obj in search if (mdbObj != NULL) { int count = 1; // NOTE: This should not loop through all, as the list could be huge. // Just compare the first 10 for now struct dyString *dyPruneVars = dyStringNew(512); for (;mdbObj != NULL && count < MDB_COMMON_VARS_OBJ_SEARCH_LIMIT;mdbObj=mdbObj->next, count++) { struct mdbVar *mdbVar = commonVars->vars; // Will walk through the first obj's vars for (; mdbVar != NULL; mdbVar = mdbVar->next ) { if (mdbObjsContainAtleastOne(mdbObj, mdbVar->var) == FALSE) dyStringPrintf(dyPruneVars,"%s ",mdbVar->var); // var not found so add to prune list } if (dyStringLen(dyPruneVars) > 0) { mdbObjRemoveVars(commonVars,dyStringContents(dyPruneVars)); dyStringClear(dyPruneVars); } } dyStringFree(&dyPruneVars); } return commonVars; } boolean mdbByVarContains(struct mdbByVar *mdbByVar, char *val, char *obj) // Returns TRUE if var contains val, obj or both { if (mdbByVar != NULL) { struct mdbLimbVal *limbVal = NULL; struct mdbLeafObj *leafObj = NULL; if (mdbByVar->valHash != NULL && val != NULL) { limbVal = hashFindVal(mdbByVar->valHash,val); if (limbVal == NULL || limbVal->val == NULL) return FALSE; if (limbVal->objHash != 
NULL && obj != NULL) { leafObj = hashFindVal(limbVal->objHash,obj); if (leafObj == NULL) return FALSE; return sameOk(leafObj->obj,obj); } } for (limbVal=mdbByVar->vals;limbVal!=NULL;limbVal=limbVal->next) { if (differentStringNullOk(val,limbVal->val) != 0) continue; for (leafObj=limbVal->objs;leafObj!=NULL;leafObj=leafObj->next) { if (differentStringNullOk(obj,leafObj->obj) != 0) continue; return TRUE; } } } return FALSE; } void mdbObjReorderVars(struct mdbObj *mdbObjs, char *vars,boolean back) // Reorders vars list based upon list of vars "cell antibody treatment". Send to front or back. { char *cloneLine = cloneString(vars); char **words = NULL; if (strchr(cloneLine, ' ') == NULL) // Tolerate alternate delimiters { if (strchr(cloneLine, ',') != NULL) // delimit by commas? strSwapChar(cloneLine,',',' '); else if (strchr(cloneLine, ';') != NULL) // delimit by semicolons? strSwapChar(cloneLine,';',' '); else if (strchr(cloneLine, '\t') != NULL) // delimit by tabs? strSwapChar(cloneLine,'\t',' '); } int count = chopByWhite(cloneLine,NULL,0); if (count) { words = needMem(sizeof(char *) * count); count = chopByWhite(cloneLine,words,count); } else errAbort("mdbObjReorderVars cannot parse vars argument.\n"); struct mdbObj *mdbObj = NULL; for ( mdbObj=mdbObjs; mdbObj!=NULL; mdbObj=mdbObj->next ) { int ix; struct mdbVar *orderedVars = NULL; struct mdbVar **varsToReorder = needMem(sizeof(struct mdbVar *) * count); struct mdbVar *mdbVar = NULL; while ((mdbVar = slPopHead(&(mdbObj->vars))) != NULL) { ix = stringArrayIx(mdbVar->var,words,count); // Is case insensitive if (ix < 0) slAddHead(&orderedVars,mdbVar); else varsToReorder[ix] = mdbVar; } if (back) // add to front of backward list { for ( ix=0; ix<count; ix++ ) { // NOTE: For NULL, could add "None" if (varsToReorder[ix] != NULL) // but that would be too much "inside ball" slAddHead(&orderedVars,varsToReorder[ix]); } } slReverse(&orderedVars); if (!back) // Add to front of forward list { for ( ix=count-1; ix>=0; ix-- ) { 
if (varsToReorder[ix] != NULL) slAddHead(&orderedVars,varsToReorder[ix]); } } mdbObj->vars = orderedVars; freeMem(varsToReorder); } freeMem(words); } void mdbObjReorderByCv(struct mdbObj *mdbObjs, boolean includeHidden) // Reorders vars list based upon cv.ra typeOfTerms priority { struct hash *cvTermTypes = (struct hash *)cvTermTypeHash(); struct hashEl *el, *elList = hashElListHash(cvTermTypes); struct slPair *cvVars = NULL; for (el = elList; el != NULL; el = el->next) { struct hash *varHash = el->val; if (includeHidden || !cvTermIsHidden(el->name)) // Skip the hidden ones { char *priority = hashFindVal(varHash, CV_TOT_PRIORITY); if (priority != NULL) // If no priority it will randomly fall to the back of the list slPairAdd(&cvVars,el->name,(char *)sqlUnsignedLong(priority)); } } hashElFreeList(&elList); if (cvVars) { slPairIntSort(&cvVars); // sorts on the integer val // Now convert this to a string of names char *orderedVars = slPairNameToString(cvVars,' ',FALSE); slPairFreeList(&cvVars); if (orderedVars != NULL) { mdbObjReorderVars(mdbObjs, orderedVars,FALSE); // Finally we can reorder the vars freeMem(orderedVars); } } } int mdbObjVarCmp(const void *va, const void *vb) /* Compare to sort on full list of vars and vals. 
*/ { const struct mdbObj *a = *((struct mdbObj **)va); const struct mdbObj *b = *((struct mdbObj **)vb); struct mdbVar* aVar = a->vars; struct mdbVar* bVar = b->vars; for (;aVar != NULL && bVar != NULL;aVar=aVar->next,bVar=bVar->next) { int ret = differentWord(aVar->var, bVar->var); // case insensitive if (ret != 0) { // Look for it by walking vars struct mdbVar* tryVar = bVar->next; for (;tryVar;tryVar=tryVar->next) { if (sameWord(aVar->var, tryVar->var)) return -1; // Current aVar found in B so B has extra var & A has NULL: A sorts first } tryVar = aVar->next; for (;tryVar;tryVar=tryVar->next) { if (sameWord(tryVar->var, bVar->var)) return 1; // Current bVar found in A so A has extra var & B has NULL: B sorts first } return ret; // Current aVar and bVar are not shared so prioritize them alphabetically } // (What else can I do?) ret = differentString(aVar->val, bVar->val); // case sensitive on val if (ret != 0) return ret; } if (bVar != NULL) return -1; // B has extra var and A has NULL: A sorts first if (aVar != NULL) return 1; // A has extra var and B has NULL: B sorts first return 0; } void mdbObjsSortOnVars(struct mdbObj **mdbObjs, char *vars) // Sorts on var,val pairs vars lists: fwd case-sensitive. Assumes objs' vars are in identical order // Optionally give list of vars "cell antibody treatment" to sort on (bringing to front of lists). // NOTE: assumes all var pairs match (e.g. every obj has cell,treatment,antibody,... // and missing treatment messes up sort) { if (vars != NULL) mdbObjReorderVars(*mdbObjs,vars,FALSE); slSort(mdbObjs, mdbObjVarCmp); } void mdbObjsSortOnVarPairs(struct mdbObj **mdbObjs,struct slPair *varValPairs) // Sorts on var,val pairs vars lists: fwd case-sensitive. // Assumes all objs' vars are in identical order. 
// This method will use mdbObjsSortOnVars() { if (varValPairs == NULL) return; struct slPair *onePair = varValPairs; struct dyString *dyTerms = dyStringNew(256); dyStringAppend(dyTerms,onePair->name); onePair = onePair->next; for (; onePair != NULL; onePair = onePair->next) dyStringPrintf(dyTerms,",%s",onePair->name); mdbObjsSortOnVars(mdbObjs,dyStringContents(dyTerms)); dyStringFree(&dyTerms); } void mdbObjsSortOnCv(struct mdbObj **mdbObjs, boolean includeHidden) // Puts obj->vars in order based upon cv.ra typeOfTerms priority, // then case-sensitively sorts all objs in list based upon that var order. // NOTE: assumes all var pairs match (e.g. every obj has cell,treatment,antibody,... // and missing treatment messes up sort) { mdbObjReorderByCv(*mdbObjs, includeHidden); slSort(mdbObjs, mdbObjVarCmp); // While sort will not be perfect (given missing values) } // it does a good job none the less. boolean mdbObjRemoveOneVar(struct mdbObj *mdbObj, char *var, char *val) // returns TRUE if var (and optional val) are found and surgically removed from one mdbObj { struct mdbVar *lastVar = NULL; struct mdbVar *mdbVar = mdbObj->vars; for (;mdbVar != NULL;lastVar=mdbVar,mdbVar=mdbVar->next) { if (sameWord(mdbVar->var,var)) { if (val && differentString(mdbVar->val,val)) break; // No need to continue if (lastVar != NULL) lastVar->next = mdbVar->next; else mdbObj->vars = mdbVar->next; mdbVar->next = NULL; if (mdbObj->varHash != NULL) hashRemove(mdbObj->varHash,mdbVar->var); mdbVarFree(&mdbVar); return TRUE; } } return FALSE; } void mdbObjRemoveVars(struct mdbObj *mdbObjs, char *vars) // Prunes list of vars for an object, freeing the memory. Doesn't touch DB. 
{ int count = 0; char **words = NULL; if (vars != NULL) { count = chopByWhite(vars,NULL,0); if (count > 1) { words = needMem(sizeof(char *) * count); count = chopByWhite(cloneString(vars),words,count); } } struct mdbObj *mdbObj = NULL; for ( mdbObj=mdbObjs; mdbObj!=NULL; mdbObj=mdbObj->next ) { if (count == 0) { if (mdbObj->varHash != NULL) hashFree(&mdbObj->varHash); mdbVarsFree(&mdbObj->vars); } else if (count == 1) mdbObjRemoveOneVar(mdbObj,vars,NULL); else { struct mdbVar *keepTheseVars = NULL; struct mdbVar *mdbVar = NULL; while ((mdbVar = slPopHead(&(mdbObj->vars))) != NULL) { int ix = stringArrayIx(mdbVar->var,words,count); if (ix < 0) slAddHead(&keepTheseVars,mdbVar); else { if (count != 0 && mdbObj->varHash != NULL) hashRemove(mdbObj->varHash, mdbVar->var); mdbVarFree(&mdbVar); } } if (keepTheseVars != NULL) slReverse(&keepTheseVars); mdbObj->vars = keepTheseVars; } } if (words != NULL) freeMem(words); } void mdbObjRemoveHiddenVars(struct mdbObj *mdbObjs) // Prunes list of vars for mdb objs that have been declared as hidden in cv.ra typeOfTerms { // make comma delimited list of hidden vars struct hash *cvTermTypes = (struct hash *)cvTermTypeHash(); struct hashEl *el, *elList = hashElListHash(cvTermTypes); struct dyString *dyRemoveVars = dyStringNew(256); for (el = elList; el != NULL; el = el->next) { if (cvTermIsHidden(el->name)) dyStringPrintf(dyRemoveVars,"%s ",el->name); } hashElFreeList(&elList); if (dyStringLen(dyRemoveVars)) mdbObjRemoveVars(mdbObjs, dyStringContents(dyRemoveVars)); dyStringFree(&dyRemoveVars); } boolean mdbObjsHasCommonVar(struct mdbObj *mdbList, char *var, boolean missingOk) // Returns TRUE if all mbObjs passed in have the var with the same value { char *val = NULL; struct mdbObj *mdb = NULL; for (mdb = mdbList; mdb; mdb=mdb->next) { char *thisVal = mdbObjFindValue(mdb,var); if (thisVal == NULL) { if (missingOk) continue; else return FALSE; } if (val == NULL) val = thisVal; else if (differentWord(val,thisVal)) return FALSE; } 
return TRUE; } char *mdbRemoveCommonVar(struct mdbObj *mdbList, char *var) // Removes var from set of mdbObjs but only if all that have it have a commmon val // Returns the val if removed, else NULL { if (mdbObjsHasCommonVar(mdbList,var,TRUE)) // If var isn't found in some, that is okay { char *val = NULL; struct mdbObj *mdb = mdbList; for (; mdb; mdb=mdb->next) { if (val == NULL) { char *thisVal = mdbObjFindValue(mdb,var); if (thisVal != NULL) val = cloneString(thisVal); } mdbObjRemoveVars(mdb,var); } return val; } return NULL; } boolean mdbObjSetVar(struct mdbObj *mdbObj, char *var,char *val) // Sets the string value to a single var into an obj, preparing for DB update. // returns TRUE if updated, FALSE if added { assert(mdbObj != NULL && var != NULL && val != NULL); struct mdbVar *mdbVar = mdbObjFind(mdbObj, var); if (mdbVar != NULL) { if (mdbVar->val != NULL) freeMem(mdbVar->val); mdbVar->val = cloneString(val); if (mdbObj->varHash != NULL) hashReplace(mdbObj->varHash, mdbVar->var, mdbVar); // pointer to struct to resolve type return TRUE; } else { AllocVar(mdbVar); mdbVar->var = cloneString(var); mdbVar->val = cloneString(val); slAddHead(&mdbObj->vars,mdbVar); // Only one if (mdbObj->varHash != NULL) hashAdd(mdbObj->varHash, mdbVar->var, mdbVar); // pointer to struct to resolve type return FALSE; } } boolean mdbObjSetVarInt(struct mdbObj *mdbObj, char *var,int val) // Sets an integer value to a single var in an obj, preparing for DB update. // returns TRUE if updated, FALSE if added { char buf[128]; safef(buf,sizeof(buf),"%d",val); return mdbObjSetVar(mdbObj,var,buf); } void mdbObjSwapVars(struct mdbObj *mdbObjs, char *vars,boolean deleteThis) // Replaces objs' vars with var=val pairs provided, preparing for DB update. 
{
struct mdbObj *oneObj = mdbObjs;
while (oneObj != NULL)
    {
    oneObj->deleteThis = deleteThis;

    // Throw away whatever the object held: the hash first, then the vars themselves
    if (oneObj->varHash != NULL)
        hashFree(&oneObj->varHash);
    mdbVarsFree(&(oneObj->vars));

    if (vars != NULL)
        mdbObjAddVarPairs(oneObj,vars);

    oneObj = oneObj->next;
    }
}

struct mdbObj *mdbObjsFilter(struct mdbObj **pMdbObjs, char *var, char *val,boolean returnMatches)
// Splits the list at *pMdbObjs in two: objects that have var (and, when val is given, a
// matching val) and objects that do not.  val is compared with sameWord(), or with
// wildMatch() when it contains a '*'.  With returnMatches the matching objects are returned
// and the rest stay in *pMdbObjs; otherwise the roles are swapped.  Both lists keep the
// original relative order.
{
struct mdbObj *remaining = *pMdbObjs;
struct mdbObj *returned = NULL;
*pMdbObjs = NULL;

boolean hasWildcard = (val != NULL && strchr(val,'*') != NULL);

// Tail pointers let each object be appended in place, so no reversing is needed afterwards
struct mdbObj **matchTail = NULL;
struct mdbObj **otherTail = NULL;
if (returnMatches)
    {
    matchTail = &returned;
    otherTail = pMdbObjs;
    }
else
    {
    matchTail = pMdbObjs;
    otherTail = &returned;
    }

while (remaining != NULL)
    {
    boolean isMatch = FALSE;
    struct mdbObj *obj = slPopHead(&remaining);
    char *found = mdbObjFindValue(obj,var);

    if (val == NULL)
        isMatch = (found != NULL);          // presence of the var is enough
    else if (found != NULL)
        {
        if (hasWildcard)
            isMatch = (wildMatch(val,found));
        else
            isMatch = (sameWord(found,val));
        }

    if (isMatch)
        {
        *matchTail = obj;
        matchTail = &(obj->next);
        }
    else
        {
        *otherTail = obj;
        otherTail = &(obj->next);
        }
    }
return returned;
}

struct mdbObj *mdbObjsFilterByVars(struct mdbObj **pMdbObjs,char *vars,
                                   boolean noneEqualsNotFound,boolean returnMatches)
// Filters mdb objects to only those that include/exclude var=val pairs
// (e.g. "var1=val1 var2 var3!=val3 var4=None").
// Supports != ("var!=" means var not found). Optionally supports var=None equal to var is not found
// Returns matched or unmatched items objects as requested.
// Multiple passes means sort order is destroyed.
{ struct mdbObj *mdbObjsMatch = *pMdbObjs; struct mdbObj *mdbObjsNoMatch = NULL; char *varsLine = cloneString(vars); int ix=0,count = chopByWhite(varsLine,NULL,0); char **var = needMem(count * sizeof(char *)); chopByWhite(varsLine,var,count); for (ix=0;ix<count;ix++) { boolean notEqual = FALSE; char *val = strchr(var[ix],'=');// list may be vars alone (var1=val1 var2 var3!=val3 ...) if (val != NULL) { notEqual = (*(val - 1) == '!'); if (notEqual) *(val - 1) = '\0'; *val = '\0'; val += 1; if (*val == '\0') val = NULL; } struct mdbObj *objNotMatching = mdbObjsFilter(&mdbObjsMatch,var[ix],val,notEqual); // 1st match on var=None, now match on var!= (var not defined) if (noneEqualsNotFound && val != NULL && sameWord(val,MDB_VAL_ENCODE_EDV_NONE)) mdbObjsMatch = slCat(mdbObjsMatch,mdbObjsFilter(&objNotMatching,var[ix],NULL,notEqual)); mdbObjsNoMatch = slCat(mdbObjsNoMatch,objNotMatching); // Multiple passes "cat" non-matching } // and destroys sort order freeMem(var); freeMem(varsLine); if (returnMatches) { *pMdbObjs = mdbObjsNoMatch; return mdbObjsMatch; } *pMdbObjs = mdbObjsMatch; return mdbObjsNoMatch; } struct mdbObj *mdbObjsFilterTablesOrFiles(struct mdbObj **pMdbObjs,boolean tables, boolean files) // Filters mdb objects to only those that have associated tables or files. 
// Returns removed non-table/file objects // Note: Since table/file objects overlap, there are 3 possibilites: tables, files, table && files { assert(tables || files); // Cant exclude both struct mdbObj *mdbObjs = *pMdbObjs; struct mdbObj *mdbObjsDropped = NULL; if (tables) mdbObjsDropped = mdbObjsFilter(&mdbObjs,MDB_OBJ_TYPE,MDB_OBJ_TYPE_TABLE,FALSE); if (files) { struct mdbObj *mdbObjsNoFileName = mdbObjsDropped = mdbObjsFilter(&mdbObjs,MDB_VAR_FILENAME, NULL,FALSE); if (mdbObjsNoFileName) { struct mdbObj *mdbObjsNoFileIndex = mdbObjsFilter(&mdbObjsNoFileName,MDB_VAR_FILEINDEX, NULL,FALSE); if (mdbObjsNoFileIndex) { mdbObjs = slCat(mdbObjs,mdbObjsNoFileName); mdbObjsDropped = slCat(mdbObjsDropped,mdbObjsNoFileIndex); } } } slSort(&mdbObjs, &mdbObjCmp); // Need to be returned to obj order slSort(&mdbObjsDropped,&mdbObjCmp); *pMdbObjs = mdbObjs; return mdbObjsDropped; } struct mdbObj *mdbObjIntersection(struct mdbObj **pA, struct mdbObj *b) // return objs removed from pA while making an intersection of two mdbObj lists. // List b is untouched but pA will contain the resulting intersection { struct mdbObj *mdbObj; struct hash *hashB = newHash(0); for (mdbObj = b; mdbObj != NULL; mdbObj = mdbObj->next) { hashAdd(hashB, mdbObj->obj, mdbObj); } struct mdbObj *mdbObjsDropped = NULL; struct mdbObj *mdbObjsIntersecting = NULL; struct mdbObj *mdbObjs=*pA; while (mdbObjs) { mdbObj = slPopHead(&mdbObjs); if (hashLookup(hashB, mdbObj->obj) != NULL) slAddHead(&mdbObjsIntersecting,mdbObj); else slAddHead(&mdbObjsDropped,mdbObj); } hashFree(&hashB); if (mdbObjsIntersecting) slReverse(&mdbObjsIntersecting); *pA = mdbObjsIntersecting; if (mdbObjsDropped) slReverse(&mdbObjsDropped); return mdbObjsDropped; } void mdbObjTransformToUpdate(struct mdbObj *mdbObjs, char *var, char *val,boolean deleteThis) // Turns one or more mdbObjs into the stucture needed to add/update or delete. 
{ struct mdbObj *mdbObj = NULL; for ( mdbObj=mdbObjs; mdbObj!=NULL; mdbObj=mdbObj->next ) { mdbObj->deleteThis = deleteThis; if (mdbObj->varHash != NULL) hashFree(&mdbObj->varHash); mdbVarsFree(&(mdbObj->vars)); if (var != NULL) { struct mdbVar *mdbVar; AllocVar(mdbVar); mdbVar->var = cloneString(var); if (val != NULL) mdbVar->val = cloneString(val); mdbObj->vars = mdbVar; // Only one } } } struct mdbObj *mdbObjClone(const struct mdbObj *mdbObj) // Clones a single mdbObj, including hash and maintining order { if (mdbObj == NULL) return NULL; struct mdbObj *newObj; AllocVar(newObj); if (mdbObj->obj != NULL) newObj->obj = cloneString(mdbObj->obj); newObj->deleteThis = mdbObj->deleteThis; if (mdbObj->vars != NULL) { if (mdbObj->varHash != NULL) newObj->varHash = hashNew(8); struct mdbVar *mdbVar = NULL; for (mdbVar = mdbObj->vars; mdbVar != NULL; mdbVar = mdbVar->next ) { struct mdbVar *newVar = NULL; AllocVar(newVar); if (mdbVar->var != NULL) newVar->var = cloneString(mdbVar->var); if (mdbVar->val != NULL) newVar->val = cloneString(mdbVar->val); if (newVar->var != NULL && newVar->val != NULL) hashAdd(newObj->varHash, newVar->var, newVar); // pointer to struct to resolve type slAddHead(&(newObj->vars),newVar); } slReverse(&(newObj->vars)); } return newObj; } struct slName *mdbObjToSlName(struct mdbObj *mdbObjs) // Creates slNames list of mdbObjs->obj. 
mdbObjs remains untouched { struct slName *mdbNames = NULL; struct mdbObj *mdbObj = mdbObjs; for (;mdbObj!=NULL; mdbObj=mdbObj->next) { slAddHead(&mdbNames,slNameNew(mdbObj->obj)); //allocates memory } slReverse(&mdbNames); return mdbNames; } // ----------------- Validation and specialty APIs ----------------- boolean mdbObjIsComposite(struct mdbObj *mdbObj) // returns TRUE if this is a valid composite object { char *objType = mdbObjFindValue(mdbObj,MDB_OBJ_TYPE); assert(objType != NULL); return sameWord(objType,MDB_OBJ_TYPE_COMPOSITE); } boolean mdbObjIsCompositeMember(struct mdbObj *mdbObj) // returns TRUE if this is a valid member of a composite. DOES not confirm that composite obj exists { char *objType = mdbObjFindValue(mdbObj,MDB_OBJ_TYPE); assert(objType != NULL); if (differentWord(objType,MDB_OBJ_TYPE_TABLE) && differentWord(objType,MDB_OBJ_TYPE_FILE)) return FALSE; return mdbObjContains(mdbObj,MDB_VAR_COMPOSITE,NULL); } int mdbObjsValidate(struct mdbObj *mdbObjs, boolean full) // Validates vars and vals against cv.ra. Returns count of errors found. 
// Full considers vars not defined in cv as invalids { //TODO: move CV_VALIDATE* support to cv.c and merge with validation there struct hash *termTypeHash = (struct hash *)cvTermTypeHash(); struct mdbObj *mdbObj = NULL; int invalids = 0; for ( mdbObj=mdbObjs; mdbObj!=NULL; mdbObj=mdbObj->next ) { struct mdbVar *mdbVar = NULL; for (mdbVar = mdbObj->vars;mdbVar != NULL;mdbVar=mdbVar->next) { struct hash *termHash = hashFindVal(termTypeHash,mdbVar->var); if (termHash == NULL) // No cv definition for term so no validation can be done { if (!full) continue; if (sameString(mdbVar->var,MDB_OBJ_TYPE) && ( sameString(mdbVar->val,MDB_OBJ_TYPE_TABLE) || sameString(mdbVar->val,MDB_OBJ_TYPE_FILE) || sameString(mdbVar->val,MDB_OBJ_TYPE_COMPOSITE))) continue; printf("INVALID %s '%s' not defined in %s: %s = %s in %s: %s\n",CV_TERM, mdbVar->var,CV_FILE_NAME,mdbVar->var,mdbVar->val,MDB_OBJ,mdbObj->obj); invalids++; continue; } char reason[256]; boolean valid = cvValidateTerm(mdbVar->var,mdbVar->val,reason,sizeof(reason)); if (!valid) { if (startsWith("ERROR in ",reason)) printf("%s\n",reason); else printf("%s in %s: %s\n",reason,MDB_OBJ,mdbObj->obj); invalids++; } } } return invalids; } static struct slName *mdbObjGetNamedEncodeEdvs(struct mdbObj *compObj) // returns NULL or the list of EDVs defined for this composite { char *edvs = mdbObjFindValue(compObj,MDB_VAR_ENCODE_EDVS); if (edvs == NULL) return NULL; edvs = cloneString(edvs); if (strchr( edvs,',') != NULL) // Tolerate delimit by commas strSwapChar(edvs,',',' '); else if (strchr(edvs,';') != NULL) // Tolerate delimit by semicolons strSwapChar(edvs,';',' '); struct slName *compositeEdvs = slNameListFromString(edvs,' '); freeMem(edvs); return compositeEdvs; } static struct mdbVar *mdbObjEncodeEdvsAsMdbVars(struct mdbObj *mdbObj,struct slName *compositeEdvs, boolean includeNone) // returns the EDVs and values for the composite member object // If includeNone, then defined variables not found in obj will be included as 
{var}="None". { struct mdbVar *edvVars = NULL; struct slName *var = compositeEdvs; for (;var!=NULL;var=var->next) { char *val = mdbObjFindValue(mdbObj,var->name); if (val) mdbVarAdd(&edvVars, var->name,val); else if (includeNone) { if (differentWord(var->name,ENCODE_EXP_FIELD_ORGANISM)) // Does not go into EDV's mdbVarAdd(&edvVars, var->name,MDB_VAL_ENCODE_EDV_NONE); // sent to encodeExp table } } slReverse(&edvVars); return edvVars; } struct slName *mdbObjFindCompositeNamedEncodeEdvs(struct sqlConnection *conn,char *tableName, struct mdbObj *mdbObj) // returns NULL or the Experiment Defining Variable names for this composite { if (!mdbObjIsCompositeMember(mdbObj)) return NULL; // This should be a valid composite memeber struct mdbObj *compObj = mdbObjQueryCompositeObj(conn,tableName,mdbObj); if (compObj == NULL) return NULL; struct slName *edvs = mdbObjGetNamedEncodeEdvs(compObj); mdbObjFree(&compObj); return edvs; } struct mdbVar *mdbObjFindEncodeEdvPairs(struct sqlConnection *conn,char *tableName, struct mdbObj *mdbObj,boolean includeNone) // returns NULL or the Experiment Defining Variables and values for this composite member object // If includeNone, then defined variables not found in obj will be included as {var}="None". { // In rare cases, the EDVs reside with the object and NOT in a objType=composite. struct slName *compositeEdvs = mdbObjGetNamedEncodeEdvs(mdbObj); // looking locally first. if (compositeEdvs == NULL) { compositeEdvs = mdbObjFindCompositeNamedEncodeEdvs(conn,tableName,mdbObj); if (compositeEdvs == NULL) return NULL; } return mdbObjEncodeEdvsAsMdbVars(mdbObj,compositeEdvs,includeNone); } struct mdbObj *mdbObjsEncodeExperimentify(struct sqlConnection *conn,char *db,char *tableName, char *expTable, struct mdbObj **pMdbObjs,int warn, boolean createExpIfNecessary,boolean updateAccession) // Organizes objects into experiments and validates experiment IDs. // Will add/update the ids in the structures. 
// If warn=1, then prints to stdout all the experiments/obs with missing or wrong expIds; // warn=2, then print line for each obj with expId or warning. // createExpIfNecessary means add expId to encodeExp table. updateAccession too if necessary. // Returns a new set of mdbObjs that is what can (and should) // be used to update the mdb via mdbObjsSetToDb(). { // Here is what "experimentify" does from "mdbPrint -encodeExp" and "mdbUpdate -encodeExp": // - Uses normal selection methods to get a set of objects (e.g. one composite worth) // or all objs. (in mdbPrint and mdbUpdate) // - This API: // - Breaks up and walks through set of objects composite by composite // - Looks up EDVs (Experiment Defining Variables) for composite. // These are defined in the mdb under objType=composite expVars= // - Breaks up and walks through composite objects exp by exp as defined by EDVs // - Uses encodeExp API to determine what expId should be. // - Creates new mdbObjs list of updates needed to put expId and dccAccession into the mdb. // - From "mdbPrint", this API warns of mismatches or missing expIds // - From "mdbUpdate" (not -test) then that utility will update the mdb from this API's // return structs. If -test, will reveal what would be updated. 
if (pMdbObjs == NULL || *pMdbObjs == NULL) return 0; struct mdbObj *mdbObjs = *pMdbObjs; struct mdbObj *mdbProcessedObs = NULL; struct mdbObj *mdbUpdateObjs = NULL; if (expTable == NULL) expTable = ENCODE_EXP_TABLE; verbose(2, "mdbObjsEncodeExperimentify() beginning for %d objects.\n",slCount(*pMdbObjs)); // Sort all objects by composite, so that we handle composite by composite mdbObjsSortOnVars(&mdbObjs, MDB_VAR_COMPOSITE); struct dyString *dyVars = dyStringNew(256); while (mdbObjs != NULL) { // Work on a composite at a time boolean compositelessObj = FALSE; char *compName = NULL; while (mdbObjs != NULL && compName == NULL) { compName = mdbObjFindValue(mdbObjs,MDB_VAR_COMPOSITE); if (compName == NULL) { if (mdbObjFindValue(mdbObjs,MDB_VAR_ENCODE_EDVS) == NULL) { verbose(1, "Object '%s' has no %s or %s defined.\n", mdbObjs->obj,MDB_VAR_COMPOSITE,MDB_VAR_ENCODE_EDVS); mdbProcessedObs = slCat(mdbProcessedObs,slPopHead(&mdbObjs)); continue; } verbose(2, "mdbObjsEncodeExperimentify() starting on compositeless set.\n"); break; } } struct mdbObj *mdbCompositeObjs = NULL; if (compName != NULL) mdbCompositeObjs = mdbObjsFilter(&mdbObjs, MDB_VAR_COMPOSITE, compName,TRUE); else mdbCompositeObjs = slPopHead(&mdbObjs); // Rare cases there is no composite set. 
assert(mdbCompositeObjs != NULL); // --- At this point we have nibbled off a composite worth of objs from the full set of objects // Find the composite obj if it exists struct mdbObj *compObj = NULL; if (compName != NULL) { compObj =mdbObjsFilter(&mdbCompositeObjs, MDB_OBJ_TYPE, MDB_OBJ_TYPE_COMPOSITE,TRUE); if (compObj == NULL) // May be NULL if mdbObjs passed in was produced by { // too narrow of selection criteria compObj = mdbObjQueryCompositeObj(conn,tableName,mdbCompositeObjs); // 1st obj will do if (compObj == NULL) // This should be assertable { verbose(1, "Composite '%s' has not been defined.\n",compName); mdbProcessedObs = slCat(mdbProcessedObs,mdbCompositeObjs); mdbCompositeObjs = NULL; continue; } } else slAddHead(&mdbProcessedObs,compObj); // We can still use the pointer, but will not "process" it. NOTE: leak the queried one } else { compObj = mdbCompositeObjs; // Should be only one compName = mdbCompositeObjs->obj; compositelessObj = TRUE; } verbose(2, "mdbObjsEncodeExperimentify() working on %s %s%s.\n", compName,MDB_VAR_COMPOSITE,(compositelessObj?"less set":"")); // Obtain experiment defining variables for the composite (or compositeless obj) struct slName *compositeEdvs = mdbObjGetNamedEncodeEdvs(compObj); if (compositeEdvs == NULL) { verbose(1, "There are no experiment defining variables established for this %s%s. 
" "Add them to obj %s => var:%s.\n", MDB_VAR_COMPOSITE, (compositelessObj?"less set":""), compName,MDB_VAR_ENCODE_EDVS); mdbProcessedObs = slCat(mdbProcessedObs,mdbCompositeObjs); mdbCompositeObjs = NULL; continue; } dyStringClear(dyVars); dyStringAppend(dyVars,slNameListToString(compositeEdvs, ' ')); if (warn > 0) printf("Composite%s '%s' with %d objects has %d EDVs(%s): [%s].\n", (compositelessObj?"less set":""),compName,slCount(mdbCompositeObjs), slCount(compositeEdvs),MDB_VAR_ENCODE_EDVS,dyStringContents(dyVars));// Set the stage // Organize composite objs by EDVs dyStringPrintf(dyVars, " %s %s ",MDB_VAR_VIEW,MDB_VAR_REPLICATE);// Allows for nicer sorted list char *edvSortOrder = cloneString(dyStringContents(dyVars)); // Walk through objs for an exp as defined by EDVs int expCount=0; // Count of experiments in composite int expMissing=0; // Count of objects with missing expId int accMissing=0; // Count of objects with missing accessions int expObjsCount=0; // Total of all experimental object accoss the composite int expMax=0; // Largest experiment (in number of objects) int expMin=999; // Smallest experiment (in number of objects) while (mdbCompositeObjs != NULL) { // Must sort each cycle, because sort order is lost during mdbObjs FilterByVars(); mdbObjsSortOnVars(&mdbCompositeObjs, edvSortOrder); // Get the EDVs for the first obj struct mdbVar *edvVarVals = mdbObjEncodeEdvsAsMdbVars(mdbCompositeObjs,compositeEdvs,TRUE); // use first obj on list and include Nones if (edvVarVals == NULL) { verbose(1, "There are no experiment defining variables for this object '%s'.\n", mdbCompositeObjs->obj); slAddHead(&mdbProcessedObs,slPopHead(&mdbCompositeObjs)); // We're done with this one continue; } // Construct the var=val string for filtering a single exp (set of objs) // from composite worth of objs dyStringClear(dyVars); struct mdbVar *edvVar = edvVarVals; int valsFound = 0; for (;edvVar!=NULL;edvVar=edvVar->next) { dyStringPrintf(dyVars,"%s=%s 
",edvVar->var,edvVar->val); if (differentString(edvVar->val,MDB_VAL_ENCODE_EDV_NONE)) valsFound++; } dyStringContents(dyVars)[dyStringLen(dyVars) -1] = '\0'; // Nicer printing is all if (valsFound == 0) { verbose(1, "There are no experiment defining variables for this object '%s'.\n", mdbCompositeObjs->obj); slAddHead(&mdbProcessedObs,slPopHead(&mdbCompositeObjs)); // We're done with this one mdbVarsFree(&edvVarVals); continue; } // Work on one experiment at a time verbose(2, "mdbObjsEncodeExperimentify() working on EDVs: %s.\n",dyStringContents(dyVars)); struct mdbObj *mdbExpObjs = mdbObjsFilterByVars(&mdbCompositeObjs,dyStringContents(dyVars), TRUE,TRUE); // None={notFound} // --- At this point we have nibbled off an experiment worth of objects from the composite int objsInExp = slCount(mdbExpObjs); assert(objsInExp > 0); expCount++; expObjsCount += objsInExp; // Total of all experimental objects across the composite // Look up each exp in EXPERIMENTS_TABLE char experimentId[128]; int expId = ENCODE_EXP_IX_UNDEFINED; struct encodeExp *exp = encodeExpGetByMdbVarsFromTable(db, edvVarVals, expTable); // --------- BLOCK creation of expIds, at least during rollout of encodeExp // BLOCKED if (exp == NULL && createExpIfNecessary) // BLOCKED exp = encodeExpGetOrCreateByMdbVarsFromTable(db, edvVarVals, expTable); // --------- BLOCK creation of expIds, at least during rollout of encodeExp mdbVarsFree(&edvVarVals); // No longer needed // Make sure the accession is set if requested. 
if (createExpIfNecessary && updateAccession && exp != NULL && exp->ix != ENCODE_EXP_IX_UNDEFINED && exp->accession == NULL) encodeExpSetAccession(exp, expTable); if (exp != NULL) expId = exp->ix; if (expId == ENCODE_EXP_IX_UNDEFINED) { safef(experimentId,sizeof(experimentId),"{missing}"); if (warn > 0) printf("Experiment %s EDV: [%s] is not defined in %s.%s table.\n", experimentId,dyStringContents(dyVars), ENCODE_EXP_DATABASE, expTable); //printf("Experiment %s EDV: [%s] is not defined in %s table. Remaining:%d " // "and %d\n",experimentId,dyStringContents(dyVars),EXPERIMENTS_TABLE, // slCount(mdbCompositeObjs),slCount(mdbObjs)); if (warn < 2) // From mdbUpdate (warn=1), just interested in testing waters. { // From mdbPrint (warn=2) list all objs in exp. expMissing += slCount(mdbExpObjs); mdbProcessedObs = slCat(mdbProcessedObs,mdbExpObjs); mdbExpObjs = NULL; encodeExpFree(&exp); continue; } } else { safef(experimentId,sizeof(experimentId),"%d",expId); if (warn > 0) printf("Experiment %s has %d objects based upon %d EDVs: [%s].\n", experimentId,slCount(mdbExpObjs),valsFound,dyStringContents(dyVars)); } // Now we can walk through each obj in experiment and determine if it has the correct expId int foundId = FALSE; int errors = objsInExp; if (expMax < objsInExp) expMax = objsInExp; if (expMin > objsInExp) expMin = objsInExp; while (mdbExpObjs != NULL) { struct mdbObj *obj = slPopHead(&mdbExpObjs); { // NOTE: This list could expand but we expect only // tables and files to be objs in an experiment char *objType = mdbObjFindValue(obj,MDB_OBJ_TYPE); assert( objType != NULL && ( sameString(objType,MDB_OBJ_TYPE_TABLE) || sameString(objType,MDB_OBJ_TYPE_FILE))); } boolean updateObj = FALSE; char *val = mdbObjFindValue(obj,MDB_VAR_ENCODE_EXP_ID); if (val != NULL) { foundId = TRUE; // warn==1 will give only 1 exp wide error if no individual errors. // NOTE: would be nice if those with expId sorted to beginning, // but can't have everything. 
int thisId = atoi(val); if (expId == ENCODE_EXP_IX_UNDEFINED || thisId != expId) { updateObj = TRUE; // Always an error! expMissing++; printf(" ERROR %s %-60s has bad %s=%s.\n", experimentId,obj->obj,MDB_VAR_ENCODE_EXP_ID,val); } else { char *acc = mdbObjFindValue(obj,MDB_VAR_DCC_ACCESSION); if (updateAccession && !createExpIfNecessary && exp->accession == NULL) { // -test so one wasn't created exp->accession = needMem(16); safef(exp->accession, 16, "TEMP%06d", exp->ix); // TEMP since this is not an update but we want -test to work. } if (exp->accession != NULL && (acc == NULL || differentString(acc,exp->accession))) { if (updateAccession) updateObj = TRUE; accMissing++; if (acc != NULL) // Always an error printf(" ERROR %s %-60s %s set, has wrong %s: %s.\n", experimentId,obj->obj,MDB_VAR_ENCODE_EXP_ID, MDB_VAR_DCC_ACCESSION,acc); else if (warn > 1) // NOTE: Could give more info as per wrangler's desires printf(" %s %-60s %s set, needs %s.\n", experimentId,obj->obj,MDB_VAR_ENCODE_EXP_ID, MDB_VAR_DCC_ACCESSION); } else { errors--; // One less error if (warn > 1) // NOTE: Could give more info as per wrangler's desires printf(" %s %-60s %s\n", experimentId,obj->obj, (exp->accession != NULL ? exp->accession : "")); } } } else { if (expId != ENCODE_EXP_IX_UNDEFINED) { updateObj = TRUE; expMissing++; } if ((foundId && warn > 0) || warn > 1) { if (updateObj) printf(" %s %-60s needs updating to mdb.\n", experimentId,obj->obj); else printf(" %s %s\n",experimentId,obj->obj); // missing } } // This object needs to be updated. 
if (updateObj) { mdbObjSetVarInt(obj,MDB_VAR_ENCODE_EXP_ID,expId); struct mdbObj *newObj = mdbObjCreate(obj->obj,MDB_VAR_ENCODE_EXP_ID, experimentId); if (updateAccession && exp != NULL && exp->accession != NULL) mdbObjSetVar(newObj,MDB_VAR_DCC_ACCESSION,exp->accession); slAddHead(&mdbUpdateObjs,newObj); } slAddHead(&mdbProcessedObs,obj); } // Done with one experiment encodeExpFree(&exp); if (!foundId && errors > 0 && warn > 0) printf(" %s all %d objects are missing an %s.\n", experimentId,objsInExp,MDB_VAR_ENCODE_EXP_ID); } // Done with one composite if (expCount > 0) { printf("Composite%s '%s' has %d recognizable experiment%s with %d objects needing %s", (compositelessObj?"less set":""),compName,expCount,(expCount != 1?"s":""), expMissing,MDB_VAR_ENCODE_EXP_ID); if (accMissing > 0) printf(" and %d objects needing %s",accMissing,MDB_VAR_DCC_ACCESSION); printf(" updated.\n objects/experiment: min:%d max:%d mean:%lf.\n", expMin,expMax,((double)expObjsCount/expCount)); } if (edvSortOrder != NULL) freeMem(edvSortOrder); slNameFreeList(compositeEdvs); } // Done with all composites dyStringFree(&dyVars); *pMdbObjs = mdbProcessedObs; return mdbUpdateObjs; } boolean mdbObjIsEncode(struct mdbObj *mdb) // Return true if this metaDb object is for ENCODE { return mdbObjContains(mdb, MDB_VAR_PROJECT, MDB_VAL_ENCODE_PROJECT); // Could be more stringent: //return ( mdbObjContains(mdbObj, MDB_VAR_LAB, NULL) // && mdbObjContains(mdbObj, MDB_VAR_DATATYPE, NULL) // && mdbObjContains(mdbObj, MDB_VAR_ENCODE_SUBID,NULL)); } boolean mdbObjEncodeIsUnrestricted(struct mdbObj *mdb) // Return true if this object is still within data restriction time period { char *dateUnrestricted = mdbObjFindValue(mdb, MDB_VAR_ENCODE_DATE_UNRESTRICTED); if (dateUnrestricted == NULL) return TRUE; return (dateIsOld(dateUnrestricted, MDB_ENCODE_DATE_FORMAT)); } boolean mdbObjInComposite(struct mdbObj *mdb, char *composite) // Return true if metaDb object is in specified composite. 
// If composite is NULL, always return true { if (composite == NULL || sameOk(composite, mdbObjFindValue(mdb, MDB_VAR_COMPOSITE))) return TRUE; return FALSE; } // --------------- Free at last ---------------- void mdbObjsFree(struct mdbObj **mdbObjsPtr) // Frees one or more metadata objects and any contained mdbVars. Will free any hashes as well. { if (mdbObjsPtr != NULL && *mdbObjsPtr != NULL) { // free all roots struct mdbObj *mdbObj = NULL; while ((mdbObj = slPopHead(mdbObjsPtr)) != NULL) { // Free hash first (shared memory) hashFree(&(mdbObj->varHash)); // free all leaves mdbVarsFree(&(mdbObj->vars)); // The rest of root freeMem(mdbObj->obj); freeMem(mdbObj); } freez(mdbObjsPtr); } } void mdbVarsFree(struct mdbVar **mdbVarsPtr) // Frees one or more metadata vars and any val as well { struct mdbVar *mdbVar = NULL; while ((mdbVar = slPopHead(mdbVarsPtr)) != NULL) { freeMem(mdbVar->val); freeMem(mdbVar->var); freez(&mdbVar); } } void mdbByVarsFree(struct mdbByVar **mdbByVarsPtr) // Frees one or more metadata vars and any contained vals and objs. Will free any hashes as well. { if (mdbByVarsPtr != NULL && *mdbByVarsPtr != NULL) { // free all roots struct mdbByVar *rootVar = NULL; while ((rootVar = slPopHead(mdbByVarsPtr)) != NULL) { // Free hash first (shared memory) hashFree(&(rootVar->valHash)); // free all limbs struct mdbLimbVal *limbVal = NULL; while ((limbVal = slPopHead(&(rootVar->vals))) != NULL) mdbLimbValFree(&limbVal); // The rest of root if (rootVar->var) freeMem(rootVar->var); freeMem(rootVar); } freez(mdbByVarsPtr); } } // ----------------- CGI specific routines for use with tdb ----------------- #define MDB_NOT_FOUND ((struct mdbObj *)-666) #define METADATA_NOT_FOUND ((struct mdbObj *)-999) struct mdbObj *metadataForTableFromTdb(struct trackDb *tdb) // Returns the metadata for a table from a tdb setting. 
{ char *setting = trackDbSetting(tdb, MDB_METADATA_KEY); if (setting == NULL) return NULL; struct mdbObj *mdbObj; AllocVar(mdbObj); mdbObj->obj = cloneString(tdb->table?tdb->table:tdb->track); AllocVar(mdbObj->vars); mdbObj->vars->var = cloneString(MDB_OBJ_TYPE); mdbObj->vars->val = cloneString(MDB_OBJ_TYPE_TABLE); mdbObj->varHash = hashNew(8); hashAdd(mdbObj->varHash, mdbObj->vars->var, mdbObj->vars); mdbObj = mdbObjAddVarPairs(mdbObj,setting); mdbObjRemoveVars(mdbObj,MDB_VAR_TABLENAME); // NOTE: Special hint that the tdb metadata return mdbObj; // is used since no mdb metadata is found } const struct mdbObj *metadataForTable(char *db,struct trackDb *tdb,char *table) // Returns the metadata for a table. NEVER FREE THIS STRUCT! { struct mdbObj *mdbObj = NULL; if (isHubTrack(tdb->track) || isHubTrack(db)) // check is track is on a hub, or if database is hub return metadataForTableFromTdb(tdb); // FIXME: metadata setting in TDB soon to be obsolete // See of the mdbObj was already built if (tdb != NULL) { mdbObj = tdbExtrasMdb(tdb); if (mdbObj == METADATA_NOT_FOUND) // NOT in mtatbl, not in tdb metadata setting! return NULL; else if (mdbObj == MDB_NOT_FOUND) // looked mdb already and not found! return metadataForTableFromTdb(tdb); else if (mdbObj != NULL) { return mdbObj; // No reason to query the table again! 
} } struct sqlConnection *conn = hAllocConn(db); char *mdb = mdbTableName(conn,TRUE); // Look for sandbox name first if(tdb != NULL && tdb->table != NULL) table = tdb->table; if (mdb != NULL) mdbObj = mdbObjQueryByObj(conn,mdb,table,NULL); hFreeConn(&conn); // save the mdbObj for next time if (tdb) { if (mdbObj != NULL) tdbExtrasMdbSet(tdb,mdbObj); else { tdbExtrasMdbSet(tdb,MDB_NOT_FOUND); return metadataForTableFromTdb(tdb); // FIXME: metadata setting in TDB soon to be obsolete } } return mdbObj; } const char *metadataFindValue(struct trackDb *tdb, char *var) // Finds the val associated with the var or retruns NULL { struct mdbObj *mdbObj = tdbExtrasMdb(tdb); if (mdbObj == MDB_NOT_FOUND) // Note, only we if already looked for mdb (which requires db) mdbObj = metadataForTableFromTdb(tdb); if (mdbObj == NULL || mdbObj == METADATA_NOT_FOUND) return NULL; return mdbObjFindValue(mdbObj,var); } struct mdbObj *mdbObjSearch(struct sqlConnection *conn, char *var, char *val, char *op, int limit) // Search the metaDb table for objs by var and val. // Can restrict by op "is", "like", "in" and accept (non-zero) limited string size // Search is via mysql, so it's case-insensitive. Return is sorted on obj. 
{ if (var == NULL && val == NULL) errAbort("mdbObjSearch requests objects but provides no criteria.\n"); char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first // Build a query string struct dyString *dyQuery = dyStringNew(512); sqlDyStringPrintf(dyQuery,"select l1.obj, l1.var, l1.val from %s l1",tableName); if (var != NULL || val != NULL) sqlDyStringPrintf(dyQuery," JOIN %s l2 ON l2.obj = l1.obj and ", tableName); if (var != NULL) sqlDyStringPrintf(dyQuery,"l2.var = '%s'", var); if (var != NULL && val != NULL) dyStringAppend(dyQuery," and "); if (val != NULL) { dyStringAppend(dyQuery,"l2.val "); if (sameString(op, "in")) dyStringPrintf(dyQuery,"in (%s)", val); // Note, must be a formatted string already: 'a','b','c' or 1,2,3 else if (sameString(op, "contains") || sameString(op, "like")) sqlDyStringPrintf(dyQuery,"like '%%%s%%'", val); else if (limit > 0 && strlen(val) != limit) sqlDyStringPrintf(dyQuery,"like '%.*s%%'", limit, val); else sqlDyStringPrintf(dyQuery,"= '%s'", val); } verbose(2, "Requesting mdbObjSearch query:\n\t%s;\n",dyStringContents(dyQuery)); struct mdb *mdb = mdbLoadByQuery(conn, dyStringCannibalize(&dyQuery)); verbose(3, "rows (vars) returned: %d\n",slCount(mdb)); slSort(&mdb,mdbCmp); // Use internal sort instead of ORDER BY because of mysql inefficiency struct mdbObj *mdbObjs = mdbObjsLoadFromMemory(&mdb,TRUE); return mdbObjs; } struct mdbObj *mdbObjRepeatedSearch(struct sqlConnection *conn,struct slPair *varValPairs, boolean tables,boolean files) // Search the metaDb table for objs by var,val pairs. Uses mdbCvSearchMethod() if available. 
// This method will use mdbObjsQueryByVars() { struct slPair *onePair; struct dyString *dyTerms = dyStringNew(256); // Build list of terms as "var1=val1 var2=val2a,val2b,val2c var3=%val3%" for (onePair = varValPairs; onePair != NULL; onePair = onePair->next) { if (isEmpty(((char *)(onePair->val)))) // NOTE: All the parens are needed to get the macro continue; // to do the right thing enum cvSearchable searchBy = cvSearchMethod(onePair->name); if (searchBy == cvSearchByMultiSelect // multiSelect val will be filled with || searchBy == cvSearchBySingleSelect // comma delimited list || searchBy == cvSearchByWildList) { if (strchr((char *)onePair->val,' ')) dyStringPrintf(dyTerms,"%s=\"%s\" ",onePair->name,(char *)onePair->val); else dyStringPrintf(dyTerms,"%s=%s ",onePair->name,(char *)onePair->val); } else if (searchBy == cvSearchByFreeText) // If select is by free text then like dyStringPrintf(dyTerms,"%s=\"%%%s%%\" ",onePair->name,(char *)onePair->val); else if (sameWord(onePair->name,MDB_VAR_COMPOSITE)) { // special case. Not directly searchable by UI but indirectly and will show up here. dyStringPrintf(dyTerms,"%s=%s ",onePair->name,(char *)onePair->val); } else if (searchBy == cvSearchByDateRange || searchBy == cvSearchByIntegerRange) { // TO BE IMPLEMENTED } } // Be sure to include table or file in selections if (tables) dyStringPrintf(dyTerms,"%s=%s ",MDB_OBJ_TYPE,MDB_OBJ_TYPE_TABLE); if (files) dyStringPrintf(dyTerms,"%s=? ",MDB_VAR_FILENAME); // Build the mdbByVals struct and then select all mdbObjs in one query struct mdbObj *mdbObjs = mdbObjsQueryByVarVals(conn,dyStringContents(dyTerms)); dyStringFree(&dyTerms); return mdbObjs; } struct slName *mdbObjNameSearch(struct sqlConnection *conn, char *var, char *val, char *op, int limit, boolean tables, boolean files) // Search the metaDb table for objs by var and val. // Can restrict by op "is", "like", "in" and accept (non-zero) limited string size // Search is via mysql, so it's case-insensitive. 
// Return is sorted on obj.
{
// NOTE(review): the original remark here said this "proves faster than getting mdbObjs
// then converting to slNames", yet that is exactly what the code below does
// (mdbObjSearch() followed by mdbObjToSlName()) - the remark looks stale.
struct mdbObj *mdbObjs = mdbObjSearch(conn,var,val,op,limit);

// May only be interested in tables or files:
// the filter hands back the objects it removed from the list, which are freed at once.
if (tables || files)
    {
    struct mdbObj *mdbObjsDropped = mdbObjsFilterTablesOrFiles(&mdbObjs,tables,files);
    mdbObjsFree(&mdbObjsDropped);
    }

// Convert what is left to names, then release the objects themselves.
struct slName *mdbNames = mdbObjToSlName(mdbObjs);
mdbObjsFree(&mdbObjs);
return mdbNames;
}

static void mdbSearchableQueryRestictForTablesOrFiles(struct dyString *dyQuery,char *tableName,
                      char letter, boolean hasTableName, boolean hasFileName)
// Append table and file restrictions onto an mdb query.
// letter (e.g. 'A') should be used in original query to alias table name:
//       "select A.val from metaDb A where A.val = 'fred'".
// Appends one JOIN aliased letter+1 that restricts on objType, and - only when
// !hasTableName && hasFileName - a second JOIN aliased letter+2 that requires a
// fileName or fileIndex var.  The caller must append its own 'where' afterwards.
{
// A note about tables and files: objType=table may have fileNames associated,
//                                but objType=file will not have tableNames
// While objType=table should have a var=tableName, this is redundant because the obj=tableName
// So the lopsided 'JOIN' queries below are meant to be the most flexible/efficient

// Aliases letter+1 and letter+2 are derived below, so letter+2 must still be alphabetic.
assert(isalpha(letter) && isalpha(letter + 2)); // will need one or two sub-queries.
char nextLtr = letter + 1;

// We are only searching for objects that are of objType table or file.
// objType=composite are not search targets!
if (hasTableName && !hasFileName)
    {
    // objType=table may have fileNames associated, but objType=file will not have tableNames
    sqlDyStringPrintf(dyQuery," JOIN %s %c ON %c.obj = %c.obj and "
                              "%c.var='objType' and %c.val = '%s'",tableName,nextLtr,nextLtr,
                              letter,nextLtr,nextLtr,MDB_OBJ_TYPE_TABLE);
    }
else // tables OR files (but not objType=composite)
    {
    // Reached for files-only as well as for tables-and-files.
    sqlDyStringPrintf(dyQuery," JOIN %s %c ON %c.obj = %c.obj and "
                              "%c.var='objType' and %c.val in ('%s','%s')",tableName,nextLtr,
                              nextLtr,letter,nextLtr,nextLtr,MDB_OBJ_TYPE_TABLE,MDB_OBJ_TYPE_FILE);
    }

nextLtr++;  // move on to the second alias (letter+2)

// last of 3 possibilities: objType either table or file but must have fileName var
if (!hasTableName && hasFileName)
    sqlDyStringPrintf(dyQuery," JOIN %s %c ON %c.obj = %c.obj and "
                              "%c.var in ('%s','%s')",tableName,nextLtr,nextLtr,letter,
                              nextLtr,MDB_VAR_FILENAME,MDB_VAR_FILEINDEX);
}

struct slName *mdbValSearch(struct sqlConnection *conn, char *var, int limit,
                            boolean hasTableName, boolean hasFileName)
// Search the metaDb table for vals by var.
// Can impose (non-zero) limit on returned string size of val
// Search is via mysql, so it's case-insensitive.  Return is sorted on val.
// Searchable vars are only for table or file objects.
// Further restrict to vars associated with tableName, fileName or both.
// errAborts when neither hasTableName nor hasFileName is set.
{  // TODO: Change this to use normal mdb struct routines?
struct slName *retVal;

if (!hasTableName && !hasFileName)
    errAbort("mdbValSearch requests vals associated with neither table nor files.\n");

char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first

char letter = 'A';  // alias of the main table in the query; joins use the following letters
struct dyString *dyQuery = dyStringNew(512);
if (limit > 0)
    sqlDyStringPrintf(dyQuery,"select distinct LEFT(%c.val,%d)",letter,limit);
else
    sqlDyStringPrintf(dyQuery,"select distinct %c.val",letter);

sqlDyStringPrintf(dyQuery," from %s %c",tableName,letter);

mdbSearchableQueryRestictForTablesOrFiles(dyQuery,tableName, letter, hasTableName, hasFileName);

// Need to append 'where' AFTER qualifying joins!
sqlDyStringPrintf(dyQuery, " where %c.var='%s'", letter,var); retVal = sqlQuickList(conn, dyStringCannibalize(&dyQuery)); slNameSortCase(&retVal); return retVal; } struct slPair *mdbValLabelSearch(struct sqlConnection *conn, char *var, int limit, boolean tags, boolean hasTableName, boolean hasFileName) // Search the metaDb table for vals by var and returns val (as pair->name) // and controlled vocabulary (cv) label (if it exists) (as pair->val). // Can impose (non-zero) limit on returned string size of name. // Searchable vars are only for table or file objects. // Further restrict to vars associated with tableName, fileName or both. // Return is case insensitive sorted on label (cv label or else val). // If requested, return cv tag instead of mdb val. { if (!hasTableName && !hasFileName) errAbort("mdbValLabelSearch requests vals associated with neither table nor files.\n"); char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first struct hash *varHash = (struct hash *)cvTermHash(var); boolean isCvDefined = cvTermIsCvDefined(var); struct slName *vals = NULL; struct dyString *dyQuery = dyStringNew(512); // If term is cv defined, no need to query for a list if (isCvDefined) { assert(varHash != NULL); struct hashCookie varCookie = hashFirst(varHash); char *val; while ((val = hashNextName(&varCookie)) != NULL) slNameAddHead(&vals,val); } else { // simple query of vals which doesn't enforce table/file restrictions sqlDyStringPrintf(dyQuery,"select val from %s where var='%s'",tableName,var); vals = sqlQuickList(conn, dyStringContents(dyQuery)); } slNameSort(&vals); // Will filter results to enforce table/file restrictions // New mysql 5.6 is inefficient if the filtering is applied to the query above char letter = 'A'; dyStringClear(dyQuery); sqlDyStringPrintf(dyQuery,"select 1 from %s %c",tableName,letter); mdbSearchableQueryRestictForTablesOrFiles(dyQuery,tableName,letter, hasTableName, hasFileName); sqlDyStringPrintf(dyQuery, " where %c.var='%s' and 
%c.val = ",letter,var,letter); int queryLengthBeforeVal = dyStringLen(dyQuery); struct slPair *pairs = NULL; char *lastVal = ""; struct slName *oneVal = vals; for ( ; oneVal != NULL; oneVal = oneVal->next) { char *val = oneVal->name; if (limit > 0) { if (sameStringN(val,lastVal,limit)) continue; } else if (!isCvDefined && sameString(val,lastVal))// query 'distinct' is slower in mysql 5.6 continue; lastVal = val; // Filter to enforce file/table restrictions dyStringResize(dyQuery,queryLengthBeforeVal); sqlDyStringPrintf(dyQuery, "'%s' limit 1", val); if (sqlQuickNum(conn, dyStringContents(dyQuery)) != 1) continue; char *label = NULL; if (varHash != NULL) { struct hash *valHash = hashFindVal(varHash,val); if (valHash != NULL) { label = cloneString(hashOptionalVal(valHash,CV_LABEL,val)); if (tags) { char *tag = hashFindVal(valHash,CV_TAG); if (tag != NULL) val = tag; } } } if (label == NULL) { if (limit > 0) label = cloneStringZ(val,limit); else label = cloneString(val); } label = strSwapChar(label,'_',' '); // vestigial _ meaning space slPairAdd(&pairs,val,label); } slNameFreeList(&vals); dyStringFree(&dyQuery); if (slCount(pairs) > 0) { // should have a list sorted on the label enum cvDataType eCvDataType = cvDataType(var); if (eCvDataType == cvInteger) slPairValAtoiSort(&pairs); else slPairValSortCase(&pairs); } return pairs; } struct slPair *mdbVarsSearchable(struct sqlConnection *conn, boolean hasTableName, boolean hasFileName) // returns a white list of mdb vars that actually exist in the current DB. // Searchable vars are only for table or file objects. // Further restrict to vars associated with tableName, fileName or both. 
{
if (!(hasTableName || hasFileName))
    errAbort("mdbVarsSearchable requests vals associated with neither table nor files.\n");

char *mdbTable = mdbTableName(conn,TRUE); // Look for sandBox name first

// Candidates: everything the cv lists as searchable
struct slPair *candidates = cvWhiteList(TRUE,FALSE);
assert(candidates != NULL);

// Build the invariant head of a "does this var exist?" probe once.  It selects a
// constant from the mdb restricted to table and/or file objs; the qualifying joins
// must be in place before the 'where' is appended.
char alias = 'A';
struct dyString *probe = dyStringNew(256);
sqlDyStringPrintf(probe, "select 1 from %s %c",mdbTable,alias);
mdbSearchableQueryRestictForTablesOrFiles(probe,mdbTable, alias, hasTableName, hasFileName);
sqlDyStringPrintf(probe, " where %c.var = ", alias);
int headLen = dyStringLen(probe);

// Probe each candidate in turn: keep those found in the mdb, free the rest.
struct slPair *kept = NULL;
struct slPair *candidate = NULL;
while (candidates != NULL)
    {
    candidate = slPopHead(&candidates);
    dyStringResize(probe,headLen);   // rewind to just after "var = "
    sqlDyStringPrintf(probe, "'%s' limit 1", candidate->name);
    if (sqlQuickNum(conn, dyStringContents(probe)) != 1)
        {
        slPairFree(&candidate);
        continue;
        }
    slAddHead(&kept, candidate);
    }
dyStringFree(&probe);

// slAddHead reversed the order; restore the order in which candidates were popped
slReverse(&kept);
return kept;
}