8b6cd76349b34c42eb04ef082f72926337b3501a tdreszer Thu Jan 27 17:17:02 2011 -0800
Made mdbUpdate -recreate faster by doing minimal error checking and loading with all insert statements.
diff --git src/hg/lib/mdb.c src/hg/lib/mdb.c
index 71e52d6..6baeb4f 100644
--- src/hg/lib/mdb.c
+++ src/hg/lib/mdb.c
@@ -1,2578 +1,2638 @@
/* mdb.c was originally generated by the autoSql program, which also
 * generated mdb.h and mdb.sql.  This module links the database and
 * the RAM representation of objects. */

#include "common.h"
#include "linefile.h"
#include "dystring.h"
#include "jksql.h"
#include "hdb.h"
#include "cheapcgi.h"
#include "hui.h"
#include "mdb.h"

static char const rcsid[] = "$Id: mdb.c,v 1.8 2010/06/11 17:11:28 tdreszer Exp $";

void mdbStaticLoad(char **row, struct mdb *ret)
/* Load a row from mdb table into ret.  The contents of ret will
 * be replaced at the next call to this function.
 * The four fields of ret are set to point directly at row[0..3]; nothing
 * is copied, so ret is only valid for as long as row is. */
{
ret->obj = row[0];
ret->var = row[1];
ret->varType = row[2];
ret->val = row[3];
}

struct mdb *mdbLoadByQuery(struct sqlConnection *conn, char *query)
/* Load all mdb from table that satisfy the query given.
 * Where query is of the form 'select * from example where something=something'
 * or 'select example.* from example, anotherTable where example.something =
 * anotherTable.something'.
 * Returns the rows in the order the query produced them.
 * Dispose of this with mdbFreeList(). */
{
struct mdb *list = NULL, *el;
struct sqlResult *sr;
char **row;

sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    el = mdbLoad(row);          // independent copy of this row
    slAddHead(&list, el);       // built in reverse ...
    }
slReverse(&list);               // ... then flipped back to query order
sqlFreeResult(&sr);
return list;
}

void mdbSaveToDb(struct sqlConnection *conn, struct mdb *el, char *tableName, int updateSize)
/* Save mdb as a row to the table specified by tableName.
 * As blob fields may be arbitrary size updateSize specifies the approx size
 * of a string that would contain the entire query. Arrays of native types are
 * converted to comma separated strings and loaded as such, User defined types are
 * inserted as NULL.
Note that strings must be escaped to allow insertion into the database.
 * For example "autosql's features include" --> "autosql\'s features include"
 * If worried about this use mdbSaveToDbEscaped() */
{
struct dyString *update = newDyString(updateSize);
// NOTE(review): the last field (val) is interpolated WITHOUT surrounding quotes here,
// unlike mdbSaveToDbEscaped() - the caller must supply val already quoted/escaped.
dyStringPrintf(update, "insert into %s values ( '%s','%s','%s',%s)",
	tableName,  el->obj,  el->var,  el->varType,  el->val);
sqlUpdate(conn, update->string);
freeDyString(&update);
}

void mdbSaveToDbEscaped(struct sqlConnection *conn, struct mdb *el, char *tableName, int updateSize)
/* Save mdb as a row to the table specified by tableName.
 * As blob fields may be arbitrary size updateSize specifies the approx size.
 * of a string that would contain the entire query. Automatically
 * escapes all simple strings (not arrays of string) but may be slower than mdbSaveToDb().
 * For example automatically copies and converts:
 * "autosql's features include" --> "autosql\'s features include"
 * before inserting into database. */
{
struct dyString *update = newDyString(updateSize);
char  *obj, *var, *varType, *val;
// Each field is escaped into a temporary copy that is released below.
obj = sqlEscapeString(el->obj);
var = sqlEscapeString(el->var);
varType = sqlEscapeString(el->varType);
val = sqlEscapeString(el->val);

dyStringPrintf(update, "insert into %s values ( '%s','%s','%s','%s')",
	tableName,  obj,  var,  varType,  val);
sqlUpdate(conn, update->string);
freeDyString(&update);
freez(&obj);
freez(&var);
freez(&varType);
freez(&val);
}

struct mdb *mdbLoad(char **row)
/* Load a mdb from row fetched with select * from mdb
 * from database.  Every field is cloned, so the result does not depend on
 * row staying alive.  Dispose of this with mdbFree(). */
{
struct mdb *ret;

AllocVar(ret);
ret->obj = cloneString(row[0]);
ret->var = cloneString(row[1]);
ret->varType = cloneString(row[2]);
ret->val = cloneString(row[3]);
return ret;
}

struct mdb *mdbLoadAll(char *fileName)
/* Load all mdb from a whitespace-separated file.
 * Dispose of this with mdbFreeList().
*/
{
struct mdb *list = NULL, *el;
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *row[4];   // one slot per mdb column: obj, var, varType, val

while (lineFileRow(lf, row))
    {
    el = mdbLoad(row);
    slAddHead(&list, el);
    }
lineFileClose(&lf);
slReverse(&list);   // restore file order after head insertion
return list;
}

struct mdb *mdbLoadAllByChar(char *fileName, char chopper)
/* Load all mdb from a chopper separated file.
 * Dispose of this with mdbFreeList(). */
{
struct mdb *list = NULL, *el;
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *row[4];   // one slot per mdb column: obj, var, varType, val

while (lineFileNextCharRow(lf, chopper, row, ArraySize(row)))
    {
    el = mdbLoad(row);
    slAddHead(&list, el);
    }
lineFileClose(&lf);
slReverse(&list);   // restore file order after head insertion
return list;
}

struct mdb *mdbCommaIn(char **pS, struct mdb *ret)
/* Create a mdb out of a comma separated string.
 * This will fill in ret if non-null, otherwise will
 * return a new mdb.  *pS is advanced past the four fields consumed. */
{
char *s = *pS;

if (ret == NULL)
    AllocVar(ret);
ret->obj = sqlStringComma(&s);
ret->var = sqlStringComma(&s);
ret->varType = sqlStringComma(&s);
ret->val = sqlStringComma(&s);
*pS = s;
return ret;
}

void mdbFree(struct mdb **pEl)
/* Free a single dynamically allocated mdb such as created
 * with mdbLoad().  Does nothing when *pEl is NULL. */
{
struct mdb *el;

if ((el = *pEl) == NULL) return;
freeMem(el->obj);
freeMem(el->var);
freeMem(el->varType);
freeMem(el->val);
freez(pEl);
}

void mdbFreeList(struct mdb **pList)
/* Free a list of dynamically allocated mdb's and set *pList to NULL. */
{
struct mdb *el, *next;

for (el = *pList; el != NULL; el = next)
    {
    next = el->next;    // grab the link before the element is freed
    mdbFree(&el);
    }
*pList = NULL;
}

void mdbOutput(struct mdb *el, FILE *f, char sep, char lastSep)
/* Print out mdb.  Separate fields with sep. Follow last field with lastSep.
*/ { if (sep == ',') fputc('"',f); fprintf(f, "%s", el->obj); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->var); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->varType); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->val); if (sep == ',') fputc('"',f); fputc(lastSep,f); } void mdbJsonOutput(struct mdb *el, FILE *f) /* Print out mdb in JSON format. */ { fputc('{',f); fputc('"',f); fprintf(f,"obj"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->obj); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"var"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->var); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"varType"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->varType); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"val"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->val); fputc('"',f); fputc('}',f); } /* -------------------------------- End autoSql Generated Code -------------------------------- */ #include "ra.h" #include "hgConfig.h" #include "obscure.h" #define MDB_METADATA_KEY "metadata" #define MDB_METAOBJ_RAKEY "metaObject" #define MDB_METAVAR_RAKEY "metaVariable" #define MDB_OBJ_TYPE "objType" // ------- (static) convert from autoSql ------- static void mdbVarFree(struct mdbVar **mdbVarPtr) // Frees a single mdbVar struct { freeMem((*mdbVarPtr)->val); freeMem((*mdbVarPtr)->var); freez(mdbVarPtr); } static void mdbLeafObjFree(struct mdbLeafObj **leafObjPtr) // Frees a single mdbVar struct { freeMem((*leafObjPtr)->obj); freez(leafObjPtr); } static void mdbLimbValFree(struct mdbLimbVal **limbValPtr) // Frees a single mdbVar struct { struct mdbLimbVal *limbVal = *limbValPtr; // Free hash first (shared memory) hashFree(&(limbVal->objHash)); struct mdbLeafObj *leafObj = NULL; while((leafObj = slPopHead(&(limbVal->objs))) != NULL) 
mdbLeafObjFree(&leafObj); freeMem(limbVal->val); freez(limbValPtr); } static struct mdbObj *mdbObjsLoadFromMemory(struct mdb **mdbPtr,boolean buildHashes) // Load all mdbObjs from in memory mdb struct, cannibalize strings. Expects sorted order. { struct mdbObj *mdbObj = NULL; struct mdbObj *mdbObjs = NULL; struct mdbVar *mdbVar; struct mdb *thisRow; while((thisRow = slPopHead(mdbPtr)) != NULL) { if (mdbObj == NULL || differentString(thisRow->obj,mdbObj->obj) ) { // Finish last object before starting next! if(mdbObj!= NULL) slReverse(&(mdbObjs->vars)); // Start new object AllocVar(mdbObj); mdbObj->obj = thisRow->obj; if ( buildHashes ) mdbObj->varHash = hashNew(0); slAddHead(&mdbObjs,mdbObj); } else { freeMem(thisRow->obj); // Already got this from prev row } AllocVar(mdbVar); mdbVar->var = thisRow->var; mdbVar->varType = mdbVarTypeStringToEnum(thisRow->varType); mdbVar->val = thisRow->val; slAddHead(&(mdbObj->vars),mdbVar); if ( buildHashes ) hashAddUnique(mdbObj->varHash, mdbVar->var, mdbVar); // pointer to struct to resolve type freeMem(thisRow); } // Finish very last object if(mdbObjs && mdbObjs->vars) slReverse(&(mdbObjs->vars)); if(mdbObjs) slReverse(&mdbObjs); return mdbObjs; } static struct mdbByVar *mdbByVarsLoadFromMemory(struct mdb **mdbPtr,boolean buildHashes) // Load all mdbVars from in memorys mdb struct, cannibalize strings. Expects sorted order. { struct mdbByVar *rootVars = NULL; struct mdbByVar *rootVar = NULL; struct mdbLimbVal *limbVal = NULL; struct mdbLeafObj *leafObj; struct mdb *thisRow; while((thisRow = slPopHead(mdbPtr)) != NULL) { // Start at root if (rootVar == NULL || differentString(thisRow->var,rootVar->var) ) { // Finish last var before starting next! if(rootVars && rootVars->vals && rootVars->vals->objs) slReverse(&(rootVars->vals->objs)); if(rootVars && rootVars->vals) slReverse(&(rootVars->vals)); // Start new var AllocVar(rootVar); limbVal = NULL; // Very important! 
rootVar->var = thisRow->var; rootVar->varType = mdbVarTypeStringToEnum(thisRow->varType); freeMem(thisRow->varType); if ( buildHashes ) rootVar->valHash = hashNew(0); slAddHead(&rootVars,rootVar); } else { freeMem(thisRow->var); // Already got this from prev row freeMem(thisRow->varType); } // Continue with limb if (limbVal == NULL || differentString(thisRow->val,limbVal->val) ) { // Finish last val before starting next! if(limbVal != NULL && limbVal->objs != NULL) slReverse(&(limbVal->objs)); // Start new val AllocVar(limbVal); limbVal->val = thisRow->val; // FIXME: binary? if ( buildHashes ) { hashAddUnique(rootVar->valHash, limbVal->val, limbVal); // Pointer to struct to get to objHash limbVal->objHash = hashNew(0); } slAddHead(&(rootVar->vals),limbVal); } else freeMem(thisRow->val); // Already got this from prev row // End with leaf AllocVar(leafObj); leafObj->obj = thisRow->obj; if ( buildHashes ) hashAddUnique(limbVal->objHash, leafObj->obj, leafObj); // Pointer to struct to resolve type! slAddHead(&(limbVal->objs),leafObj); freeMem(thisRow); } // Finish very last object if(rootVars && rootVars->vals && rootVars->vals->objs) slReverse(&(rootVars->vals->objs)); if(rootVars && rootVars->vals) slReverse(&(rootVars->vals)); if(rootVars && rootVars->vals) slReverse(&rootVars); return rootVars; } static int mdbObjCRC(struct mdbObj *mdbObjs) // returns a summ of all individual CRC values of all metObj strings { int crc = 0; struct mdbObj *mdbObj = NULL; for(mdbObj=mdbObjs;mdbObj!=NULL;mdbObj=mdbObj->next) { if(mdbObj->obj != NULL) crc += hashCrc(mdbObj->obj); struct mdbVar *mdbVar = NULL; for(mdbVar=mdbObj->vars;mdbVar!=NULL;mdbVar=mdbVar->next) { if(mdbVar->var != NULL) crc += hashCrc(mdbVar->var); if(mdbVar->varType == vtTxt && mdbVar->val != NULL) crc += hashCrc(mdbVar->val); } } return crc; } // -------------- Sort primitives -------------- int mdbObjCmp(const void *va, const void *vb) /* Compare to sort on label. 
*/ { const struct mdbObj *a = *((struct mdbObj **)va); const struct mdbObj *b = *((struct mdbObj **)vb); return strcasecmp(a->obj, b->obj); } int mdbVarCmp(const void *va, const void *vb) /* Compare to sort on label. */ { const struct mdbVar *a = *((struct mdbVar **)va); const struct mdbVar *b = *((struct mdbVar **)vb); return strcasecmp(a->var, b->var); } // -------------- Enum to Strings -------------- enum mdbVarType mdbVarTypeStringToEnum(char *varType) // Convert metadata varType string to enum { if(sameWord(varType,"txt")) return vtTxt; if(sameWord(varType,"binary")) return vtBinary; return vtUnknown; } char *mdbVarTypeEnumToString(enum mdbVarType varType) // Convert metadata varType enum string { switch (varType) { case vtTxt: return "txt"; case vtBinary: return "binary"; default: return "unknown"; } } // ------ Parsing lines ------ struct mdbObj *mdbObjAddVarPairs(struct mdbObj *oldObj,char *varPairs) // Parses line of var=val pairs adding to a mdbObj. Creates mdbObj if NULL { struct mdbObj *mdbObj = oldObj; struct mdbVar *mdbVar; char *cloneVars = cloneString(varPairs); // initial chop and determine if this looks like metadata int count = chopByWhiteRespectDoubleQuotes(cloneVars,NULL,0); char **words = needMem(sizeof(char *) * count); count = chopByWhiteRespectDoubleQuotes(cloneVars,words,count); if(count < 1 || words[0] == NULL) { errAbort("This is not formatted var=val pairs:\n\t%s\n",varPairs); } verbose(3, "mdbObjAddVarPairs() word count:%d\n\t%s\n",count,varPairs); if(mdbObj == NULL) AllocVar(mdbObj); if(mdbObj->varHash == NULL) mdbObj->varHash = hashNew(0); int ix; for(ix = 0;ix<count;ix++) { if(*words[ix] == '#') break; if(strchr(words[ix], '=') == NULL) errAbort("This is not formatted var=val pairs: '%s'\n\t%s\n",words[ix],varPairs); AllocVar(mdbVar); mdbVar->var = cloneNextWordByDelimiter(&(words[ix]),'='); mdbVar->varType = vtTxt; // FIXME: binary? 
mdbVar->val = cloneString(words[ix]); verbose(3, "mdbObjAddVarPairs() var=val: %s=%s\n",mdbVar->var,mdbVar->val); struct mdbVar *oldVar = (struct mdbVar *)hashFindVal(mdbObj->varHash, mdbVar->var); if(oldVar) { verbose(1, "The same variable appears twice: %s=%s and %s=%s. Ignoring second value.\n\t%s\n", oldVar->var,oldVar->val,mdbVar->var,mdbVar->val,varPairs); mdbVarFree(&mdbVar); } else { hashAdd(mdbObj->varHash, mdbVar->var, mdbVar); // pointer to struct to resolve type slAddHead(&(mdbObj->vars),mdbVar); } } freeMem(words); freeMem(cloneVars); // Special for old style ENCODE metadata #define ENCODE_ALN "Alignments" #define ENCODE_RSIG "RawSignal" if(mdbObj->obj == NULL) { char * tableName = NULL; char * fileName = NULL; for(mdbVar = mdbObj->vars; mdbVar != NULL && (tableName == NULL || fileName == NULL); mdbVar = mdbVar->next) { if(sameString(mdbVar->var,"tableName")) tableName = mdbVar->val; else if(sameString(mdbVar->var,"fileName")) fileName = mdbVar->val; } mdbVar = NULL; // assertably so, but this is conditioanally created below if(tableName != NULL) { verbose(3, "tableName:%s\n",tableName); if(fileName == NULL || startsWithWordByDelimiter(tableName,'.',fileName)) { mdbObj->obj = cloneString(tableName); AllocVar(mdbVar); mdbVar->var = cloneString(MDB_OBJ_TYPE); mdbVar->val = cloneString("table"); } else if(stringIn(ENCODE_ALN,fileName) && stringIn(ENCODE_RSIG,tableName))// Messier case where the file has "Alignment" but the table has "RawSignal" { char *tmpFilName = cloneString(fileName); strSwapStrs(tmpFilName, strlen(tmpFilName),ENCODE_ALN, ENCODE_RSIG); if(startsWithWordByDelimiter(tableName,'.',tmpFilName)) { mdbObj->obj = cloneString(tableName); AllocVar(mdbVar); mdbVar->var = cloneString(MDB_OBJ_TYPE); mdbVar->val = cloneString("table"); } freeMem(tmpFilName); } } else if(fileName != NULL) { verbose(3, "fileName:%s\n",fileName); // NOTE: that the file object is the root of the name, so both file.fastq.gz and file.fastq are same obj! 
mdbObj->obj = cloneFirstWordByDelimiter(fileName,'.'); AllocVar(mdbVar); mdbVar->var = cloneString(MDB_OBJ_TYPE); mdbVar->val = cloneString("file"); } if(mdbVar != NULL) // Just determined an objType { verbose(3, "mdbObjAddVarPairs() var=val: %s=%s\n",mdbVar->var,mdbVar->val); struct mdbVar *oldVar = (struct mdbVar *)hashFindVal(mdbObj->varHash, mdbVar->var); if(oldVar) mdbVarFree(&mdbVar); else { hashAdd(mdbObj->varHash, mdbVar->var, mdbVar); // pointer to struct to resolve type slAddHead(&(mdbObj->vars),mdbVar); } } } if(mdbObj->obj == NULL) // NOTE: Should this be a hard error! errAbort("No obj found. This is not properly formatted metadata:\n\t%s\n",varPairs); //slReverse(&(mdbObj->vars)); Could have added vars so sort instead slSort(&(mdbObj->vars),&mdbVarCmp); // Should be in determined order mdbVar = (struct mdbVar *)hashFindVal(mdbObj->varHash, MDB_OBJ_TYPE); if(mdbVar == NULL) mdbVar = mdbObj->vars; verbose(3, "mdbObjAddVarPairs() obj=%s %s=%s\n", mdbObj->obj, mdbVar->var,mdbVar->val); return mdbObj; } struct mdbObj *metadataLineParse(char *line) /* Parses a single formatted metadata line into mdbObj for updates or queries. 
*/ { char *fromTheTop = line; char*nibbledWord = cloneNextWordByDelimiter(&line,' '); if(nibbledWord == NULL || differentWord(nibbledWord,MDB_METADATA_KEY)) errAbort("This is not a formatted metadata line:\n\t%s\n",fromTheTop); freeMem(nibbledWord); struct mdbObj *mdbObj = NULL; char*varPairs = line; nibbledWord = cloneNextWordByDelimiter(&line,' ');; if(nibbledWord == NULL) errAbort("This is not a formatted metadata line:\n\t%s\n",fromTheTop); if(strchr(nibbledWord, '=') == NULL) // If this is not a var=val then it should be obj { AllocVar(mdbObj); mdbObj->obj = nibbledWord; verbose(3, "metadataLineParse() obj=%s\n",mdbObj->obj); varPairs = line; while(strlen(line) > 0) { nibbledWord = cloneNextWordByDelimiter(&line,' ');; if(nibbledWord == NULL) errAbort("This is not a formatted metadata line:\n\t%s\n",fromTheTop); if(*nibbledWord == '#' || strchr(nibbledWord, '=') != NULL) // IS commnet OR start of var=val pairs break; if(sameWord(nibbledWord,"delete")) mdbObj->deleteThis = TRUE; else errAbort("This is not a formatted metadata line:\n\t%s\n",fromTheTop); varPairs = line; freeMem(nibbledWord); } } if(varPairs != NULL && strlen(varPairs) > 0 && *varPairs != '#') mdbObj = mdbObjAddVarPairs(mdbObj,varPairs); else if(mdbObj->deleteThis == FALSE) errAbort("This is not a formatted metadata line:\n\t%s\n",fromTheTop); return mdbObj; } struct mdbByVar *mdbByVarsLineParse(char *line) /* Parses a line of "var1=val1 var2=val2 into a mdbByVar object for queries. 
*/ { int thisWord = 0; struct mdbByVar *mdbByVars = NULL; struct mdbByVar *rootVar = NULL; struct mdbLimbVal *limbVal = NULL; char *cloneLine = cloneString(line); struct hash* varHash; // There must not be multiple occurrances of the same var // initial chop and determine if this looks like metadata int count = chopByWhiteRespectDoubleQuotes(cloneLine,NULL,0); char **words = needMem(sizeof(char *) * count); count = chopByWhiteRespectDoubleQuotes(cloneLine,words,count); verbose(3, "mdbByVarsLineParse() word count:%d\n\t%s\n",count,line); // Get obj and figure out if this is a delete line varHash = hashNew(0); // All words are expected to be var=val pairs! for (thisWord=0; thisWord<count; thisWord++) { if (strchr(words[thisWord], '=') == NULL) errAbort("Expected 'var=val' but found '%s'. This is not properly formatted metadata:\n\t%s\n",words[thisWord],line); // Set up var struct from 1st half of pair AllocVar(rootVar); rootVar->var = cloneNextWordByDelimiter(&(words[thisWord]),'='); rootVar->notEqual = (rootVar->var[strlen(rootVar->var)-1] == '!'); // requested not equal if (rootVar->notEqual) rootVar->var[strlen(rootVar->var)-1] = 0; // Do not try to combine repeated vars because "foo=a foo=b" is 'AND' while "foo=a,b" is 'OR'. // Fill in the val(s) from second half of pair char *val = NULL; if (words[thisWord][0] != '\0' && words[thisWord][0] != '?') // "var=?" 
or "var=" will query by var name only val = cloneString(words[thisWord]); if (val != NULL) { // handle comma separated list of vals (if unquoted) if (val[0] != '\'' && val[0] != '"' && strchr(val,',') != NULL) { char * aVal = NULL; while((aVal = cloneNextWordByDelimiter(&val,',')) != NULL) { AllocVar(limbVal); limbVal->val = aVal; slAddTail(&rootVar->vals,limbVal); } } else { AllocVar(limbVal); limbVal->val = val; rootVar->vals = limbVal; } } hashAdd(varHash, rootVar->var, rootVar); slAddHead(&mdbByVars,rootVar); } freeMem(words); slReverse(&mdbByVars); verbose(3, "mdbByVarsLineParse() parsed:%d first: %s%s='%s'.\n", slCount(mdbByVars),mdbByVars->var,(mdbByVars->notEqual?"!":""),(mdbByVars->vals?mdbByVars->vals->val:"")); return mdbByVars; } // ------ Loading from args, hashes and tdb ------ struct mdbByVar*mdbByVarCreate(char *var, char *varType,char *val) /* Creates a singular var=val pair struct for metadata queries. */ { struct mdbByVar *mdbByVar = NULL; if(var == NULL) errAbort("Need variable to create mdbByVar query object.\n"); AllocVar(mdbByVar); mdbByVar->var = cloneString(var); mdbByVar->varType = (varType==NULL?vtUnknown:mdbVarTypeStringToEnum(varType)); if(val != NULL) { struct mdbLimbVal * limbVal; AllocVar(limbVal); limbVal->val = cloneString(val); mdbByVar->vals = limbVal; // Only one } return mdbByVar; } struct mdbObj *mdbObjCreate(char *obj,char *var, char *varType,char *val) /* Creates a singular mdbObj query object based on obj and all other optional params. 
*/
{
struct mdbObj *mdbObj = NULL;

if(obj == NULL)
    errAbort("Need obj to create mdbObj query object.\n");

AllocVar(mdbObj);
mdbObj->obj = cloneString(obj);

if(var != NULL)
    {
    struct mdbVar * mdbVar;
    AllocVar(mdbVar);

    mdbVar->var = cloneString(var);
    mdbVar->varType = (varType==NULL?vtUnknown:mdbVarTypeStringToEnum(varType));
    if(val != NULL)
        mdbVar->val = cloneString(val);
    mdbObj->vars = mdbVar; // Only one
    }
return mdbObj;
}

struct mdbObj *mdbObjsLoadFromHashes(struct hash *objsHash)
// Load all mdbObjs from a hash of hashes: each entry of objsHash is named for an obj and
// its value is a hash of var name -> val string.  The MDB_METAOBJ_RAKEY entry is skipped.
// Names and values are cloned.  Returns objs sorted by name, each with its vars sorted.
{
struct mdbObj *mdbObjs = NULL;
struct hashEl* objEl = NULL;

struct hashCookie objCookie = hashFirst(objsHash);
while((objEl = hashNext(&objCookie)) != NULL)
    {
    struct mdbObj *mdbObj;
    AllocVar(mdbObj);
    mdbObj->obj     = cloneString(objEl->name);
    mdbObj->varHash = hashNew(0);
    struct hash *hashedVars = objEl->val;
    struct hashCookie varCookie = hashFirst(hashedVars);
    struct hashEl* varEl = NULL;
    while((varEl = hashNext(&varCookie)) != NULL)
        {
        if(sameString(varEl->name,MDB_METAOBJ_RAKEY))
            continue;   // this entry names the obj itself, it is not a var

        struct mdbVar * mdbVar;
        AllocVar(mdbVar);
        mdbVar->var     = cloneString(varEl->name);
        mdbVar->varType = vtTxt;                    // FIXME: binary?
        mdbVar->val     = cloneString(varEl->val);
        hashAdd(mdbObj->varHash, mdbVar->var, mdbVar); // pointer to struct to resolve type
        slAddHead(&(mdbObj->vars),mdbVar);
        }
    slSort(&(mdbObj->vars),&mdbVarCmp); // Should be in determined order
    slAddHead(&mdbObjs,mdbObj);
    }
slSort(&mdbObjs,&mdbObjCmp); // Should be in determined order
return mdbObjs;
}

// ------ Loading from files ------
struct mdbObj *mdbObjsLoadFromFormattedFile(char *fileName,boolean *validated)
// Load all mdbObjs from a file containing metadata formatted lines
// Lines whose first non-blank character is '#' are skipped.  If a line starts with the
// MDB_METAOBJ_RAKEY word the file is handed over to mdbObjsLoadFromRAFile() instead.
{
struct mdbObj *mdbObjs = NULL;
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;

while (lineFileNext(lf, &line,NULL))
    {
    char *start = skipLeadingSpaces(line);
    if(start == NULL || *start == '#')
        continue;
    if(startsWithWord(MDB_METAOBJ_RAKEY,line))
        {
        // This is the RA style file!!
lineFileClose(&lf); return mdbObjsLoadFromRAFile(fileName,validated); } struct mdbObj *mdbObj = metadataLineParse(line); if(mdbObj == NULL) { mdbObjsFree(&mdbObjs); return NULL; } slAddHead(&mdbObjs,mdbObj); } lineFileClose(&lf); slReverse(&mdbObjs); // Go ahead and keep this in file order if(validated) *validated = FALSE; return mdbObjs; } #define MDB_MAGIC_PREFIX "# MAGIC: " struct mdbObj *mdbObjsLoadFromRAFile(char *fileName,boolean *validated) // Load all mdbObjs from a file containing RA formatted 'metaObjects' { struct hash *mdHash = raReadAll(fileName, MDB_METAOBJ_RAKEY); if(mdHash == NULL) { verbose(1,"Missing, empty or badly formated RA file:%s\n",fileName); return NULL; } struct mdbObj *mdbObjs = mdbObjsLoadFromHashes(mdHash); hashFree(&mdHash); // Try to validate file if(validated) { *validated = FALSE; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line = lineFileSkipToLineStartingWith(lf,MDB_MAGIC_PREFIX,1000000); if(line != NULL) { int fileMagic = atoi(line+strlen(MDB_MAGIC_PREFIX)); int objsMagic = mdbObjCRC(mdbObjs); verbose(3,"Objects magic: %d Files magic: %d (%s)\n",objsMagic,fileMagic,line+strlen(MDB_MAGIC_PREFIX)); *validated = (fileMagic == objsMagic); } else verbose(3,"Can't find magic number on this file.\n"); } return mdbObjs; } // ------ Table name and creation ------ void mdbReCreate(struct sqlConnection *conn,char *tblName,boolean testOnly) // Creates ore Recreates the named mdb. { char *sqlCreate = "# Contains metadata for a table, file or other objects.\n" "CREATE TABLE %s (\n" " obj varchar(255) not null, # Object name or ID.\n" " var varchar(255) not null, # Metadata variable name.\n" " varType enum ('txt','binary') # Most vars are txt\n" " not null default 'txt',\n" " val longblob not null, # Metadata value.\n" " #Indices\n" " PRIMARY KEY(obj,var),\n" " UNIQUE(var,val(32),obj)\n" ")"; if(sqlTableExists(conn,tblName)) verbose(2, "Table '%s' already exists. 
It will be recreated.\n",tblName); struct dyString *dy = newDyString(512); dyStringPrintf(dy, sqlCreate, tblName); verbose(2, "Requesting table creation:\n%s;\n", dyStringContents(dy)); if(!testOnly) sqlRemakeTable(conn, tblName, dyStringContents(dy)); dyStringFree(&dy); } static char*mdbTableNamePreferSandbox() // returns the mdb table name or NULL if conn supplied but the table doesn't exist { char *table = cfgOption("db.metaDb"); if(table != NULL) return cloneString(table); // Look for trackDb name to model char *name = cfgOption("db.trackDb"); if(name == NULL) return cloneString(MDB_DEFAULT_NAME); // Only take the last table of a list of tables! char delimit = ','; for (table = name; (name = skipBeyondDelimit(name,delimit)) != NULL;) table = name; name = skipLeadingSpaces(table); // Divide name into root and sandbox portion char *root = NULL; char *sand = NULL; delimit = '_'; if ((sand = strchr(name,delimit)) == NULL) { delimit = '-'; sand = strchr(name,delimit); } if (sand == NULL) // No sandbox portion return cloneString(MDB_DEFAULT_NAME); root = cloneNextWordByDelimiter(&name,delimit); sand = name; // Since db.trackDb was used, make sure to swap it if (startsWith("trackDb",root)) { freeMem(root); root = cloneString(MDB_DEFAULT_NAME); } else // If discovered anything other than trackDb then give up as too obscure return cloneString(MDB_DEFAULT_NAME); // Finally ready to put it together int size = strlen(root) + strlen(sand) + 2; table = needMem(size); safef(table,size,"%s%c%s",root,delimit,sand); freeMem(root); return table; } char*mdbTableName(struct sqlConnection *conn,boolean mySandBox) // returns the mdb table name or NULL if conn supplied but the table doesn't exist { char *table = NULL; if (mySandBox) table = mdbTableNamePreferSandbox(); if (table == NULL) table = cloneString(MDB_DEFAULT_NAME); // Test for table if (conn != NULL && !sqlTableExists(conn,table)) { if (!mySandBox || sameWord(table,MDB_DEFAULT_NAME)) // Then try the root { freeMem(table); 
return NULL;
        }
    // The sandbox table is missing, so fall back to the default table name
    freeMem(table);
    table = cloneString(MDB_DEFAULT_NAME);
    if (!sqlTableExists(conn,table))
        {
        freeMem(table);
        return NULL;
        }
    }

return table;
}

// -------------- Updating the DB --------------
int mdbObjsSetToDb(struct sqlConnection *conn,char *tableName,struct mdbObj *mdbObjs,boolean replace,boolean testOnly)
// Adds or updates metadata obj/var pairs into the named table.  Returns total rows affected
// Objs flagged deleteThis have all their rows (or just the listed vars) deleted.
// With replace, an obj's existing rows are deleted before its vars are inserted.
// With testOnly, statements are reported and counted but not executed.
// NOTE(review): obj and var names are placed into the SQL unescaped; only val is escaped.
// NOTE(review): the strings returned by sqlEscapeString() below are not freed here,
// although mdbSaveToDbEscaped() frees its own - looks like a leak per row; confirm.
{
char query[8192];
struct mdbObj *mdbObj;
struct mdbVar *mdbVar;
int count = 0;

if(tableName == NULL)
    tableName = MDB_DEFAULT_NAME;

if(!sqlTableExists(conn,tableName))
    errAbort("mdbObjsSetToDb attempting to update non-existent table named '%s'.\n",tableName);

for(mdbObj = mdbObjs;mdbObj != NULL; mdbObj = mdbObj->next)
    {
    // Handle delete requests first
    if(mdbObj->deleteThis)
        {
        if(mdbObj->vars == NULL) // deletes all
            {
            // Count first so the number of affected rows can be reported even when testOnly
            safef(query, sizeof(query),"%s where obj = '%s'",tableName,mdbObj->obj);
            int delCnt = sqlRowCount(conn,query);

            if(delCnt>0)
                {
                safef(query, sizeof(query),
                    "delete from %s where obj = '%s'",tableName,mdbObj->obj);
                verbose(2, "Requesting delete of %d rows:\n\t%s;\n",delCnt, query);
                if(!testOnly)
                    sqlUpdate(conn, query);
                count += delCnt;
                }
            }
        else  // deletes selected vars
            {
            for(mdbVar = mdbObj->vars;mdbVar != NULL; mdbVar = mdbVar->next)
                {
                safef(query, sizeof(query),
                    "select obj from %s where obj = '%s' and var = '%s'",
                    tableName,mdbObj->obj,mdbVar->var);
                if(sqlExists(conn,query))
                    {
                    safef(query, sizeof(query),
                        "delete from %s where obj = '%s' and var = '%s'",
                        tableName,mdbObj->obj,mdbVar->var);
                    verbose(2, "Requesting delete of 1 row:\n\t%s;\n",query);
                    if(!testOnly)
                        sqlUpdate(conn, query);
                    count++;
                    }
                }
            }
        continue;  // Done with this mdbObj
        }
    else if (replace)  // If replace then clear out deadwood before inserting new vars
        {
        safef(query, sizeof(query),"%s where obj = '%s'",tableName,mdbObj->obj);
        int delCnt = sqlRowCount(conn,query);

        if(delCnt>0)
            {
            safef(query, sizeof(query),
                "delete from %s where obj = '%s'",tableName,mdbObj->obj);
            verbose(2, "Requesting replacement of %d rows:\n\t%s;\n",delCnt, query);
            if(!testOnly)
                sqlUpdate(conn, query);
            count += delCnt;
            }
        }

    // Now it is time for update or add!
    for(mdbVar = mdbObj->vars;mdbVar != NULL; mdbVar = mdbVar->next)
        {
        stripEnclosingDoubleQuotes(mdbVar->val); // Ensures values are stripped of enclosing quotes

        // Be sure to check for var existence first, then update
        if (!replace)
            {
            struct mdbObj *objExists = mdbObjQueryByObj(conn,tableName,mdbObj->obj,mdbVar->var);
            if(objExists)
                {
                // Only touch the row when the value or type actually changed
                if(differentString(mdbVar->val,objExists->vars->val)
                || mdbVar->varType != objExists->vars->varType)
                    {
                    safef(query, sizeof(query),
                        "update %s set varType = '%s', val = '%s' where obj = '%s' and var = '%s'",
                            tableName,
                            mdbVarTypeEnumToString(mdbVar->varType),sqlEscapeString(mdbVar->val), // FIXME: binary val?
                            mdbObj->obj,mdbVar->var);
                    verbose(2, "Requesting update of 1 row:\n\t%s;\n",query);
                    if(!testOnly)
                        sqlUpdate(conn, query);
                    count++;
                    }
                mdbObjsFree(&objExists);
                continue;  // The object was found/updated so done with it
                }
            }
        // Finally ready to insert new vars
        safef(query, sizeof(query),
            "insert into %s values ( '%s','%s','%s','%s')",
                tableName,mdbObj->obj,mdbVar->var,mdbVarTypeEnumToString(mdbVar->varType),
                sqlEscapeString(mdbVar->val)); // FIXME: binary val?
// FIXME Strip quotes verbose(2, "Requesting insert of one row:\n\t%s;\n",query); if(!testOnly) sqlUpdate(conn, query); count++; } } return count; } +int mdbObjsLoadToDb(struct sqlConnection *conn,char *tableName,struct mdbObj *mdbObjs,boolean testOnly) +// Adds mdb Objs with minimal error checking +{ +char query[8192]; +struct mdbObj *mdbObj; +struct mdbVar *mdbVar; +int count = 0; + +if (tableName == NULL) + tableName = MDB_DEFAULT_NAME; + +if (!sqlTableExists(conn,tableName)) + errAbort("mdbObjsSetToDb attempting to update non-existent table named '%s'.\n",tableName); + +for(mdbObj = mdbObjs;mdbObj != NULL; mdbObj = mdbObj->next) + { + if (mdbObj->deleteThis) + continue; + + for(mdbVar = mdbObj->vars;mdbVar != NULL; mdbVar = mdbVar->next) + { + stripEnclosingDoubleQuotes(mdbVar->val); // Ensures values are stripped of enclosing quotes + + // Finally ready to insert new vars + safef(query, sizeof(query), + "insert into %s values ( '%s','%s','%s','%s')", + tableName,mdbObj->obj,mdbVar->var,mdbVarTypeEnumToString(mdbVar->varType), + sqlEscapeString(mdbVar->val)); // FIXME: binary val? // FIXME Strip quotes + verbose(2, "Requesting insert of one row:\n\t%s;\n",query); + if (!testOnly) + { + // Use the sqlGetResultExt() instead of the normal sqlUpdate() in order to getany error messages + unsigned int errorNo = 0; + char *errorMsg = NULL; + struct sqlResult *sr = sqlGetResultExt(conn, query, &errorNo, &errorMsg); + if (errorNo != 0) + verbose(1, "INSERT failed: %s\n",errorMsg); + else + count++; + if (sr) // Should will be null, but just for good measure + sqlFreeResult(&sr); + } + else + { + struct mdbObj *objExists = mdbObjQueryByObj(conn,tableName,mdbObj->obj,mdbVar->var); + if(objExists) + { + verbose(1, "INSERT will fail for obj:%s and var:%s\n",mdbObj->obj,mdbVar->var); + mdbObjsFree(&objExists); + } + else + count++; // Of course this does not find duplicates within the mdbObjs list. 
+ } + } + } +return count; +} + // ------------------ Querys ------------------- struct mdbObj *mdbObjQuery(struct sqlConnection *conn,char *table,struct mdbObj *mdbObj) // Query the metadata table by obj and optional vars and vals in metaObj struct. If mdbObj is NULL query all. // Returns new mdbObj struct fully populated and sorted in obj,var order. { // select obj,var,val where (var= [and val=]) or ([var= and] val=) order by obj,var boolean buildHash = TRUE; if(table == NULL) table = MDB_DEFAULT_NAME; if(!sqlTableExists(conn,table)) return NULL; struct dyString *dy = newDyString(4096); dyStringPrintf(dy, "select obj,var,varType,val from %s", table); if(mdbObj != NULL && mdbObj->obj != NULL) { dyStringPrintf(dy, " where obj %s '%s'", (strchr(mdbObj->obj,'%')?"like":"="),mdbObj->obj); struct mdbVar *mdbVar; for(mdbVar=mdbObj->vars;mdbVar!=NULL;mdbVar=mdbVar->next) { if(mdbVar==mdbObj->vars) dyStringPrintf(dy, " and ("); else dyStringPrintf(dy, " or "); if(mdbVar->var != NULL) { if(mdbVar->val != NULL) dyStringPrintf(dy, "("); dyStringPrintf(dy, "var %s '%s'", (strchr(mdbVar->var,'%')?"like":"="),mdbVar->var); } if(mdbVar->val != NULL) { if(mdbVar->var != NULL) dyStringPrintf(dy, " and "); dyStringPrintf(dy, "val %s '%s'", (strchr(mdbVar->val,'%')?"like":"="), sqlEscapeString(mdbVar->val)); if(mdbVar->var != NULL) dyStringPrintf(dy, ")"); } if(mdbVar->var == NULL && mdbVar->val) errAbort("mdbObjQuery has empty mdbVar struct.\n"); buildHash = FALSE; // too few variables } if(mdbObj->vars != NULL) dyStringPrintf(dy, ")"); } dyStringPrintf(dy, " order by obj, var"); verbose(2, "Requesting query:\n\t%s;\n",dyStringContents(dy)); struct mdb *mdb = mdbLoadByQuery(conn, dyStringCannibalize(&dy)); struct mdbObj *mdbObjs = mdbObjsLoadFromMemory(&mdb,buildHash); verbose(3, "Returned %d object(s) with %d var(s).\n", mdbObjCount(mdbObjs,TRUE),mdbObjCount(mdbObjs,FALSE)); return mdbObjs; } struct mdbObj *mdbObjQueryByObj(struct sqlConnection *conn,char *table,char *obj,char 
*var) // Query a single metadata object and optional var from a table (default mdb). { if(obj == NULL) return mdbObjQuery(conn,table,NULL); struct mdbObj *queryObj = mdbObjCreate(obj,var,NULL,NULL); struct mdbObj *resultObj = mdbObjQuery(conn,table,queryObj); mdbObjsFree(&queryObj); return resultObj; } struct mdbByVar *mdbByVarsQuery(struct sqlConnection *conn,char *table,struct mdbByVar *mdbByVars) // Query the metadata table by one or more var=val pairs to find the distinct set of objs that satisfy ANY conditions. // Returns new mdbByVar struct fully populated and sorted in var,val,obj order. { // select obj,var,val where (var= [and val in (val1,val2)]) or (var= [and val in (val1,val2)]) order by var,val,obj if(table == NULL) table = MDB_DEFAULT_NAME; if(!sqlTableExists(conn,table)) return NULL; struct dyString *dy = newDyString(4096); dyStringPrintf(dy, "select obj,var,varType,val from %s", table); struct mdbByVar *rootVar; for(rootVar=mdbByVars;rootVar!=NULL;rootVar=rootVar->next) { if(rootVar==mdbByVars) dyStringPrintf(dy, " where (var "); else dyStringPrintf(dy, " OR (var "); if(rootVar->notEqual && rootVar->vals == NULL) dyStringPrintf(dy, "%s",strchr(rootVar->var,'%')?"NOT ":"!"); // one of: "NOT LIKE". 
"!=" or "NOT EXISTS" if(rootVar->vals != NULL && rootVar->vals->val != NULL && strlen(rootVar->vals->val) > 0) { dyStringPrintf(dy, "%s '%s'", (strchr(rootVar->var,'%')?"like":"="), rootVar->var); } else dyStringPrintf(dy, "EXISTS"); struct mdbLimbVal *limbVal; boolean multiVals = FALSE; for(limbVal=rootVar->vals;limbVal!=NULL;limbVal=limbVal->next) { if(limbVal->val == NULL || strlen(limbVal->val) < 1) continue; if(!multiVals) { dyStringPrintf(dy, " and val "); if(rootVar->notEqual) dyStringPrintf(dy, "%s",strchr(limbVal->val,'%')?"NOT ":"!"); if(limbVal->next == NULL) // only one val { dyStringPrintf(dy, "%s '%s'", (strchr(limbVal->val,'%')?"like":"="), sqlEscapeString(limbVal->val)); break; } else dyStringPrintf(dy, "in ("); multiVals=TRUE; } else dyStringPrintf(dy, ","); dyStringPrintf(dy, "'%s'", sqlEscapeString(limbVal->val)); } if(multiVals) dyStringPrintf(dy, ")"); dyStringPrintf(dy, ")"); } dyStringPrintf(dy, " order by var, val, obj"); verbose(2, "Requesting query:\n\t%s;\n",dyStringContents(dy)); struct mdb *mdb = mdbLoadByQuery(conn, dyStringCannibalize(&dy)); verbose(3, "rows (vars) returned: %d\n",slCount(mdb)); struct mdbByVar *mdbByVarsFromMem = mdbByVarsLoadFromMemory(&mdb,TRUE); verbose(3, "Returned %d vars(s) with %d val(s) with %d object(s).\n", mdbByVarCount(mdbByVarsFromMem,TRUE ,FALSE), mdbByVarCount(mdbByVarsFromMem,FALSE,TRUE ), mdbByVarCount(mdbByVarsFromMem,FALSE,FALSE)); return mdbByVarsFromMem; } struct mdbByVar *mdbByVarQueryByVar(struct sqlConnection *conn,char *table,char *varName,char *val) // Query a single metadata variable and optional val from a table (default mdb) for searching val->obj. 
{ if(varName == NULL) return mdbByVarsQuery(conn,table,NULL); struct mdbByVar *queryVar = mdbByVarCreate(varName,NULL,val); struct mdbByVar *resultVar = mdbByVarsQuery(conn,table,queryVar); mdbByVarsFree(&queryVar); return resultVar; } struct mdbObj *mdbObjsQueryByVars(struct sqlConnection *conn,char *table,struct mdbByVar *mdbByVars) // Query the metadata table by one or more var=val pairs to find the distinct set of objs that satisfy ALL conditions. // Returns new mdbObj struct fully populated and sorted in obj,var order. { // MOST POPULAR WAY TO QUERY MDB. Building example queries like: // "cell=GM12878" or "cell!=GM12878" // SELECT T1.obj,T1.var,T1.varType,T1.val FROM metaDb T1 WHERE EXISTS (SELECT T2.obj FROM metaDb T2 WHERE T2.obj = T1.obj AND T2.var = 'cell' AND T2.val = 'GM12878') ORDER BY T1.obj, T1.var; // SELECT T1.obj,T1.var,T1.varType,T1.val FROM metaDb T1 WHERE EXISTS (SELECT T2.obj FROM metaDb T2 WHERE T2.obj = T1.obj AND T2.var = 'cell' AND T2.val != 'GM12878') ORDER BY T1.obj, T1.var; // "cell=GM%" or "cell!=GM%" // SELECT T1.obj,T1.var,T1.varType,T1.val FROM metaDb T1 WHERE EXISTS (SELECT T2.obj FROM metaDb T2 WHERE T2.obj = T1.obj AND T2.var = 'cell' AND T2.val LIKE 'GM%') ORDER BY T1.obj, T1.var; // SELECT T1.obj,T1.var,T1.varType,T1.val FROM metaDb T1 WHERE EXISTS (SELECT T2.obj FROM metaDb T2 WHERE T2.obj = T1.obj AND T2.var = 'cell' AND T2.val NOT LIKE 'GM%') ORDER BY T1.obj, T1.var; // "cell=" or "cell!=" // SELECT T1.obj,T1.var,T1.varType,T1.val FROM metaDb T1 WHERE EXISTS (SELECT T2.obj FROM metaDb T2 WHERE T2.obj = T1.obj AND T2.var = 'cell') ORDER BY T1.obj, T1.var; // SELECT T1.obj,T1.var,T1.varType,T1.val FROM metaDb T1 WHERE NOT EXISTS (SELECT T2.obj FROM metaDb T2 WHERE T2.obj = T1.obj AND T2.var = 'cell') ORDER BY T1.obj, T1.var; // "cell=GM12878,K562" or "cell!=GM12878,K562" // SELECT T1.obj,T1.var,T1.varType,T1.val FROM metaDb T1 WHERE EXISTS (SELECT T2.obj FROM metaDb T2 WHERE T2.obj = T1.obj AND T2.var = 'cell' AND T2.val IN 
('GM12878','K562')) ORDER BY T1.obj, T1.var; // SELECT T1.obj,T1.var,T1.varType,T1.val FROM metaDb T1 WHERE EXISTS (SELECT T2.obj FROM metaDb T2 WHERE T2.obj = T1.obj AND T2.var = 'cell' AND T2.val NOT IN ('K562','GM12878')) ORDER BY T1.obj, T1.var; // "cell=GM% cell!=GM12878" (very powerful) // SELECT T1.obj,T1.var,T1.varType,T1.val FROM metaDb T1 WHERE EXISTS (SELECT T2.obj FROM metaDb T2 WHERE T2.obj = T1.obj AND T2.var = 'cell' AND T2.val LIKE 'GM%') // AND EXISTS (SELECT T3.obj FROM metaDb T3 WHERE T3.obj = T1.obj AND T3.var = 'cell' AND T3.val != 'GM12878') ORDER BY T1.obj, T1.var; if(table == NULL) table = MDB_DEFAULT_NAME; if(!sqlTableExists(conn,table)) return NULL; struct dyString *dy = newDyString(4096); dyStringPrintf(dy, "SELECT T1.obj,T1.var,T1.varType,T1.val FROM %s T1", table); struct mdbByVar *rootVar; boolean gotVar = FALSE; int tix; for(rootVar=mdbByVars,tix=2;rootVar!=NULL;rootVar=rootVar->next,tix++) { boolean hasVal = (rootVar->vals != NULL); //boolean hasVal = (rootVar->vals != NULL && rootVar->vals->val != NULL && strlen(rootVar->vals->val) > 0); if(!gotVar) { dyStringPrintf(dy, " WHERE "); gotVar=TRUE; } else dyStringPrintf(dy, " AND "); if(!hasVal && rootVar->notEqual) dyStringPrintf(dy, "NOT EXISTS "); else dyStringPrintf(dy, "EXISTS "); dyStringPrintf(dy, "(SELECT T%d.obj FROM %s T%d WHERE T%d.obj = T1.obj AND T%d.var ",tix,table,tix,tix,tix); if(hasVal && rootVar->notEqual && rootVar->vals == NULL) dyStringPrintf(dy, "%s",strchr(rootVar->var,'%')?"NOT ":"!"); dyStringPrintf(dy, "%s '%s'", (strchr(rootVar->var,'%')?"LIKE":"="), rootVar->var); struct mdbLimbVal *limbVal; boolean multiVals = FALSE; for(limbVal=rootVar->vals;limbVal!=NULL;limbVal=limbVal->next) { if(limbVal->val == NULL || strlen(limbVal->val) < 1) continue; if(!multiVals) { dyStringPrintf(dy, " AND T%d.val ",tix); if(rootVar->notEqual) dyStringPrintf(dy, "%s",(strchr(limbVal->val,'%') || limbVal->next)?"NOT ":"!"); if(limbVal->next == NULL) // only one val { 
dyStringPrintf(dy, "%s '%s'", (strchr(limbVal->val,'%')?"LIKE":"="), sqlEscapeString(limbVal->val)); break; } else dyStringPrintf(dy, "IN ("); multiVals=TRUE; } else dyStringPrintf(dy, ","); dyStringPrintf(dy, "'%s'", sqlEscapeString(limbVal->val)); } if(multiVals) dyStringPrintf(dy, ")"); dyStringPrintf(dy, ")"); } dyStringPrintf(dy, " ORDER BY T1.obj, T1.var"); verbose(2, "Requesting query:\n\t%s;\n",dyStringContents(dy)); struct mdb *mdb = mdbLoadByQuery(conn, dyStringCannibalize(&dy)); verbose(3, "rows (vars) returned: %d\n",slCount(mdb)); struct mdbObj *mdbObjs = mdbObjsLoadFromMemory(&mdb,TRUE); verbose(3, "Returned %d object(s) with %d var(s).\n", mdbObjCount(mdbObjs,TRUE),mdbObjCount(mdbObjs,FALSE)); return mdbObjs; } // ----------- Printing and Counting ----------- static void mdbVarValPrint(struct mdbVar *mdbVar,boolean raStyle, FILE *outF) { if(mdbVar != NULL && mdbVar->var != NULL) { if(raStyle) fprintf(outF, "\n%s ",mdbVar->var); else fprintf(outF, " %s=",mdbVar->var); if(mdbVar->val != NULL) { if(mdbVar->varType == vtBinary) fprintf(outF, "binary"); else if(!raStyle && strchr(mdbVar->val, ' ') != NULL) // Has blanks fprintf(outF, "\"%s\"",mdbVar->val); else fprintf(outF, "%s",mdbVar->val); } } } void mdbObjPrintToStream(struct mdbObj *mdbObjs,boolean raStyle, FILE *outF ) // prints objs and var=val pairs as formatted metadata lines or ra style { // Single line: // metadata iLoveLucy table lucy=ricky ethyl=fred // ra style // metadata iLoveLucy table // lucy ricky // ethy fred // TODO: Expand for mutilple var types; strip quotes from vals on ra style struct mdbObj *mdbObj = NULL; for(mdbObj=mdbObjs;mdbObj!=NULL;mdbObj=mdbObj->next) { if(mdbObj->obj == NULL) continue; fprintf(outF, "%s %s",(raStyle?MDB_METAOBJ_RAKEY:MDB_METADATA_KEY),mdbObj->obj); if(mdbObj->deleteThis) fprintf(outF, " delete"); struct mdbVar *mdbVar = NULL; // If hash available, force objType to front if(mdbObj->varHash != NULL) { mdbVar = hashFindVal(mdbObj->varHash,MDB_OBJ_TYPE); 
mdbVarValPrint(mdbVar,raStyle, outF); } for(mdbVar=mdbObj->vars;mdbVar!=NULL;mdbVar=mdbVar->next) { if(mdbObj->varHash == NULL || !sameOk(MDB_OBJ_TYPE,mdbVar->var)) mdbVarValPrint(mdbVar,raStyle, outF); } fprintf(outF, "%s",(raStyle?"\n\n":"\n")); } if(raStyle) // NOTE: currently only supporting validation of RA files fprintf(outF, "%s%d\n",MDB_MAGIC_PREFIX,mdbObjCRC(mdbObjs)); } char *mdbObjVarValPairsAsLine(struct mdbObj *mdbObj,boolean objTypeExclude) // returns NULL or a line for a single mdbObj as "var1=val1; var2=val2 ...". Must be freed. { if (mdbObj!=NULL) { struct dyString *dyLine = dyStringNew(128); struct mdbVar *mdbVar = NULL; // If hash available, force objType to front if (!objTypeExclude && mdbObj->varHash != NULL) { mdbVar = hashFindVal(mdbObj->varHash,MDB_OBJ_TYPE); dyStringPrintf(dyLine,"%s=%s; ",mdbVar->var,mdbVar->val); } for(mdbVar=mdbObj->vars;mdbVar!=NULL;mdbVar=mdbVar->next) { if (!sameOk(MDB_OBJ_TYPE,mdbVar->var) || (!objTypeExclude && mdbObj->varHash == NULL)) { if (mdbVar->varType == vtTxt) dyStringPrintf(dyLine,"%s=%s; ",mdbVar->var,mdbVar->val); } } char *line = dyStringCannibalize(&dyLine); if (line) { int len = strlen(line); if (len == 0) { freeMem(line); return NULL; } if (line[len-1] == ' ') line[len-1] = '\0'; return line; } } return NULL; } void mdbObjPrint(struct mdbObj *mdbObjs,boolean raStyle) // prints objs and var=val pairs as formatted metadata lines or ra style { mdbObjPrintToStream(mdbObjs, raStyle, stdout); } void mdbObjPrintToFile(struct mdbObj *mdbObjs,boolean raStyle, char *file) // prints objs and var=val pairs as formatted metadata lines or ra style { FILE *f = mustOpen(file, "w"); mdbObjPrintToStream(mdbObjs, raStyle, f); fclose(f); } void mdbByVarPrint(struct mdbByVar *mdbByVars,boolean raStyle) // prints var=val pairs and objs that go with them single lines or ra style { // Single line: // mdbVariable lucy=ethyl bestFriends lifePartners // mdbVariable lucy=ricky iLoveLucy divorces // NOT QUITE ra style // metadata 
Fred wife=Ethyl // metadata Lucy wife=Ethyl // Results in: // mdbVariable wife Ethyl // metaObject Fred // metaObject Lucy struct mdbByVar *rootVar = NULL; for(rootVar=mdbByVars;rootVar!=NULL;rootVar=rootVar->next) { if(rootVar->var == NULL) continue; struct mdbLimbVal *limbVal = NULL; for(limbVal=rootVar->vals;limbVal!=NULL;limbVal=limbVal->next) { if(limbVal->val == NULL) continue; if(raStyle) printf("%s %s ",MDB_METAVAR_RAKEY,rootVar->var); else printf("%s %s=",MDB_METAVAR_RAKEY,rootVar->var); if(rootVar->varType == vtBinary) printf("binary"); else if(!raStyle && strchr(limbVal->val, ' ') != NULL) // Has blanks printf("\"%s\"",limbVal->val); else printf("%s",limbVal->val); struct mdbLeafObj *leafObj = NULL; for(leafObj=limbVal->objs;leafObj!=NULL;leafObj=leafObj->next) { if(leafObj->obj == NULL) continue; if(raStyle) printf("\n%s %s",MDB_METAOBJ_RAKEY,leafObj->obj); else printf(" %s",leafObj->obj); } printf("\n"); if(raStyle) printf("\n"); } } } int mdbObjCount(struct mdbObj *mdbObjs,boolean objs) // returns the count of vars belonging to this obj or objs; { int count = 0; struct mdbObj *mdbObj = NULL; for(mdbObj=mdbObjs;mdbObj!=NULL;mdbObj=mdbObj->next) { if(mdbObj->obj == NULL) continue; if(objs) count++; else { struct mdbVar *mdbVar = NULL; for(mdbVar=mdbObj->vars;mdbVar!=NULL;mdbVar=mdbVar->next) { if(mdbVar->var != NULL && mdbVar->val != NULL) count++; } } } return count; } int mdbByVarCount(struct mdbByVar *mdbByVars,boolean vars, boolean vals) // returns the count of objs belonging to this set of vars; { int count = 0; struct mdbByVar *rootVar = NULL; for(rootVar=mdbByVars;rootVar!=NULL;rootVar=rootVar->next) { if(rootVar->var == NULL) continue; if(vars) count++; else { struct mdbLimbVal *limbVal = NULL; for(limbVal=rootVar->vals;limbVal!=NULL;limbVal=limbVal->next) { if(limbVal->val == NULL) continue; if(vals) count++; else { struct mdbLeafObj *leafObj = NULL; for(leafObj=limbVal->objs;leafObj!=NULL;leafObj=leafObj->next) { if(leafObj->obj != NULL) 
count++; } } } } } return count; } // ----------------- Utilities ----------------- struct mdbVar *mdbObjFind(struct mdbObj *mdbObj, char *var) // Finds the mdbVar associated with the var or returns NULL { if (mdbObj == NULL) return NULL; struct mdbVar *mdbVar = NULL; if(mdbObj->varHash != NULL) mdbVar = hashFindVal(mdbObj->varHash,var); else { for(mdbVar=mdbObj->vars;mdbVar!=NULL;mdbVar=mdbVar->next) { if(sameOk(var,mdbVar->var)) break; } } if(mdbVar == NULL) return NULL; return mdbVar; } char *mdbObjFindValue(struct mdbObj *mdbObj, char *var) // Finds the val associated with the var or retruns NULL { struct mdbVar *mdbVar = mdbObjFind(mdbObj, var); if(mdbVar == NULL) return NULL; return mdbVar->val; } boolean mdbObjContains(struct mdbObj *mdbObj, char *var, char *val) // Returns TRUE if object contains var, val or both { if (mdbObj == NULL) return FALSE; if(var != NULL) { char *foundVal = mdbObjFindValue(mdbObj,var); if(foundVal == NULL) return FALSE; if(val == NULL) return TRUE; return sameOk(foundVal,val); } struct mdbVar *mdbVar = NULL; for(mdbVar=mdbObj->vars;mdbVar!=NULL;mdbVar=mdbVar->next) { if(differentStringNullOk(var,mdbVar->var) != 0) continue; if(differentStringNullOk(val,mdbVar->val) != 0) continue; return TRUE; } return FALSE; } boolean mdbObjsContainAtleastOne(struct mdbObj *mdbObjs, char *var) // Returns TRUE if any object in set contains var { struct mdbObj *mdbObj = mdbObjs; for(;mdbObj!=NULL; mdbObj=mdbObj->next) { if(mdbObjContains(mdbObj, var, NULL)) return TRUE; } return FALSE; } struct mdbObj *mdbObjsCommonVars(struct mdbObj *mdbObjs) // Returns a new mdbObj with all vars that are contained in every obj passed in. // Note that the returnd mdbObj has a meaningles obj name and vals. 
{ if (mdbObjs == NULL || mdbObjs->vars == NULL) return NULL; struct mdbObj *mdbObj = mdbObjs; struct mdbObj *commonVars = mdbObjClone(mdbObj); // Clone the first obj then prune it commonVars->next = NULL; -struct mdbVar *mdbVar = mdbObj->vars; // Will walk through the first obj's vars mdbObj=mdbObj->next; // No need to include first obj in search if (mdbObj != NULL) { + int count = 1; + // NOTE: This should not loop through all, as the list could be huge. Just compare the first 10 for now struct dyString *dyPruneVars = dyStringNew(512); + for(;mdbObj != NULL && count < 10;mdbObj=mdbObj->next, count++) + { + struct mdbVar *mdbVar = commonVars->vars; // Will walk through the first obj's vars for(; mdbVar != NULL; mdbVar = mdbVar->next ) { if (mdbObjsContainAtleastOne(mdbObj, mdbVar->var) == FALSE) dyStringPrintf(dyPruneVars,"%s ",mdbVar->var); // var not found so add to prune list } if (dyStringLen(dyPruneVars) > 0) + { mdbObjRemoveVars(commonVars,dyStringContents(dyPruneVars)); + dyStringClear(dyPruneVars); + } + } dyStringFree(&dyPruneVars); } return commonVars; } boolean mdbByVarContains(struct mdbByVar *mdbByVar, char *val, char *obj) // Returns TRUE if var contains val, obj or both { if (mdbByVar != NULL) { struct mdbLimbVal *limbVal = NULL; struct mdbLeafObj *leafObj = NULL; if(mdbByVar->valHash != NULL && val != NULL) { limbVal = hashFindVal(mdbByVar->valHash,val); if(limbVal == NULL || limbVal->val == NULL) return FALSE; if(limbVal->objHash != NULL && obj != NULL) { leafObj = hashFindVal(limbVal->objHash,obj); if(leafObj == NULL) return FALSE; return sameOk(leafObj->obj,obj); } } for(limbVal=mdbByVar->vals;limbVal!=NULL;limbVal=limbVal->next) { if(differentStringNullOk(val,limbVal->val) != 0) continue; for(leafObj=limbVal->objs;leafObj!=NULL;leafObj=leafObj->next) { if(differentStringNullOk(obj,leafObj->obj) != 0) continue; return TRUE; } } } return FALSE; } void mdbObjReorderVars(struct mdbObj *mdbObjs, char *vars,boolean back) // Reorders vars list based 
upon list of vars "cell antibody treatment". Send to front or back. { //char *words[48]; char *cloneLine = cloneString(vars); char **words = NULL; int count = chopByWhite(cloneLine,NULL,0); if(count) { words = needMem(sizeof(char *) * count); count = chopByWhite(cloneLine,words,count); } else { char try = ','; count = chopByChar(cloneLine,try,NULL,0); if(count <= 0) { char try = '\t'; count = chopByChar(cloneLine,try,NULL,0); } if(count) { words = needMem(sizeof(char *) * count); count = chopByChar(cloneLine,try,words,count); } } if(count == 0) errAbort("mdbObjReorderVars cannot parse vars argument.\n"); struct mdbObj *mdbObj = NULL; for( mdbObj=mdbObjs; mdbObj!=NULL; mdbObj=mdbObj->next ) { int ix; struct mdbVar *orderedVars = NULL; struct mdbVar **varsToReorder = needMem(sizeof(struct mdbVar *) * count); struct mdbVar *mdbVar = NULL; while((mdbVar = slPopHead(&(mdbObj->vars))) != NULL) { ix = stringArrayIx(mdbVar->var,words,count); if(ix < 0) slAddHead(&orderedVars,mdbVar); else varsToReorder[ix] = mdbVar; } if(back) // add to front of backward list { for( ix=0; ix<count; ix++ ) { if(varsToReorder[ix] != NULL) slAddHead(&orderedVars,varsToReorder[ix]); } } slReverse(&orderedVars); if(!back) // Add to front of forward list { for( ix=count-1; ix>=0; ix-- ) { if(varsToReorder[ix] != NULL) slAddHead(&orderedVars,varsToReorder[ix]); } } mdbObj->vars = orderedVars; freeMem(varsToReorder); } freeMem(words); } int mdbObjVarCmp(const void *va, const void *vb) /* Compare to sort on full list of vars and vals. 
*/ { const struct mdbObj *a = *((struct mdbObj **)va); const struct mdbObj *b = *((struct mdbObj **)vb); struct mdbVar* aVar = a->vars; struct mdbVar* bVar = b->vars; for(;aVar != NULL && bVar != NULL;aVar=aVar->next,bVar=bVar->next) { int ret = strcmp(aVar->var, bVar->var); if(ret != 0) return ret; ret = strcmp(aVar->val, bVar->val); if(ret != 0) return ret; } if(aVar != NULL) return -1; if(bVar != NULL) return 1; return 0; } void mdbObjsSortOnVars(struct mdbObj **mdbObjs, char *vars) // Sorts on var,val pairs vars lists: fwd case-sensitive. Assumes all objs' vars are in identical order. // Optionally give list of vars "cell antibody treatment" to sort on (bringing to front of vars lists). { // NOTE: assumes all var pairs match (e.g. every obj has cell,treatment,antibody,... and missing treatment messes up sort) if(vars != NULL) mdbObjReorderVars(*mdbObjs,vars,FALSE); slSort(mdbObjs, mdbObjVarCmp); } void mdbObjsSortOnVarPairs(struct mdbObj **mdbObjs,struct slPair *varValPairs) // Sorts on var,val pairs vars lists: fwd case-sensitive. Assumes all objs' vars are in identical order. // This method will use mdbObjsSortOnVars() { if (varValPairs == NULL) return; struct slPair *onePair = varValPairs; struct dyString *dyTerms = dyStringNew(256); dyStringAppend(dyTerms,onePair->name); onePair = onePair->next; for(; onePair != NULL; onePair = onePair->next) dyStringPrintf(dyTerms,",%s",onePair->name); mdbObjsSortOnVars(mdbObjs,dyStringContents(dyTerms)); dyStringFree(&dyTerms); } void mdbObjRemoveVars(struct mdbObj *mdbObjs, char *vars) // Prunes list of vars for an object, freeing the memory. Doesn't touch DB. 
{ char *cloneLine = NULL; int count = 0; char **words = NULL; if(vars != NULL) { cloneLine = cloneString(vars); count = chopByWhite(cloneLine,NULL,0); words = needMem(sizeof(char *) * count); count = chopByWhite(cloneLine,words,count); } struct mdbObj *mdbObj = NULL; for( mdbObj=mdbObjs; mdbObj!=NULL; mdbObj=mdbObj->next ) { int ix; struct mdbVar *keepTheseVars = NULL; if(count == 0 && mdbObj->varHash != NULL) hashFree(&mdbObj->varHash); struct mdbVar *mdbVar = NULL; while((mdbVar = slPopHead(&(mdbObj->vars))) != NULL) { if(count == 0) ix = 1; else ix = stringArrayIx(mdbVar->var,words,count); if(ix < 0) slAddHead(&keepTheseVars,mdbVar); else { if(count != 0 && mdbObj->varHash != NULL) hashRemove(mdbObj->varHash, mdbVar->var); mdbVarFree(&mdbVar); } } if(keepTheseVars != NULL) slReverse(&keepTheseVars); mdbObj->vars = keepTheseVars; } if(words != NULL) freeMem(words); } char *mdbRemoveCommonVar(struct mdbObj *mdbList, char *var) // Removes var from set of mdbObjs but only if all that have it have a commmon val // Returns the val if removed, else NULL { char *val = NULL; struct mdbObj *mdb = NULL; for(mdb = mdbList; mdb; mdb=mdb->next) { char *thisVal = mdbObjFindValue(mdb,var); if (thisVal == NULL) // If var isn't found in some, that is okay continue; if (val == NULL) val = thisVal; else if(differentWord(val,thisVal)) return NULL; } if (val) { val = cloneString(val); for(mdb = mdbList;mdb;mdb=mdb->next) mdbObjRemoveVars(mdb,var); } return val; } void mdbObjSwapVars(struct mdbObj *mdbObjs, char *vars,boolean deleteThis) // Replaces objs' vars with var=vap pairs provided, preparing for DB update. 
{ struct mdbObj *mdbObj = NULL; for( mdbObj=mdbObjs; mdbObj!=NULL; mdbObj=mdbObj->next ) { mdbObj->deleteThis = deleteThis; if(mdbObj->varHash != NULL) hashFree(&mdbObj->varHash); struct mdbVar *mdbVar = NULL; while((mdbVar = slPopHead(&(mdbObj->vars))) != NULL) mdbVarFree(&mdbVar); mdbObjAddVarPairs(mdbObj,vars); } } struct mdbObj *mdbObjsFilter(struct mdbObj **pMdbObjs, char *var, char *val,boolean exclude) // Filters mdb objects to only those that include/exclude vars. Optionally checks val too. // Returns removed objects { struct mdbObj *mdbObjsDropped = NULL; struct mdbObj *mdbObj=*pMdbObjs; struct mdbObj *mdbLastObj=NULL; while (mdbObj!=NULL) { boolean drop = FALSE; char *foundVal = mdbObjFindValue(mdbObj,var); if (val == NULL) drop = (!foundVal && !exclude) || (foundVal && exclude); else if (foundVal) drop = (sameWord(foundVal,val) ? exclude : !exclude); // case-insensitive else drop = !exclude; if (drop) { if (mdbLastObj==NULL) *pMdbObjs = mdbObj->next; else mdbLastObj->next = mdbObj->next; mdbObj->next = NULL; slAddHead(&mdbObjsDropped,mdbObj); if (mdbLastObj==NULL) { mdbObj = *pMdbObjs; continue; } } else mdbLastObj=mdbObj; mdbObj = mdbLastObj->next; } return mdbObjsDropped; } struct mdbObj *mdbObjsFilterTablesOrFiles(struct mdbObj **pMdbObjs,boolean tables, boolean files) // Filters mdb objects to only those that have associated tables or files. 
Returns removed non-table/file objects // Note: Since table/file objects overlap, there are 3 possibilites: tables, files, table && files { assert(tables || files); // Cant exclude both struct mdbObj *mdbObjs = *pMdbObjs; struct mdbObj *mdbObjsDropped = NULL; if (tables) mdbObjsDropped = mdbObjsFilter(&mdbObjs,"tableName",NULL,FALSE); if (files) { struct mdbObj *mdbObjsNoFileName = mdbObjsDropped = mdbObjsFilter(&mdbObjs,"fileName",NULL,FALSE); if (mdbObjsNoFileName) { struct mdbObj *mdbObjsNoFileIndex = mdbObjsFilter(&mdbObjsNoFileName,"fileIndex",NULL,FALSE); if (mdbObjsNoFileIndex) { mdbObjs = slCat(mdbObjs,mdbObjsNoFileName); mdbObjsDropped = slCat(mdbObjsDropped,mdbObjsNoFileIndex); } } } slSort(&mdbObjs, &mdbObjCmp); // Need to be returned to obj order slSort(&mdbObjsDropped,&mdbObjCmp); *pMdbObjs = mdbObjs; return mdbObjsDropped; } struct mdbObj *mdbObjIntersection(struct mdbObj **pA, struct mdbObj *b) // return duplicate objs from an intersection of two mdbObj lists. // List b is untouched but pA will contain the resulting intersection { struct mdbObj *mdbObj; struct hash *hashB = newHash(0); for (mdbObj = b; mdbObj != NULL; mdbObj = mdbObj->next) { hashAdd(hashB, mdbObj->obj, mdbObj); } struct mdbObj *mdbObjsDropped = NULL; struct mdbObj *mdbObjsIntersecting = NULL; struct mdbObj *mdbObjs=*pA; while (mdbObjs) { mdbObj = slPopHead(&mdbObjs); if (hashLookup(hashB, mdbObj->obj) != NULL) slAddHead(&mdbObjsIntersecting,mdbObj); else slAddHead(&mdbObjsDropped,mdbObj); } hashFree(&hashB); if (mdbObjsIntersecting) slReverse(&mdbObjsIntersecting); *pA = mdbObjsIntersecting; if (mdbObjsDropped) slReverse(&mdbObjsDropped); return mdbObjsDropped; } void mdbObjTransformToUpdate(struct mdbObj *mdbObjs, char *var, char *varType,char *val,boolean deleteThis) // Turns one or more mdbObjs into the stucture needed to add/update or delete. 
{ struct mdbObj *mdbObj = NULL; for( mdbObj=mdbObjs; mdbObj!=NULL; mdbObj=mdbObj->next ) { mdbObj->deleteThis = deleteThis; if(mdbObj->varHash != NULL) hashFree(&mdbObj->varHash); struct mdbVar *mdbVar = NULL; while((mdbVar = slPopHead(&(mdbObj->vars))) != NULL) mdbVarFree(&mdbVar); if(var != NULL) { AllocVar(mdbVar); mdbVar->var = cloneString(var); mdbVar->varType = (varType==NULL?vtUnknown:mdbVarTypeStringToEnum(varType)); if(val != NULL) mdbVar->val = cloneString(val); mdbObj->vars = mdbVar; // Only one } } } struct mdbObj *mdbObjClone(const struct mdbObj *mdbObj) // Clones a single mdbObj, including hash and maintining order { if(mdbObj == NULL) return NULL; struct mdbObj *newObj; AllocVar(newObj); if(mdbObj->obj != NULL) newObj->obj = cloneString(mdbObj->obj); newObj->deleteThis = mdbObj->deleteThis; if(mdbObj->vars != NULL) { if(mdbObj->varHash != NULL) newObj->varHash = hashNew(0); struct mdbVar *mdbVar = NULL; for(mdbVar = mdbObj->vars; mdbVar != NULL; mdbVar = mdbVar->next ) { struct mdbVar *newVar = NULL; AllocVar(newVar); if(mdbVar->var != NULL) newVar->var = cloneString(mdbVar->var); if(mdbVar->val != NULL) newVar->val = cloneString(mdbVar->val); newVar->varType = mdbVar->varType; if(newVar->var != NULL && newVar->val != NULL) hashAdd(newObj->varHash, newVar->var, newVar); // pointer to struct to resolve type slAddHead(&(newObj->vars),newVar); } slReverse(&(newObj->vars)); } return newObj; } struct slName *mdbObjToSlName(struct mdbObj *mdbObjs) // Creates slNames list of mdbObjs->obj. mdbObjs remains untouched { struct slName *mdbNames = NULL; struct mdbObj *mdbObj = mdbObjs; for( ;mdbObj!=NULL; mdbObj=mdbObj->next) { slAddHead(&mdbNames,slNameNew(mdbObj->obj)); //allocates memory } slReverse(&mdbNames); return mdbNames; } // --------------- Free at last ---------------- void mdbObjsFree(struct mdbObj **mdbObjsPtr) // Frees one or more metadata objects and any contained mdbVars. Will free any hashes as well. 
{ if(mdbObjsPtr != NULL && *mdbObjsPtr != NULL) { // free all roots struct mdbObj *mdbObj = NULL; while((mdbObj = slPopHead(mdbObjsPtr)) != NULL) { // Free hash first (shared memory) hashFree(&(mdbObj->varHash)); // free all leaves struct mdbVar *mdbVar = NULL; while((mdbVar = slPopHead(&(mdbObj->vars))) != NULL) mdbVarFree(&mdbVar); // The rest of root freeMem(mdbObj->obj); freeMem(mdbObj); } freez(mdbObjsPtr); } } void mdbByVarsFree(struct mdbByVar **mdbByVarsPtr) // Frees one or more metadata vars and any contained vals and objs. Will free any hashes as well. { if(mdbByVarsPtr != NULL && *mdbByVarsPtr != NULL) { // free all roots struct mdbByVar *rootVar = NULL; while((rootVar = slPopHead(mdbByVarsPtr)) != NULL) { // Free hash first (shared memory) hashFree(&(rootVar->valHash)); // free all limbs struct mdbLimbVal *limbVal = NULL; while((limbVal = slPopHead(&(rootVar->vals))) != NULL) mdbLimbValFree(&limbVal); // The rest of root if(rootVar->var) freeMem(rootVar->var); freeMem(rootVar); } freez(mdbByVarsPtr); } } // ----------------- CGI specific routines for use with tdb ----------------- #define MDB_NOT_FOUND ((struct mdbObj *)-666) #define METADATA_NOT_FOUND ((struct mdbObj *)-999) #define MDB_OBJ_KEY "mdbObj" static struct mdbObj *metadataForTableFromTdb(struct trackDb *tdb) // Returns the metadata for a table from a tdb setting. 
{ char *setting = trackDbSetting(tdb, MDB_METADATA_KEY); if(setting == NULL) return NULL; struct mdbObj *mdbObj; AllocVar(mdbObj); mdbObj->obj = cloneString(tdb->table); AllocVar(mdbObj->vars); mdbObj->vars->var = cloneString(MDB_OBJ_TYPE); mdbObj->vars->val = cloneString("table"); mdbObj->varHash = hashNew(0); hashAdd(mdbObj->varHash, mdbObj->vars->var, mdbObj->vars); mdbObj = mdbObjAddVarPairs(mdbObj,setting); mdbObjRemoveVars(mdbObj,"tableName"); // NOTE: Special hint that the tdb metadata is used since no mdb metadata is found return mdbObj; } const struct mdbObj *metadataForTable(char *db,struct trackDb *tdb,char *table) // Returns the metadata for a table. NEVER FREE THIS STRUCT! { struct mdbObj *mdbObj = NULL; // See of the mdbObj was already built if(tdb != NULL) { mdbObj = tdbExtrasGetOrDefault(tdb, MDB_OBJ_KEY,NULL); if(mdbObj == METADATA_NOT_FOUND) // NOT in mtatbl, not in tdb metadata setting! return NULL; else if(mdbObj == MDB_NOT_FOUND) // looked mdb already and not found! return metadataForTableFromTdb(tdb); else if(mdbObj != NULL) { return mdbObj; // No reason to query the table again! 
} } struct sqlConnection *conn = hAllocConn(db); char *mdb = mdbTableName(conn,TRUE); // Look for sandbox name first if(tdb != NULL && tdb->table != NULL) table = tdb->table; if(mdb != NULL) mdbObj = mdbObjQueryByObj(conn,mdb,table,NULL); hFreeConn(&conn); // save the mdbObj for next time if(tdb) { if(mdbObj != NULL) tdbExtrasAddOrUpdate(tdb,MDB_OBJ_KEY,mdbObj); else { tdbExtrasAddOrUpdate(tdb,MDB_OBJ_KEY,MDB_NOT_FOUND); return metadataForTableFromTdb(tdb); // FIXME: metadata setting in TDB is soon to be obsolete } } return mdbObj; } const char *metadataFindValue(struct trackDb *tdb, char *var) // Finds the val associated with the var or retruns NULL { struct mdbObj *mdbObj = tdbExtrasGetOrDefault(tdb, MDB_OBJ_KEY,NULL); if(mdbObj == MDB_NOT_FOUND) // Note, only we if already looked for mdb (which requires db) mdbObj = metadataForTableFromTdb(tdb); if (mdbObj == NULL || mdbObj == METADATA_NOT_FOUND) return NULL; return mdbObjFindValue(mdbObj,var); } struct mdbObj *mdbObjSearch(struct sqlConnection *conn, char *var, char *val, char *op, int limit) // Search the metaDb table for objs by var and val. Can restrict by op "is", "like", "in" and accept (non-zero) limited string size // Search is via mysql, so it's case-insensitive. Return is sorted on obj. 
{ if (var == NULL && val == NULL) errAbort("mdbObjSearch requests objects but provides no criteria.\n"); char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first // Build a query string struct dyString *dyQuery = dyStringNew(512); dyStringPrintf(dyQuery,"select l1.obj, l1.var, l1.varType, l1.val from %s l1",tableName); if (var != NULL || val != NULL) dyStringPrintf(dyQuery," where exists (select l2.obj from %s l2 where l2.obj = l1.obj and ",tableName); if(var != NULL) dyStringPrintf(dyQuery,"l2.var = '%s'", var); if(var != NULL && val != NULL) dyStringAppend(dyQuery," and "); if(val != NULL) { dyStringAppend(dyQuery,"l2.val "); if(sameString(op, "in")) dyStringPrintf(dyQuery,"in (%s)", val); // Note, must be a formatted string already: 'a','b','c' or 1,2,3 else if(sameString(op, "contains") || sameString(op, "like")) dyStringPrintf(dyQuery,"like '%%%s%%'", val); else if (limit > 0 && strlen(val) != limit) dyStringPrintf(dyQuery,"like '%.*s%%'", limit, val); else dyStringPrintf(dyQuery,"= '%s'", val); } dyStringAppendC(dyQuery,')'); dyStringAppend(dyQuery," order by obj"); struct mdb *mdb = mdbLoadByQuery(conn, dyStringCannibalize(&dyQuery)); verbose(3, "rows (vars) returned: %d\n",slCount(mdb)); struct mdbObj *mdbObjs = mdbObjsLoadFromMemory(&mdb,TRUE); return mdbObjs; } struct mdbObj *mdbObjRepeatedSearch(struct sqlConnection *conn,struct slPair *varValPairs,boolean tables,boolean files) // Search the metaDb table for objs by var,val pairs. Uses mdbCvSearchMethod() if available. 
// This method will use mdbObjsQueryByVars() { struct slPair *onePair; struct dyString *dyTerms = dyStringNew(256); // Build list of terms as "var1=val1 var2=val2a,val2b,val2c var3=%val3%" for(onePair = varValPairs; onePair != NULL; onePair = onePair->next) { enum mdbCvSearchable searchBy = mdbCvSearchMethod(onePair->name); // If select is by free text then like if (searchBy == cvsSearchByMultiSelect) { // TO BE IMPLEMENTED warn("mdb search by multi-select is not yet implemented."); // The mdbVal[1] will hve to be filled cartOptionalSlNameList(cart,???) struct slName *choices = (struct slName *)onePair->val; if (slCount(choices) == 1) dyStringPrintf(dyTerms,"%s=%s ",onePair->name,choices->name); else if(choices != NULL) { // Then slNames will need to be assembled into a string in the form of a,b,c dyStringPrintf(dyTerms,"%s=%s",onePair->name,choices->name); struct slName *choice = choices->next; for(;choice!=NULL;choice=choice->next) dyStringPrintf(dyTerms,",%s",choice->name); dyStringAppendC(dyTerms,' '); } } else if (searchBy == cvsSearchBySingleSelect) dyStringPrintf(dyTerms,"%s=%s ",onePair->name,(char *)onePair->val); else if (searchBy == cvsSearchByFreeText) dyStringPrintf(dyTerms,"%s=%%%s%% ",onePair->name,(char *)onePair->val); else if (searchBy == cvsSearchByDateRange || searchBy == cvsSearchByIntegerRange) { // TO BE IMPLEMENTED // Requires new mdbObjSearch API and more than one (char *)onePair->val warn("mdb search by date is not yet implemented."); } } // Be sure to include table of file in selections if (tables) dyStringAppend(dyTerms,"tableName=? "); if (files) dyStringAppend(dyTerms,"fileName=? 
"); // Build the mdbByVals struct and then select all mdbObjs in one query struct mdbByVar *mdbByVars = mdbByVarsLineParse(dyStringContents(dyTerms)); dyStringClear(dyTerms); struct mdbObj *mdbObjs = mdbObjsQueryByVars(conn,NULL,mdbByVars); // Uses master table metaDb not sandbox versions return mdbObjs; } struct slName *mdbObjNameSearch(struct sqlConnection *conn, char *var, char *val, char *op, int limit, boolean tables, boolean files) // Search the metaDb table for objs by var and val. Can restrict by op "is", "like", "in" and accept (non-zero) limited string size // Search is via mysql, so it's case-insensitive. Return is sorted on obj. { // Note: This proves faster than getting mdbObjs then converting to slNames struct mdbObj *mdbObjs = mdbObjSearch(conn,var,val,op,limit); // May only be interested in tables or files: if (tables || files) { struct mdbObj *mdbObjsDropped = mdbObjsFilterTablesOrFiles(&mdbObjs,tables,files); mdbObjsFree(&mdbObjsDropped); } struct slName *mdbNames = mdbObjToSlName(mdbObjs); mdbObjsFree(&mdbObjs); return mdbNames; } struct slName *mdbValSearch(struct sqlConnection *conn, char *var, int limit, boolean tables, boolean files) // Search the metaDb table for vals by var. Can impose (non-zero) limit on returned string size of val // Search is via mysql, so it's case-insensitive. Return is sorted on val. { // TODO: Change this to use normal mdb struct routines? 
struct slName *retVal; if (!tables && !files) errAbort("mdbValSearch requests values for neither table nor file objects.\n"); char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first struct dyString *dyQuery = dyStringNew(512); if (limit > 0) dyStringPrintf(dyQuery,"select distinct LEFT(val,%d)",limit); else dyStringPrintf(dyQuery,"select distinct val"); dyStringPrintf(dyQuery," from %s l1 where l1.var='%s' ",tableName,var); if (!tables || !files) dyStringPrintf(dyQuery,"and exists (select l2.obj from %s l2 where l2.obj = l1.obj and l2.var='objType' and l2.val='%s')", tableName,tables?"table":"file"); dyStringAppend(dyQuery," order by val"); retVal = sqlQuickList(conn, dyStringCannibalize(&dyQuery)); slNameSortCase(&retVal); return retVal; } // TODO: decide to make this public or hide it away inside the one function so far that uses it. static struct hash *cvHash = NULL; static char *cv_file() // return default location of cv.ra { static char filePath[PATH_LEN]; safef(filePath, sizeof(filePath), "%s/encode/cv.ra", hCgiRoot()); if(!fileExists(filePath)) errAbort("Error: can't locate cv.ra; %s doesn't exist\n", filePath); return filePath; } struct slPair *mdbValLabelSearch(struct sqlConnection *conn, char *var, int limit, boolean tables, boolean files) // Search the metaDb table for vals by var and returns controlled vocabulary (cv) label // (if it exists) and val as a pair. Can impose (non-zero) limit on returned string size of name. // Return is case insensitive sorted on name (label or else val). { // TODO: Change this to use normal mdb struct routines? 
if (!tables && !files) errAbort("mdbValSearch requests values for neither table nor file objects.\n"); char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first struct dyString *dyQuery = dyStringNew(512); if (limit > 0) dyStringPrintf(dyQuery,"select distinct LEFT(val,%d)",limit); else dyStringPrintf(dyQuery,"select distinct val"); dyStringPrintf(dyQuery," from %s l1 where l1.var='%s' ",tableName,var); if (!tables || !files) dyStringPrintf(dyQuery,"and exists (select l2.obj from %s l2 where l2.obj = l1.obj and l2.var='objType' and l2.val='%s')", tableName,tables?"table":"file"); dyStringAppend(dyQuery," order by val"); // Establish cv hash if (cvHash == NULL) cvHash = raReadAll(cgiUsualString("ra", cv_file()), "term"); struct slPair *pairs = NULL, *pair; struct sqlResult *sr = sqlGetResult(conn, dyStringContents(dyQuery)); dyStringFree(&dyQuery); char **row; struct hash *ra = NULL; while ((row = sqlNextRow(sr)) != NULL) { AllocVar(pair); char *name = cloneString(row[0]); pair = slPairNew(name,name); // defaults the label to the metaDb.val ra = hashFindVal(cvHash,name); if (ra == NULL && sameString(var,"lab")) // FIXME: ugly special case to be removed when metaDb is cleaned up! 
{ char *val = cloneString(name); ra = hashFindVal(cvHash,strUpper(val)); if (ra == NULL) ra = hashFindVal(cvHash,strLower(val)); freeMem(val); } if (ra != NULL) { char *label = hashFindVal(ra,"label"); if (label != NULL) { freeMem(pair->name); // Allocated when pair was created pair->name = strSwapChar(cloneString(label),'_',' '); // vestigial _ meaning space if (limit > 0 && strlen(pair->name) > limit) pair->name[limit] = '\0'; } } slAddHead(&pairs, pair); } sqlFreeResult(&sr); slPairSortCase(&pairs); return pairs; } struct hash *mdbCvTermTypeHash() // returns a hash of hashes of mdb and controlled vocabulary (cv) term types // Those terms should contain label,description,searchable,cvDefined,hidden { static struct hash *cvHashOfTermTypes = NULL; // Establish cv hash of Term Types if it doesn't already exist if (cvHashOfTermTypes == NULL) { cvHashOfTermTypes = raReadWithFilter(cv_file(), "term","type","typeOfTerm"); // Patch up an ugly inconsistency with 'cell' struct hash *cellHash = hashRemove(cvHashOfTermTypes,"cellType"); if (cellHash) { hashAdd(cvHashOfTermTypes,"cell",cellHash); hashReplace(cellHash, "term", cloneString("cell")); // spilling memory of 'cellType' val } struct hash *abHash = hashRemove(cvHashOfTermTypes,"Antibody"); if (abHash) { hashAdd(cvHashOfTermTypes,"antibody",abHash); hashReplace(abHash, "term", cloneString("antibody")); // spilling memory of 'Antibody' val } } return cvHashOfTermTypes; } struct slPair *mdbCvWhiteList(boolean searchTracks, boolean cvDefined) // returns the official mdb/controlled vocabulary terms that have been whitelisted for certain uses. // TODO: change to return struct that includes searchable! 
{ struct slPair *whitePairs = NULL; // Get the list of term types from thew cv struct hash *termTypeHash = mdbCvTermTypeHash(); struct hashCookie hc = hashFirst(termTypeHash); struct hashEl *hEl; while ((hEl = hashNext(&hc)) != NULL) { char *setting = NULL; struct hash *typeHash = (struct hash *)hEl->val; //if (!includeHidden) { setting = hashFindVal(typeHash,"hidden"); if(SETTING_IS_ON(setting)) continue; } if (searchTracks) { setting = hashFindVal(typeHash,"searchable"); -#ifdef CV_SEARCH_SUPPORTS_FREETEXT if (setting == NULL || (differentWord(setting,"select") && differentWord(setting,"freeText"))) -#else///ifndef CV_SEARCH_SUPPORTS_FREETEXT - if (setting == NULL || differentWord(setting,"select")) // TODO: Currently only 'select's are supported -#endif///ndef CV_SEARCH_SUPPORTS_FREETEXT continue; } if (cvDefined) { setting = hashFindVal(typeHash,"cvDefined"); if(SETTING_NOT_ON(setting)) continue; } char *term = hEl->name; char *label = hashFindVal(typeHash,"label"); if (label == NULL) label = term; slPairAdd(&whitePairs, term, cloneString(label)); // Term gets cloned in slPairAdd } if (whitePairs != NULL) slPairValSortCase(&whitePairs); return whitePairs; } -#ifdef CV_SEARCH_SUPPORTS_FREETEXT enum mdbCvSearchable mdbCvSearchMethod(char *term) // returns whether the term is searchable // TODO: replace with mdbCvWhiteList() returning struct { // Get the list of term types from thew cv struct hash *termTypeHash = mdbCvTermTypeHash(); struct hash *termHash = hashFindVal(termTypeHash,term); if (termHash != NULL) { char *searchable = hashFindVal(termHash,"searchable"); if (searchable != NULL) { if (sameWord(searchable,"select")) return cvsSearchBySingleSelect; if (sameWord(searchable,"freeText")) return cvsSearchByFreeText; } } return cvsNotSearchable; } -#endif///ndef CV_SEARCH_SUPPORTS_FREETEXT const char *cvLabel(char *term) // returns cv label if term found or else just term { // Get the list of term types from thew cv struct hash *termTypeHash = 
mdbCvTermTypeHash(); struct hash *termHash = hashFindVal(termTypeHash,term); if (termHash != NULL) { char *label = hashFindVal(termHash,"label"); if (label != NULL) return label; } return term; }