a233918b8efa335e93b8432c08ac7d5dd2c019bd tdreszer Wed Mar 23 12:48:02 2011 -0700 Better management of mdb vars and cv terms string literals in anticilation of splitting out cv lib. Also removed the varType once and for all diff --git src/hg/lib/mdb.c src/hg/lib/mdb.c index 58cea2d..7c60a36 100644 --- src/hg/lib/mdb.c +++ src/hg/lib/mdb.c @@ -223,31 +223,30 @@ fprintf(f, "%s", el->val); fputc('"',f); fputc('}',f); } /* -------------------------------- End autoSql Generated Code -------------------------------- */ #include "ra.h" #include "hgConfig.h" #include "obscure.h" #define MDB_METADATA_KEY "metadata" #define MDB_METAOBJ_RAKEY "metaObject" #define MDB_METAVAR_RAKEY "metaVariable" -#define MDB_OBJ_TYPE "objType" // ------- (static) convert from autoSql ------- static void mdbVarFree(struct mdbVar **mdbVarPtr) // Frees a single mdbVar struct { freeMem((*mdbVarPtr)->val); freeMem((*mdbVarPtr)->var); freez(mdbVarPtr); } static void mdbVarsFree(struct mdbVar **mdbVarPtr) // Frees an mdbVars list { struct mdbVar *mdbVar = NULL; while((mdbVar = slPopHead(mdbVarPtr)) != NULL) @@ -505,69 +504,69 @@ } freeMem(words); freeMem(cloneVars); // Special for old style ENCODE metadata #define ENCODE_ALN "Alignments" #define ENCODE_RSIG "RawSignal" if(mdbObj->obj == NULL) { char * tableName = NULL; char * fileName = NULL; for(mdbVar = mdbObj->vars; mdbVar != NULL && (tableName == NULL || fileName == NULL); mdbVar = mdbVar->next) { - if(sameString(mdbVar->var,"tableName")) + if(sameString(mdbVar->var,MDB_VAR_TABLENAME)) tableName = mdbVar->val; - else if(sameString(mdbVar->var,"fileName")) + else if(sameString(mdbVar->var,MDB_VAR_FILENAME)) fileName = mdbVar->val; } mdbVar = NULL; // assertably so, but this is conditioanally created below if(tableName != NULL) { - verbose(3, "tableName:%s\n",tableName); + verbose(3, "%s:%s\n",MDB_VAR_TABLENAME,tableName); if(fileName == NULL || startsWithWordByDelimiter(tableName,'.',fileName)) { mdbObj->obj = cloneString(tableName); AllocVar(mdbVar); mdbVar->var = cloneString(MDB_OBJ_TYPE); - mdbVar->val = cloneString("table"); + mdbVar->val = cloneString(MDB_OBJ_TYPE_TABLE); } else if(stringIn(ENCODE_ALN,fileName) && stringIn(ENCODE_RSIG,tableName))// Messier case where the file has "Alignment" but the table has "RawSignal" { char *tmpFilName = cloneString(fileName); strSwapStrs(tmpFilName, strlen(tmpFilName),ENCODE_ALN, ENCODE_RSIG); if(startsWithWordByDelimiter(tableName,'.',tmpFilName)) { mdbObj->obj = cloneString(tableName); AllocVar(mdbVar); mdbVar->var = cloneString(MDB_OBJ_TYPE); - mdbVar->val = cloneString("table"); + mdbVar->val = cloneString(MDB_OBJ_TYPE_TABLE); } freeMem(tmpFilName); } } else if(fileName != NULL) { - verbose(3, "fileName:%s\n",fileName); + verbose(3, "%s:%s\n",MDB_VAR_FILENAME,fileName); // NOTE: that the file object is the root of the name, so both file.fastq.gz and file.fastq are same obj! mdbObj->obj = cloneFirstWordByDelimiter(fileName,'.'); AllocVar(mdbVar); mdbVar->var = cloneString(MDB_OBJ_TYPE); - mdbVar->val = cloneString("file"); + mdbVar->val = cloneString(MDB_OBJ_TYPE_FILE); } if(mdbVar != NULL) // Just determined an objType { verbose(3, "mdbObjAddVarPairs() var=val: %s=%s\n",mdbVar->var,mdbVar->val); struct mdbVar *oldVar = (struct mdbVar *)hashFindVal(mdbObj->varHash, mdbVar->var); if(oldVar) mdbVarFree(&mdbVar); else { hashAdd(mdbObj->varHash, mdbVar->var, mdbVar); // pointer to struct to resolve type slAddHead(&(mdbObj->vars),mdbVar); } } } @@ -591,31 +590,31 @@ char *fromTheTop = line; char*nibbledWord = cloneNextWordByDelimiter(&line,' '); if(nibbledWord == NULL || differentWord(nibbledWord,MDB_METADATA_KEY)) errAbort("This is not a formatted metadata line:\n\t%s\n",fromTheTop); freeMem(nibbledWord); struct mdbObj *mdbObj = NULL; char*varPairs = line; nibbledWord = cloneNextWordByDelimiter(&line,' ');; if(nibbledWord == NULL) errAbort("This is not a formatted metadata line:\n\t%s\n",fromTheTop); if(strchr(nibbledWord, '=') == NULL) // If this is not a var=val then it should be obj { AllocVar(mdbObj); mdbObj->obj = nibbledWord; - verbose(3, "metadataLineParse() obj=%s\n",mdbObj->obj); + verbose(3, "metadataLineParse() %s=%s\n",MDB_OBJ,mdbObj->obj); varPairs = line; while(strlen(line) > 0) { nibbledWord = cloneNextWordByDelimiter(&line,' ');; if(nibbledWord == NULL) errAbort("This is not a formatted metadata line:\n\t%s\n",fromTheTop); if(*nibbledWord == '#' || strchr(nibbledWord, '=') != NULL) // IS commnet OR start of var=val pairs break; if(sameWord(nibbledWord,"delete")) mdbObj->deleteThis = TRUE; else errAbort("This is not a formatted metadata line:\n\t%s\n",fromTheTop); varPairs = line; freeMem(nibbledWord); @@ -639,31 +638,31 @@ struct hash* varHash; // There must not be multiple occurrances of the same var // initial chop and determine if this looks like metadata int count = chopByWhiteRespectDoubleQuotes(cloneLine,NULL,0); char **words = needMem(sizeof(char *) * count); count = chopByWhiteRespectDoubleQuotes(cloneLine,words,count); verbose(3, "mdbByVarsLineParse() word count:%d\n\t%s\n",count,line); // Get obj and figure out if this is a delete line varHash = hashNew(0); // All words are expected to be var=val pairs! for (thisWord=0; thisWord<count; thisWord++) { if (strchr(words[thisWord], '=') == NULL) - errAbort("Expected 'var=val' but found '%s'. This is not properly formatted metadata:\n\t%s\n",words[thisWord],line); + errAbort("Expected '%s=%s' but found '%s'. This is not properly formatted metadata:\n\t%s\n",MDB_VAR,MDB_VAL,words[thisWord],line); // Set up var struct from 1st half of pair AllocVar(rootVar); rootVar->var = cloneNextWordByDelimiter(&(words[thisWord]),'='); rootVar->notEqual = (rootVar->var[strlen(rootVar->var)-1] == '!'); // requested not equal if (rootVar->notEqual) rootVar->var[strlen(rootVar->var)-1] = 0; // Do not try to combine repeated vars because "foo=a foo=b" is 'AND' while "foo=a,b" is 'OR'. // Fill in the val(s) from second half of pair char *val = NULL; if (words[thisWord][0] != '\0' && words[thisWord][0] != '?') // "var=?" or "var=" will query by var name only val = cloneString(words[thisWord]); if (val != NULL) { @@ -896,102 +895,90 @@ verbose(3,"Objects magic: %d Files magic: %d (%s)\n",objsMagic,fileMagic,line+strlen(MDB_MAGIC_PREFIX)); *validated = (fileMagic == objsMagic); } else verbose(3,"Can't find magic number on this file.\n"); } return mdbObjs; } // ------ Table name and creation ------ void mdbReCreate(struct sqlConnection *conn,char *tblName,boolean testOnly) // Creates ore Recreates the named mdb. { char *sqlCreate = -#define MDB_STILL_HAS_VARTYPE -#ifdef MDB_STILL_HAS_VARTYPE "# Contains metadata for a table, file or other objects.\n" "CREATE TABLE %s (\n" " obj varchar(255) not null, # Object name or ID.\n" " var varchar(255) not null, # Metadata variable name.\n" -" varType enum ('txt','binary') # OBSOLETE All vars are txt\n" -" not null default 'txt',\n" " val varchar(2048) not null, # Metadata value.\n" " #Indices\n" " PRIMARY KEY(obj,var),\n" " INDEX varKey (var,val(32),obj)\n" ")"; -#else///ifndef MDB_STILL_HAS_VARTYPE -"# Contains metadata for a table, file or other objects.\n" -"CREATE TABLE %s (\n" -" obj varchar(255) not null, # Object name or ID.\n" -" var varchar(255) not null, # Metadata variable name.\n" -" val varchar(2048) not null, # Metadata value.\n" -" #Indices\n" -" PRIMARY KEY(obj,var),\n" -" INDEX varKey (var,val(32),obj)\n" -")"; -#endif///ndef MDB_STILL_HAS_VARTYPE if(sqlTableExists(conn,tblName)) verbose(2, "Table '%s' already exists. It will be recreated.\n",tblName); struct dyString *dy = newDyString(512); dyStringPrintf(dy, sqlCreate, tblName); verbose(2, "Requesting table creation:\n%s;\n", dyStringContents(dy)); if(!testOnly) sqlRemakeTable(conn, tblName, dyStringContents(dy)); dyStringFree(&dy); } +#define HG_CONF_SANDBOX_MDB "db.metaDb" +#define HG_CONF_SANDBOX_TDB "db.trackDb" +#define SANDBOX_TDB_ROOT "trackDb" static char*mdbTableNamePreferSandbox() // returns the mdb table name or NULL if conn supplied but the table doesn't exist { -char *table = cfgOption("db.metaDb"); +char *table = cfgOption(HG_CONF_SANDBOX_MDB); if(table != NULL) return cloneString(table); // Look for trackDb name to model -char *name = cfgOption("db.trackDb"); +char *name = cfgOption(HG_CONF_SANDBOX_TDB); if(name == NULL) return cloneString(MDB_DEFAULT_NAME); // Only take the last table of a list of tables! char delimit = ','; for (table = name; (name = skipBeyondDelimit(name,delimit)) != NULL;) table = name; name = skipLeadingSpaces(table); // Divide name into root and sandbox portion char *root = NULL; char *sand = NULL; delimit = '_'; if ((sand = strchr(name,delimit)) == NULL) { delimit = '-'; sand = strchr(name,delimit); } if (sand == NULL) // No sandbox portion return cloneString(MDB_DEFAULT_NAME); root = cloneNextWordByDelimiter(&name,delimit); sand = name; // Since db.trackDb was used, make sure to swap it -if (startsWith("trackDb",root)) +if (startsWith(SANDBOX_TDB_ROOT,root)) { freeMem(root); root = cloneString(MDB_DEFAULT_NAME); } else // If discovered anything other than trackDb then give up as too obscure return cloneString(MDB_DEFAULT_NAME); // Finally ready to put it together int size = strlen(root) + strlen(sand) + 2; table = needMem(size); safef(table,size,"%s%c%s",root,delimit,sand); freeMem(root); return table; } @@ -1140,53 +1127,50 @@ int mdbObjsLoadToDb(struct sqlConnection *conn,char *tableName,struct mdbObj *mdbObjs,boolean testOnly) // Adds mdb Objs with minimal error checking { int count = 0; if (tableName == NULL) tableName = MDB_DEFAULT_NAME; if (!sqlTableExists(conn,tableName)) errAbort("mdbObjsLoadToDb attempting to load non-existent table named '%s'.\n",tableName); assert(mdbObjs != NULL); // If this is the case, then be vocal #define MDB_TEMPORARY_TAB_FILE "temporaryMdb.tab" -#define SHOW_TIMING(last,msg) { int now = clock1000(); verbose(0,"%04ldms - %s\n",(now - (last)),(msg)); (last) = now; } long lastTime = 0; -SHOW_TIMING(lastTime,"Before printing tab delimited file"); count = mdbObjPrintToTabFile(mdbObjs,MDB_TEMPORARY_TAB_FILE); // Disable keys in hopes of speeding things up. No danger since it only disables non-unique keys char query[8192]; safef(query, sizeof(query),"alter table %s disable keys",tableName); sqlUpdate(conn, query); // Quick? load -SHOW_TIMING(lastTime,"Before loading the mdb from tab-delimited file"); sqlLoadTabFile(conn, MDB_TEMPORARY_TAB_FILE, tableName, SQL_TAB_FILE_WARN_ON_ERROR|SQL_TAB_FILE_WARN_ON_WARN); -SHOW_TIMING(lastTime,"Done 'LOAD DATA INFILE' mysql command for the mdb"); // Enabling the keys again safef(query, sizeof(query),"alter table %s enable keys",tableName); sqlUpdate(conn, query); -SHOW_TIMING(lastTime,"Done re-enabling keys"); unlink(MDB_TEMPORARY_TAB_FILE); +verbose(0,"%04ldms - Done loading mdb with 'LOAD DATA INFILE' mysql command.\n",(clock1000() - lastTime)); + return count; } // ------------------ Querys ------------------- struct mdbObj *mdbObjQuery(struct sqlConnection *conn,char *table,struct mdbObj *mdbObj) // Query the metadata table by obj and optional vars and vals in metaObj struct. If mdbObj is NULL query all. // Returns new mdbObj struct fully populated and sorted in obj,var order. { // select obj,var,val where (var= [and val=]) or ([var= and] val=) order by obj,var boolean buildHash = TRUE; if(table == NULL) table = MDB_DEFAULT_NAME; if(!sqlTableExists(conn,table)) @@ -1554,35 +1538,31 @@ { FILE *tabFile = mustOpen(file, "w"); int count = 0; struct mdbObj *mdbObj = NULL; for(mdbObj=mdbObjs;mdbObj!=NULL;mdbObj=mdbObj->next) { if(mdbObj->obj == NULL) continue; struct mdbVar *mdbVar = NULL; for(mdbVar=mdbObj->vars;mdbVar!=NULL;mdbVar=mdbVar->next) { if (mdbVar->var == NULL || mdbVar->val == NULL) continue; -#ifdef MDB_STILL_HAS_VARTYPE - fprintf(tabFile, "%s\t%s\ttxt\t%s\n",mdbObj->obj,mdbVar->var,sqlEscapeString(mdbVar->val)); -#else///ifndef MDB_STILL_HAS_VARTYPE fprintf(tabFile, "%s\t%s\t%s\n",mdbObj->obj,mdbVar->var,sqlEscapeString(mdbVar->val)); -#endif///ndef MDB_STILL_HAS_VARTYPE count++; } } fclose(tabFile); return count; } void mdbByVarPrint(struct mdbByVar *mdbByVars,boolean raStyle) // prints var=val pairs and objs that go with them single lines or ra style { // Single line: // mdbVariable lucy=ethyl bestFriends lifePartners // mdbVariable lucy=ricky iLoveLucy divorces // NOT QUITE ra style @@ -2018,31 +1998,31 @@ if(words != NULL) freeMem(words); } void mdbObjRemoveHiddenVars(struct mdbObj *mdbObjs) // Prunes list of vars for mdb objs that have been declared as hidden in cv.ra typeOfTerms { // make comma delimited list of hidden vars struct hash *cvTermTypes = (struct hash *)mdbCvTermTypeHash(); struct hashEl *el, *elList = hashElListHash(cvTermTypes); struct dyString *dyRemoveVars = dyStringNew(256); for (el = elList; el != NULL; el = el->next) { struct hash *varHash = el->val; - if (SETTING_IS_ON(hashFindVal(varHash, "hidden"))) + if (SETTING_IS_ON(hashFindVal(varHash, CV_TOT_HIDDEN))) { assert(mdbCvSearchMethod(el->name) == cvsNotSearchable); // Good idea to assert but cv.ra is a user updatable file dyStringPrintf(dyRemoveVars,"%s ",el->name); } } hashElFreeList(&elList); if (dyStringLen(dyRemoveVars)) mdbObjRemoveVars(mdbObjs, dyStringContents(dyRemoveVars)); dyStringFree(&dyRemoveVars); } char *mdbRemoveCommonVar(struct mdbObj *mdbList, char *var) // Removes var from set of mdbObjs but only if all that have it have a commmon val @@ -2170,62 +2150,62 @@ for(ix=0;ix<count;ix++) { boolean notEqual = FALSE; char *val = strchr(var[ix],'='); // list may be vars alone! (var1=val1 var2 var3!=val3 var4=None) if (val != NULL) { notEqual = (*(val - 1) == '!'); if (notEqual) *(val - 1) = '\0'; *val = '\0'; val += 1; if (*val == '\0') val = NULL; } struct mdbObj *objNotMatching = mdbObjsFilter(&mdbObjsMatch,var[ix],val,notEqual); // exclude non-matching - if (noneEqualsNotFound && val != NULL && sameWord(val,"None")) + if (noneEqualsNotFound && val != NULL && sameWord(val,MDB_VAL_ENCODE_EDV_NONE)) mdbObjsMatch = slCat(mdbObjsMatch,mdbObjsFilter(&objNotMatching,var[ix],NULL,notEqual)); // 1st match on var=None, now match on var!= (var not defined) mdbObjsNoMatch = slCat(mdbObjsNoMatch,objNotMatching); // Multiple passes "cat" non-matching and destroys sort order } freeMem(var); freeMem(varsLine); if (returnMatches) { *pMdbObjs = mdbObjsNoMatch; return mdbObjsMatch; } *pMdbObjs = mdbObjsMatch; return mdbObjsNoMatch; } struct mdbObj *mdbObjsFilterTablesOrFiles(struct mdbObj **pMdbObjs,boolean tables, boolean files) // Filters mdb objects to only those that have associated tables or files. Returns removed non-table/file objects // Note: Since table/file objects overlap, there are 3 possibilites: tables, files, table && files { assert(tables || files); // Cant exclude both struct mdbObj *mdbObjs = *pMdbObjs; struct mdbObj *mdbObjsDropped = NULL; if (tables) - mdbObjsDropped = mdbObjsFilter(&mdbObjs,"objType","table",FALSE); + mdbObjsDropped = mdbObjsFilter(&mdbObjs,MDB_OBJ_TYPE,MDB_OBJ_TYPE_TABLE,FALSE); if (files) { - struct mdbObj *mdbObjsNoFileName = mdbObjsDropped = mdbObjsFilter(&mdbObjs,"fileName",NULL,FALSE); + struct mdbObj *mdbObjsNoFileName = mdbObjsDropped = mdbObjsFilter(&mdbObjs,MDB_VAR_FILENAME,NULL,FALSE); if (mdbObjsNoFileName) { - struct mdbObj *mdbObjsNoFileIndex = mdbObjsFilter(&mdbObjsNoFileName,"fileIndex",NULL,FALSE); + struct mdbObj *mdbObjsNoFileIndex = mdbObjsFilter(&mdbObjsNoFileName,MDB_VAR_FILEINDEX,NULL,FALSE); if (mdbObjsNoFileIndex) { mdbObjs = slCat(mdbObjs,mdbObjsNoFileName); mdbObjsDropped = slCat(mdbObjsDropped,mdbObjsNoFileIndex); } } } slSort(&mdbObjs, &mdbObjCmp); // Need to be returned to obj order slSort(&mdbObjsDropped,&mdbObjCmp); *pMdbObjs = mdbObjs; return mdbObjsDropped; } struct mdbObj *mdbObjIntersection(struct mdbObj **pA, struct mdbObj *b) @@ -2340,283 +2320,282 @@ // Full considers vars not defined in cv as invalids { struct hash *termTypeHash = (struct hash *)mdbCvTermTypeHash(); struct mdbObj *mdbObj = NULL; int invalids = 0; for( mdbObj=mdbObjs; mdbObj!=NULL; mdbObj=mdbObj->next ) { struct mdbVar *mdbVar = NULL; for(mdbVar = mdbObj->vars;mdbVar != NULL;mdbVar=mdbVar->next) { struct hash *termHash = hashFindVal(termTypeHash,mdbVar->var); if (termHash == NULL) // No cv definition for term so no validation can be done { if (!full) continue; - if (sameString(mdbVar->var,"objType") - && (sameString(mdbVar->val,"table") || sameString(mdbVar->val,"file") || sameString(mdbVar->val,"composite"))) + if (sameString(mdbVar->var,MDB_OBJ_TYPE) + && ( sameString(mdbVar->val,MDB_OBJ_TYPE_TABLE) + || sameString(mdbVar->val,MDB_OBJ_TYPE_FILE) + || sameString(mdbVar->val,MDB_OBJ_TYPE_COMPOSITE))) continue; - printf("INVALID '%s' not defined in cv.ra: %s -> %s = %s\n",mdbVar->var,mdbObj->obj,mdbVar->var,mdbVar->val); + printf("INVALID '%s' not defined in %s: %s -> %s = %s\n",mdbVar->var,CV_FILE_NAME,mdbObj->obj,mdbVar->var,mdbVar->val); invalids++; continue; } - char *validationRule = hashFindVal(termHash,"validate"); + char *validationRule = hashFindVal(termHash,CV_VALIDATE); if (validationRule == NULL) { - verbose(1,"ERROR in cv.ra: Term %s in typeOfTerms but has no 'validate' setting.\n",mdbVar->var); + verbose(1,"ERROR in %s: Term '%s' in typeOfTerms but has no '%s' setting.\n",CV_FILE_NAME,mdbVar->var,CV_VALIDATE); continue; // Should we errAbort? } // NOTE: Working on memory in hash but we are throwing away a comment and removing trailing spaces so that is okay strSwapChar(validationRule,'#','\0'); // Chop off any comment in the setting validationRule = trimSpaces(validationRule); // Validate should be or start with known word - if (startsWithWord("cv",validationRule)) + if (startsWithWord(CV_VALIDATE_CV,validationRule)) { - if (SETTING_NOT_ON(hashFindVal(termHash,"cvDefined"))) // Known type of term but no validation to be done + if (SETTING_NOT_ON(hashFindVal(termHash,CV_TOT_CV_DEFINED))) // Known type of term but no validation to be done { - verbose(1,"ERROR in cv.ra: Term %s says validate in cv but is not 'cvDefined'.\n",mdbVar->var); + verbose(1,"ERROR in %s: Term '%s' says validate in cv but is not '%s'.\n",CV_FILE_NAME,mdbVar->var,CV_TOT_CV_DEFINED); continue; } // cvDefined so every val should be in cv struct hash *cvTermHash = (struct hash *)mdbCvTermHash(mdbVar->var); if (cvTermHash == NULL) { - verbose(1,"ERROR in cv.ra: Term %s says validate in cv but not found as a cv term.\n",mdbVar->var); + verbose(1,"ERROR in %s: Term '%s' says validate in cv but not found as a cv term.\n",CV_FILE_NAME,mdbVar->var); continue; } if (hashFindVal(cvTermHash,mdbVar->val) == NULL) // No cv definition for term so no validation can be done { - char * orControl = skipBeyondDelimit(validationRule,' '); - if (orControl && sameString(orControl,"or None") && sameString(mdbVar->val,"None")) + if (sameString(validationRule,CV_VALIDATE_CV_OR_NONE) && sameString(mdbVar->val,MDB_VAL_ENCODE_EDV_NONE)) continue; - else if (orControl && sameString(orControl,"or control")) + else if (sameString(validationRule,CV_VALIDATE_CV_OR_CONTROL)) { - cvTermHash = (struct hash *)mdbCvTermHash("control"); + cvTermHash = (struct hash *)mdbCvTermHash(CV_TERM_CONTROL); if (cvTermHash == NULL) { - verbose(1,"ERROR in cv.ra: Term control says validate in cv but not found as a cv term.\n"); + verbose(1,"ERROR in %s: Term '%s' says validate in cv but not found as a cv term.\n",CV_FILE_NAME,CV_TERM_CONTROL); continue; } if (hashFindVal(cvTermHash,mdbVar->val) != NULL) continue; } printf("INVALID cv lookup: %s -> %s = %s\n",mdbObj->obj,mdbVar->var,mdbVar->val); invalids++; } } - else if (startsWithWord("date",validationRule)) + else if (startsWithWord(CV_VALIDATE_DATE,validationRule)) { if (dateToSeconds(mdbVar->val,"%F") == 0) { printf("INVALID date: %s -> %s = %s\n",mdbObj->obj,mdbVar->var,mdbVar->val); invalids++; } } - else if (startsWithWord("exists",validationRule)) + else if (startsWithWord(CV_VALIDATE_EXISTS,validationRule)) continue; // (e.g. fileName exists) Nothing to be done at this time. - else if (startsWithWord("float",validationRule)) + else if (startsWithWord(CV_VALIDATE_FLOAT,validationRule)) { char* end; double notNeeded = strtod(mdbVar->val, &end); // Don't want float, just error (However, casting to void resulted in a comple error on Ubuntu Maveric and Lucid) if ((end == mdbVar->val) || (*end != '\0')) { printf("INVALID float: %s -> %s = %s (resulting double: %g)\n",mdbObj->obj,mdbVar->var,mdbVar->val,notNeeded); invalids++; } } - else if (startsWithWord("integer",validationRule)) + else if (startsWithWord(CV_VALIDATE_INT,validationRule)) { char *p0 = mdbVar->val; if (*p0 == '-') p0++; char *p = p0; while ((*p >= '0') && (*p <= '9')) p++; if ((*p != '\0') || (p == p0)) { printf("INVALID integer: %s -> %s = %s\n",mdbObj->obj,mdbVar->var,mdbVar->val); invalids++; } } - else if (startsWithWord("list:",validationRule)) + else if (startsWithWord(CV_VALIDATE_LIST,validationRule)) { validationRule = skipBeyondDelimit(validationRule,' '); if (validationRule == NULL) { - verbose(1,"ERROR in cv.ra: Invalid 'list:' for %s.\n",mdbVar->var); + verbose(1,"ERROR in %s: Invalid '%s' for %s.\n",CV_FILE_NAME,CV_VALIDATE_LIST,mdbVar->var); continue; } int count = chopByChar(validationRule, ',', NULL, 0); if (count == 1) { if (differentString(mdbVar->val,validationRule)) { printf("INVALID list '%s' match: %s -> %s = '%s'.\n",validationRule, mdbObj->obj,mdbVar->var,mdbVar->val); invalids++; } } else if (count > 1) { char **array = needMem(count*sizeof(char*)); chopByChar(cloneString(validationRule), ',', array, count); // Want to also trimSpaces()? No if (stringArrayIx(mdbVar->val, array, count) == -1) { printf("INVALID list '%s' match: %s -> %s = '%s'.\n",validationRule, mdbObj->obj,mdbVar->var,mdbVar->val); invalids++; } } else - verbose(1,"ERROR in cv.ra: Invalid 'validate list: %s' for term %s,\n",validationRule,mdbVar->var); + verbose(1,"ERROR in %s: Invalid 'validate list: %s' for term %s,\n",CV_FILE_NAME,validationRule,mdbVar->var); } - else if (startsWithWord("none",validationRule)) + else if (startsWithWord(CV_VALIDATE_NONE,validationRule)) continue; - else if (startsWithWord("regex:",validationRule)) + else if (startsWithWord(CV_VALIDATE_REGEX,validationRule)) { validationRule = skipBeyondDelimit(validationRule,' '); if (validationRule == NULL) { - verbose(1,"ERROR in cv.ra: Invalid 'regex:' for %s.\n",mdbVar->var); + verbose(1,"ERROR in %s: Invalid '%s' for %s.\n",CV_FILE_NAME,CV_VALIDATE_REGEX,mdbVar->var); continue; } // Real work ahead interpreting regex regex_t regEx; int err = regcomp(®Ex, validationRule, REG_NOSUB); if(err != 0) // Compile the regular expression so that it can be used. Use: REG_EXTENDED ? { char buffer[128]; regerror(err, ®Ex, buffer, sizeof buffer); - verbose(1,"ERROR in cv.ra: Invalid regular expression for %s - %s. %s\n",mdbVar->var,validationRule,buffer); + verbose(1,"ERROR in %s: Invalid regular expression for %s - %s. %s\n",CV_FILE_NAME,mdbVar->var,validationRule,buffer); continue; } err = regexec(®Ex, mdbVar->val, 0, NULL, 0); if (err != 0) { //char buffer[128]; //regerror(err, ®Ex, buffer, sizeof buffer); printf("INVALID regex '%s' match: %s -> %s = '%s'.\n",validationRule, mdbObj->obj,mdbVar->var,mdbVar->val); invalids++; } regfree(®Ex); } else - verbose(1,"ERROR in cv.ra: Unknown validationRule rule '%s' for term %s.\n",validationRule,mdbVar->var); + verbose(1,"ERROR in %s: Unknown validationRule rule '%s' for term %s.\n",CV_FILE_NAME,validationRule,mdbVar->var); } } return invalids; } #define EXPERIMENTS_TABLE "hgFixed.encodeExp" -#define EDV_VAR_NAME "expVars" -#define EXP_ID_NAME "expId" -#define COMPOSITE_VAR "composite" -#define DCC_ACCESSION "dccAccession" struct mdbObj *mdbObjsEncodeExperimentify(struct sqlConnection *conn,char *db,char *tableName,struct mdbObj **pMdbObjs, int warn,boolean createExpIfNecessary) // Organizes objects into experiments and validates experiment IDs. Will add/update the ids in the structures. // If warn=1, then prints to stdout all the experiments/obs with missing or wrong expIds; // warn=2, then print line for each obj with expId or warning. // createExpIfNecessary means go ahead and add to the hgFixed.encodeExp table to get an ID // Returns a new set of mdbObjs that is what can (and should) be used to update the mdb via mdbObjsSetToDb(). { if (pMdbObjs == NULL || *pMdbObjs == NULL) return 0; struct mdbObj *mdbObjs = *pMdbObjs; struct mdbObj *mdbProcessedObs = NULL; struct mdbObj *mdbUpdateObjs = NULL; /* Here is what "experimentify" does from "mdbPrint -encodeExp" and "mdbUpdate -encodeExp": - Uses normal selection methods to get a set of objects (e.g. one composite worth) or all objs. (in mdbPrint and mdbUpdate) - This API: - Breaks up and walks through set of objects composite by composite - Looks up EDVs (expiment defining vars) for composite. Currently these are defined in the mdb under objType=composite expVars= (e.g. obj=wgEncodeBroadHistone objType=composite expVars=lab,dataType,cell,antibody) FIXME: Nice to add white-list to cv.ra typeOfTerms - Breaks up and walks through composite objects exp by exp (handle's "None"s gracefully) - Determines what expId should be. - Creates new mdbObjs list of updates needed to put expId and dccAccession into the mdb. - From "mdbPrint", this API warns of mismatches or missing expIds - From "mdbUpdate" (not -test) then that utility will update the mdb from this API's return structs. If -test, will reveal what would be updated. */ // Sort all objects by composite, so that we handle composite by composite -mdbObjsSortOnVars(&mdbObjs, COMPOSITE_VAR); +mdbObjsSortOnVars(&mdbObjs, MDB_VAR_COMPOSITE); struct dyString *dyVars = dyStringNew(256); while(mdbObjs != NULL) { // Work on a composite at a time char *compName = NULL; while(mdbObjs != NULL && compName == NULL) { - compName = mdbObjFindValue(mdbObjs,COMPOSITE_VAR); + compName = mdbObjFindValue(mdbObjs,MDB_VAR_COMPOSITE); if (compName == NULL) { - verbose(1, "Object '%s' has no %s defined.\n",mdbObjs->obj,COMPOSITE_VAR); + verbose(1, "Object '%s' has no %s defined.\n",mdbObjs->obj,MDB_VAR_COMPOSITE); mdbProcessedObs = slCat(mdbProcessedObs,slPopHead(&mdbObjs)); continue; } } - struct mdbObj *mdbCompositeObjs = mdbObjsFilter(&mdbObjs, COMPOSITE_VAR, compName,TRUE); + struct mdbObj *mdbCompositeObjs = mdbObjsFilter(&mdbObjs, MDB_VAR_COMPOSITE, compName,TRUE); // --- At this point we have nibbled off a composite worth of objects from the full set of objects // Find the composite obj if it exists - struct mdbObj *compObj = mdbObjsFilter(&mdbCompositeObjs, "objType", "composite",TRUE); + struct mdbObj *compObj = mdbObjsFilter(&mdbCompositeObjs, MDB_OBJ_TYPE, MDB_OBJ_TYPE_COMPOSITE,TRUE); if (compObj == NULL) // May be NULL if mdbObjs passed in was produced by too narrow of selection criteria { dyStringClear(dyVars); - dyStringPrintf(dyVars,"composite=%s %s=", compName,EDV_VAR_NAME); + dyStringPrintf(dyVars,"%s=%s %s=", MDB_VAR_COMPOSITE, compName,MDB_VAR_ENCODE_EDVS); struct mdbByVar *mdbByVars = mdbByVarsLineParse(dyStringContents(dyVars)); compObj = mdbObjsQueryByVars(conn,tableName,mdbByVars); } // Obtain experiment defining variables for the composite dyStringClear(dyVars); if (compObj != NULL) { - char *expVars = mdbObjFindValue(compObj,EDV_VAR_NAME); + char *expVars = mdbObjFindValue(compObj,MDB_VAR_ENCODE_EDVS); if (expVars) dyStringAppend(dyVars, expVars); // expVars in form of "var1 var2 var3" } if (dyStringLen(dyVars) == 0) { // figure them out? // NOTE: Kate wants white list of EDVs from the cv. Wranglers satisfied with defining them in an mdbObj of objType=composite // Walk through the mdbCompositeObjs looking for matching vars. - verbose(1, "There are no experiment defining variables established for this composite. Add them to obj %s => var:%s.\n",compName,EDV_VAR_NAME); + verbose(1, "There are no experiment defining variables established for this %s. Add them to obj %s => var:%s.\n", + MDB_VAR_COMPOSITE, compName,MDB_VAR_ENCODE_EDVS); mdbProcessedObs = slCat(mdbProcessedObs,mdbCompositeObjs); mdbCompositeObjs = NULL; continue; } // Parse into individual Experiment Defining Variables (no vals at the composite level) if (strchr(dyStringContents(dyVars), ',') != NULL) // Tolerate delimit by commas strSwapChar(dyStringContents(dyVars),',',' '); else if (strchr(dyStringContents(dyVars), ';') != NULL) // Tolerate delimit by semicolons strSwapChar(dyStringContents(dyVars),';',' '); struct slName *compositeEdvs = slNameListFromString(dyStringContents(dyVars), ' '); assert(slCount(compositeEdvs) > 0); if (warn > 0) - printf("Composite '%s' with %d objects has %d EDVs(%s): [%s].\n",compName,slCount(mdbCompositeObjs),slCount(compositeEdvs),EDV_VAR_NAME,dyStringContents(dyVars)); // Set the stage + printf("Composite '%s' with %d objects has %d EDVs(%s): [%s].\n",compName,slCount(mdbCompositeObjs), + slCount(compositeEdvs),MDB_VAR_ENCODE_EDVS,dyStringContents(dyVars)); // Set the stage // Organize composite objs by EDVs dyStringPrintf(dyVars, " view replicate "); // Allows for nicer sorted list char *edvSortOrder = cloneString(dyStringContents(dyVars)); // Walk through objs for an exp as defined by EDVs int expCount=0; // Count of experiments in composite int expMissing=0; // Count of experiments with missing expId int expObjsCount=0; // Total of all experimental object accoss the composite int expMax=0; // Largest experiment (in number of objects) int expMin=999; // Smallest experiment (in number of objects) while(mdbCompositeObjs != NULL) { // Must sort each cycle, because sort order is lost during mdbObjs FilterByVars(); mdbObjsSortOnVars(&mdbCompositeObjs, edvSortOrder); @@ -2629,32 +2608,32 @@ int valsFound = 0; for(;var!=NULL;var=var->next) { char *val = mdbObjFindValue(mdbCompositeObjs,var->name); // Looking at first obj in queue if (val) { valsFound++; dyStringPrintf(filterVars,"%s=%s ",var->name,val); edvVar = mdbVarAdd(&edvVars, var->name,val); dyStringPrintf(dyVars,"%s=%s ",edvVar->var,edvVar->val); } else { if (differentWord(var->name,ENCODE_EXP_FIELD_ORGANISM)) // Does not go into EDV's sent to encodeExp table { - dyStringPrintf(filterVars,"%s=None ",var->name); - edvVar = mdbVarAdd(&edvVars, var->name,"None"); + dyStringPrintf(filterVars,"%s=%s ",var->name,MDB_VAL_ENCODE_EDV_NONE); + edvVar = mdbVarAdd(&edvVars, var->name,MDB_VAL_ENCODE_EDV_NONE); dyStringPrintf(dyVars,"%s=%s ",edvVar->var,edvVar->val); } } } dyStringContents(dyVars)[dyStringLen(dyVars) -1] = '\0'; // Nicer printing is all if (valsFound == 0) { verbose(1, "There are no experiment defining variables for this object '%s'.\n",mdbCompositeObjs->obj); slAddHead(&mdbProcessedObs,slPopHead(&mdbCompositeObjs)); // We're done with this one dyStringFree(&filterVars); mdbVarsFree(&edvVars); continue; } @@ -2703,117 +2682,117 @@ printf("Experiment %s has %d objects based upon %d EDVs: [%s].\n",experimentId,slCount(mdbExpObjs),valsFound,dyStringContents(dyVars)); // Set the stage } // Now we can walk through each obj in experiment and determine if it has the coorect expId int foundId = FALSE; int errors = objsInExp; if (expMax < objsInExp) expMax = objsInExp; if (expMin > objsInExp) expMin = objsInExp; while(mdbExpObjs != NULL) { struct mdbObj *obj = slPopHead(&mdbExpObjs); { // NOTE: This list could expand but we expect only tables and files to be objs in an experiment - char *objType = mdbObjFindValue(obj,"objType"); - assert(objType != NULL && (sameString(objType,"table") || sameString(objType,"file"))); + char *objType = mdbObjFindValue(obj,MDB_OBJ_TYPE); + assert(objType != NULL && (sameString(objType,MDB_OBJ_TYPE_TABLE) || sameString(objType,MDB_OBJ_TYPE_FILE))); } boolean updateObj = FALSE; - char *val = mdbObjFindValue(obj,EXP_ID_NAME); + char *val = mdbObjFindValue(obj,MDB_VAR_ENCODE_EXP_ID); if (val != NULL) { foundId = TRUE; // warn==1 will give only 1 exp wide error if no individual errors. NOTE: would be nice if those with expId sorted to beginning, but can't have everything. int thisId = atoi(val); if (thisId == expId && expId != -1) { errors--; // One less error if (warn > 1) // NOTE: Could give more info for each obj as per wrangler's desires - printf(" %s obj='%s' has %s set.\n",experimentId,obj->obj,EXP_ID_NAME); + printf(" %s obj='%s' has %s set.\n",experimentId,obj->obj,MDB_VAR_ENCODE_EXP_ID); } else { updateObj = TRUE; if (warn > 0) - printf(" %s obj='%s' has bad %s=%s.\n",experimentId,obj->obj,EXP_ID_NAME,val); + printf(" %s obj='%s' has bad %s=%s.\n",experimentId,obj->obj,MDB_VAR_ENCODE_EXP_ID,val); } } else { updateObj = (expId != -1); if ((foundId && warn > 0) || warn > 1) - printf(" %s obj='%s' has no %s.\n",experimentId,obj->obj,EXP_ID_NAME); + printf(" %s obj='%s' has no %s.\n",experimentId,obj->obj,MDB_VAR_ENCODE_EXP_ID); } // This object needs to be updated. if (updateObj) { - mdbObjSetVarInt(obj,EXP_ID_NAME,expId); - struct mdbObj *newObj = mdbObjCreate(obj->obj,EXP_ID_NAME, experimentId); + mdbObjSetVarInt(obj,MDB_VAR_ENCODE_EXP_ID,expId); + struct mdbObj *newObj = mdbObjCreate(obj->obj,MDB_VAR_ENCODE_EXP_ID, experimentId); assert(exp != NULL); - mdbObjSetVar(newObj,DCC_ACCESSION,exp->accession); + if (exp->accession != NULL) + mdbObjSetVar(newObj,MDB_VAR_DCC_ACCESSION,exp->accession); slAddHead(&mdbUpdateObjs,newObj); } slAddHead(&mdbProcessedObs,obj); } // Done with one experiment encodeExpFree(&exp); if (!foundId && errors > 0) { expMissing++; if (warn > 0) - printf(" %s all %d objects are missing an %s.\n",experimentId,objsInExp,EXP_ID_NAME); + printf(" %s all %d objects are missing an %s.\n",experimentId,objsInExp,MDB_VAR_ENCODE_EXP_ID); } } // Done with one composite if (expCount > 0) printf("Composite '%s' has %d recognizable experiment%s with %d missing an %s.\n objects/experiment: min:%d max:%d mean:%lf.\n", - compName,expCount,(expCount != 1?"s":""),expMissing,EXP_ID_NAME,expMin,expMax,((double)expObjsCount/expCount)); + compName,expCount,(expCount != 1?"s":""),expMissing,MDB_VAR_ENCODE_EXP_ID,expMin,expMax,((double)expObjsCount/expCount)); if (edvSortOrder != NULL) freeMem(edvSortOrder); slNameFreeList(compositeEdvs); } // Done with all composites dyStringFree(&dyVars); *pMdbObjs = mdbProcessedObs; return mdbUpdateObjs; } boolean mdbObjIsEncode(struct mdbObj *mdb) // Return true if this metaDb object is for ENCODE { -char *project = mdbObjFindValue(mdb, "project"); -if (sameOk(project, ENCODE_MDB_PROJECT)) - return TRUE; -return FALSE; +return mdbObjContains(mdb, MDB_VAR_PROJECT, MDB_VAL_ENCODE_PROJECT); // Could be more stringent: -//return (mdbObjFindValue(mdbObj, "lab") != NULL && mdbObjFindValue(mdbObj, "dataType") != NULL && mdbObjFindValue(mdbObj, "subId")); +//return ( mdbObjContains(mdbObj, MDB_VAR_LAB, NULL) +// && mdbObjContains(mdbObj, MDB_VAR_DATATYPE, NULL) +// && mdbObjContains(mdbObj, MDB_VAR_ENCODE_SUBID,NULL)); } boolean mdbObjInComposite(struct mdbObj *mdb, char *composite) // Return true if metaDb object is in specified composite. // If composite is NULL, always return true { -if (composite == NULL || sameOk(composite, mdbObjFindValue(mdb, "composite"))) +if (composite == NULL || sameOk(composite, mdbObjFindValue(mdb, MDB_VAR_COMPOSITE))) return TRUE; return FALSE; } // --------------- Free at last ---------------- void mdbObjsFree(struct mdbObj **mdbObjsPtr) // Frees one or more metadata objects and any contained mdbVars. Will free any hashes as well. { if(mdbObjsPtr != NULL && *mdbObjsPtr != NULL) { // free all roots struct mdbObj *mdbObj = NULL; while((mdbObj = slPopHead(mdbObjsPtr)) != NULL) { @@ -2862,35 +2841,35 @@ // ----------------- CGI specific routines for use with tdb ----------------- #define MDB_NOT_FOUND ((struct mdbObj *)-666) #define METADATA_NOT_FOUND ((struct mdbObj *)-999) #define MDB_OBJ_KEY "mdbObj" static struct mdbObj *metadataForTableFromTdb(struct trackDb *tdb) // Returns the metadata for a table from a tdb setting. { char *setting = trackDbSetting(tdb, MDB_METADATA_KEY); if(setting == NULL) return NULL; struct mdbObj *mdbObj; AllocVar(mdbObj); mdbObj->obj = cloneString(tdb->table); AllocVar(mdbObj->vars); mdbObj->vars->var = cloneString(MDB_OBJ_TYPE); -mdbObj->vars->val = cloneString("table"); +mdbObj->vars->val = cloneString(MDB_OBJ_TYPE_TABLE); mdbObj->varHash = hashNew(0); hashAdd(mdbObj->varHash, mdbObj->vars->var, mdbObj->vars); mdbObj = mdbObjAddVarPairs(mdbObj,setting); -mdbObjRemoveVars(mdbObj,"tableName"); // NOTE: Special hint that the tdb metadata is used since no mdb metadata is found +mdbObjRemoveVars(mdbObj,MDB_VAR_TABLENAME); // NOTE: Special hint that the tdb metadata is used since no mdb metadata is found return mdbObj; } const struct mdbObj *metadataForTable(char *db,struct trackDb *tdb,char *table) // Returns the metadata for a table. NEVER FREE THIS STRUCT! { struct mdbObj *mdbObj = NULL; // See of the mdbObj was already built if(tdb != NULL) { mdbObj = tdbExtrasGetOrDefault(tdb, MDB_OBJ_KEY,NULL); if(mdbObj == METADATA_NOT_FOUND) // NOT in mtatbl, not in tdb metadata setting! return NULL; else if(mdbObj == MDB_NOT_FOUND) // looked mdb already and not found! @@ -2975,51 +2954,51 @@ verbose(3, "rows (vars) returned: %d\n",slCount(mdb)); struct mdbObj *mdbObjs = mdbObjsLoadFromMemory(&mdb,TRUE); return mdbObjs; } struct mdbObj *mdbObjRepeatedSearch(struct sqlConnection *conn,struct slPair *varValPairs,boolean tables,boolean files) // Search the metaDb table for objs by var,val pairs. Uses mdbCvSearchMethod() if available. // This method will use mdbObjsQueryByVars() { struct slPair *onePair; struct dyString *dyTerms = dyStringNew(256); // Build list of terms as "var1=val1 var2=val2a,val2b,val2c var3=%val3%" for(onePair = varValPairs; onePair != NULL; onePair = onePair->next) { - if (isEmpty(((char *)(onePair->val)))) // NOTE: All the commas are needed to get the macro to do the right thing + if (isEmpty(((char *)(onePair->val)))) // NOTE: All the parens are needed to get the macro to do the right thing continue; enum mdbCvSearchable searchBy = mdbCvSearchMethod(onePair->name); if (searchBy == cvsSearchBySingleSelect || searchBy == cvsSearchByMultiSelect) // multiSelect val will be filled with a comma delimited list dyStringPrintf(dyTerms,"%s=%s ",onePair->name,(char *)onePair->val); else if (searchBy == cvsSearchByFreeText) // If select is by free text then like dyStringPrintf(dyTerms,"%s=%%%s%% ",onePair->name,(char *)onePair->val); - else if (sameWord(onePair->name,"composite")) // special case. Not directly searchable by UI but indirectly and will show up here. + else if (sameWord(onePair->name,MDB_VAR_COMPOSITE)) // special case. Not directly searchable by UI but indirectly and will show up here. dyStringPrintf(dyTerms,"%s=%s ",onePair->name,(char *)onePair->val); else if (searchBy == cvsSearchByDateRange || searchBy == cvsSearchByIntegerRange) { // TO BE IMPLEMENTED // Requires new mdbObjSearch API and more than one (char *)onePair->val warn("mdb search by date or number is not yet implemented."); } } // Be sure to include table or file in selections if (tables) - dyStringAppend(dyTerms,"objType=table "); + dyStringPrintf(dyTerms,"%s=%s ",MDB_OBJ_TYPE,MDB_OBJ_TYPE_TABLE); if (files) - dyStringAppend(dyTerms,"fileName=? "); + dyStringPrintf(dyTerms,"%s=? ",MDB_VAR_FILENAME); // Build the mdbByVals struct and then select all mdbObjs in one query struct mdbByVar *mdbByVars = mdbByVarsLineParse(dyStringContents(dyTerms)); dyStringClear(dyTerms); char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first struct mdbObj *mdbObjs = mdbObjsQueryByVars(conn,tableName,mdbByVars); return mdbObjs; } struct slName *mdbObjNameSearch(struct sqlConnection *conn, char *var, char *val, char *op, int limit, boolean tables, boolean files) // Search the metaDb table for objs by var and val. Can restrict by op "is", "like", "in" and accept (non-zero) limited string size // Search is via mysql, so it's case-insensitive. Return is sorted on obj. { // Note: This proves faster than getting mdbObjs then converting to slNames struct mdbObj *mdbObjs = mdbObjSearch(conn,var,val,op,limit); @@ -3045,234 +3024,250 @@ if (!tables && !files) errAbort("mdbValSearch requests values for neither table nor file objects.\n"); char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first struct dyString *dyQuery = dyStringNew(512); if (limit > 0) dyStringPrintf(dyQuery,"select distinct LEFT(val,%d)",limit); else dyStringPrintf(dyQuery,"select distinct val"); dyStringPrintf(dyQuery," from %s l1 where l1.var='%s' ",tableName,var); if (!tables || !files) dyStringPrintf(dyQuery,"and exists (select l2.obj from %s l2 where l2.obj = l1.obj and l2.var='objType' and l2.val='%s')", - tableName,tables?"table":"file"); + tableName,tables?MDB_OBJ_TYPE_TABLE:MDB_OBJ_TYPE_FILE); dyStringAppend(dyQuery," order by val"); retVal = sqlQuickList(conn, dyStringCannibalize(&dyQuery)); slNameSortCase(&retVal); return retVal; } +// ------------ CONTROLLED VOCABULARY APIs -------------- +// CV Defines that should not necessarily be public + +// CV UGLY TERMS (NOTE: These should be hiddne inside cv.c APIS and callers should use non-UGLY terms) +#define CV_UGLY_TOT_CELLTYPE "cellType" +#define CV_UGLY_TERM_CELL_LINE "Cell Line" +#define CV_UGLY_TERM_ANTIBODY "Antibody" + +// Type of Terms searchable defines +#define CV_SEARCHABLE "searchable" +#define CV_SEARCHABLE_SINGLE_SELECT "select" +#define CV_SEARCHABLE_MULTI_SELECT "multiSelect" +#define CV_SEARCHABLE_FREE_TEXT "freeText" + // TODO: decide to make this public or hide it away inside the one function so far that uses it. static char *cv_file() // return default location of cv.ra { static char filePath[PATH_LEN]; char *root = hCgiRoot(); if (root == NULL || *root == 0) root = "/usr/local/apache/cgi-bin/"; // Make this check out sandboxes? // root = "/cluster/home/tdreszer/kent/src/hg/makeDb/trackDb/cv/alpha/"; // Make this check out sandboxes? -safef(filePath, sizeof(filePath), "%s/encode/cv.ra", root); +safef(filePath, sizeof(filePath), "%s/encode/%s", root,CV_FILE_NAME); if(!fileExists(filePath)) - errAbort("Error: can't locate cv.ra; %s doesn't exist\n", filePath); + errAbort("Error: can't locate %s; %s doesn't exist\n", CV_FILE_NAME, filePath); return filePath; } const struct hash *mdbCvTermHash(char *term) // returns a hash of hashes of a term which should be defined in cv.ra // NOTE: in static memory: DO NOT FREE { static struct hash *cvHashOfHashOfHashes = NULL; -if (sameString(term,"cell")) - term = "Cell Line"; -else if (sameString(term,"antibody")) - term = "Antibody"; +if (sameString(term,MDB_VAR_CELL)) + term = CV_UGLY_TERM_CELL_LINE; +else if (sameString(term,MDB_VAR_ANTIBODY)) + term = CV_UGLY_TERM_ANTIBODY; if (cvHashOfHashOfHashes == NULL) cvHashOfHashOfHashes = hashNew(0); struct hash *cvTermHash = hashFindVal(cvHashOfHashOfHashes,term); // Establish cv hash of Term Types if it doesn't already exist if (cvTermHash == NULL) { - cvTermHash = raReadWithFilter(cv_file(), "term","type",term); + cvTermHash = raReadWithFilter(cv_file(), CV_TERM,CV_TYPE,term); if (cvTermHash != NULL) hashAdd(cvHashOfHashOfHashes,term,cvTermHash); } return cvTermHash; } struct slPair *mdbValLabelSearch(struct sqlConnection *conn, char *var, int limit, boolean tags, boolean tables, boolean files) // Search the metaDb table for vals by var and returns val (as pair->name) and controlled vocabulary (cv) label // (if it exists) (as pair->val). Can impose (non-zero) limit on returned string size of name. // if requested, return cv tag instead of mdb val. If requested, limit to table objs or file objs // Return is case insensitive sorted on label (cv label or else val). { // TODO: Change this to use normal mdb struct routines? if (!tables && !files) errAbort("mdbValSearch requests values for neither table nor file objects.\n"); char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first struct dyString *dyQuery = dyStringNew(512); if (limit > 0) dyStringPrintf(dyQuery,"select distinct LEFT(val,%d)",limit); else dyStringPrintf(dyQuery,"select distinct val"); dyStringPrintf(dyQuery," from %s l1 where l1.var='%s' ",tableName,var); if (!tables || !files) dyStringPrintf(dyQuery,"and exists (select l2.obj from %s l2 where l2.obj = l1.obj and l2.var='objType' and l2.val='%s')", - tableName,tables?"table":"file"); + tableName,tables?MDB_OBJ_TYPE_TABLE:MDB_OBJ_TYPE_FILE); struct hash *varHash = (struct hash *)mdbCvTermHash(var); struct slPair *pairs = NULL; struct sqlResult *sr = sqlGetResult(conn, dyStringContents(dyQuery)); dyStringFree(&dyQuery); char **row; while ((row = sqlNextRow(sr)) != NULL) { char *val = row[0]; char *label = NULL; if (varHash != NULL) { struct hash *valHash = hashFindVal(varHash,val); if (valHash != NULL) { - label = cloneString(hashOptionalVal(valHash,"label",row[0])); + label = cloneString(hashOptionalVal(valHash,CV_LABEL,row[0])); if (tags) { - char *tag = hashFindVal(valHash,"tag"); + char *tag = hashFindVal(valHash,CV_TAG); if (tag != NULL) val = tag; } } } if (label == NULL); label = cloneString(row[0]); label = strSwapChar(label,'_',' '); // vestigial _ meaning space slPairAdd(&pairs,val,label); } sqlFreeResult(&sr); slPairValSortCase(&pairs); return pairs; } const struct hash *mdbCvTermTypeHash() // returns a hash of hashes of mdb and controlled vocabulary (cv) term types // Those terms should contain label,description,searchable,cvDefined,hidden // NOTE: in static memory: DO NOT FREE { // NOTE: "typeOfTerm" is specialized, so don't use mdbCvTermHash static struct hash *cvHashOfTermTypes = NULL; // Establish cv hash of Term Types if it doesn't already exist if (cvHashOfTermTypes == NULL) { - cvHashOfTermTypes = raReadWithFilter(cv_file(), "term","type","typeOfTerm"); + cvHashOfTermTypes = raReadWithFilter(cv_file(), CV_TERM,CV_TYPE,CV_TOT); // Patch up an ugly inconsistency with 'cell' - struct hash *cellHash = hashRemove(cvHashOfTermTypes,"cellType"); + struct hash *cellHash = hashRemove(cvHashOfTermTypes,CV_UGLY_TOT_CELLTYPE); if (cellHash) { - hashAdd(cvHashOfTermTypes,"cell",cellHash); - hashReplace(cellHash, "term", cloneString("cell")); // spilling memory of 'cellType' val + hashAdd(cvHashOfTermTypes,CV_TERM_CELL,cellHash); + hashReplace(cellHash, CV_TERM, cloneString(CV_TERM_CELL)); // spilling memory of 'cellType' val } - struct hash *abHash = hashRemove(cvHashOfTermTypes,"Antibody"); + struct hash *abHash = hashRemove(cvHashOfTermTypes,CV_UGLY_TERM_ANTIBODY); if (abHash) { - hashAdd(cvHashOfTermTypes,"antibody",abHash); - hashReplace(abHash, "term", cloneString("antibody")); // spilling memory of 'Antibody' val + hashAdd(cvHashOfTermTypes,CV_TERM_ANTIBODY,abHash); + hashReplace(abHash, CV_TERM, cloneString(CV_TERM_ANTIBODY)); // spilling memory of 'Antibody' val } } return cvHashOfTermTypes; } struct slPair *mdbCvWhiteList(boolean searchTracks, boolean cvDefined) // returns the official mdb/controlled vocabulary terms that have been whitelisted for certain uses. // TODO: change to return struct that includes searchable! { struct slPair *whitePairs = NULL; // Get the list of term types from thew cv struct hash *termTypeHash = (struct hash *)mdbCvTermTypeHash(); struct hashCookie hc = hashFirst(termTypeHash); struct hashEl *hEl; while ((hEl = hashNext(&hc)) != NULL) { char *setting = NULL; struct hash *typeHash = (struct hash *)hEl->val; //if (!includeHidden) { - setting = hashFindVal(typeHash,"hidden"); + setting = hashFindVal(typeHash,CV_TOT_HIDDEN); if(SETTING_IS_ON(setting)) continue; } if (searchTracks) { - setting = hashFindVal(typeHash,"searchable"); + setting = hashFindVal(typeHash,CV_SEARCHABLE); if (setting == NULL - || (differentWord(setting,"select") && differentWord(setting,"multiSelect") && differentWord(setting,"freeText"))) + || ( differentWord(setting,CV_SEARCHABLE_SINGLE_SELECT) + && differentWord(setting,CV_SEARCHABLE_MULTI_SELECT) + && differentWord(setting,CV_SEARCHABLE_FREE_TEXT))) continue; } if (cvDefined) { - setting = hashFindVal(typeHash,"cvDefined"); + setting = hashFindVal(typeHash,CV_TOT_CV_DEFINED); if(SETTING_NOT_ON(setting)) continue; } char *term = hEl->name; - char *label = hashFindVal(typeHash,"label"); + char *label = hashFindVal(typeHash,CV_LABEL); if (label == NULL) label = term; slPairAdd(&whitePairs, term, cloneString(label)); // Term gets cloned in slPairAdd } if (whitePairs != NULL) slPairValSortCase(&whitePairs); return whitePairs; } enum mdbCvSearchable mdbCvSearchMethod(char *term) // returns whether the term is searchable // TODO: replace with mdbCvWhiteList() returning struct { // Get the list of term types from thew cv struct hash *termTypeHash = (struct hash *)mdbCvTermTypeHash(); struct hash *termHash = hashFindVal(termTypeHash,term); if (termHash != NULL) { - char *searchable = hashFindVal(termHash,"searchable"); + char *searchable = hashFindVal(termHash,CV_SEARCHABLE); if (searchable != NULL) { - if (sameWord(searchable,"select")) + if (sameWord(searchable,CV_SEARCHABLE_SINGLE_SELECT)) return cvsSearchBySingleSelect; - if (sameWord(searchable,"multiSelect")) + if (sameWord(searchable,CV_SEARCHABLE_MULTI_SELECT)) return cvsSearchByMultiSelect; - if (sameWord(searchable,"freeText")) + if (sameWord(searchable,CV_SEARCHABLE_FREE_TEXT)) return cvsSearchByFreeText; //if (sameWord(searchable,"date")) // return cvsSearchByDateRange; //if (sameWord(searchable,"numeric")) // return cvsSearchByIntegerRange; } } return cvsNotSearchable; } const char *cvLabel(char *term) // returns cv label if term found or else just term { // Get the list of term types from thew cv struct hash *termTypeHash = (struct hash *)mdbCvTermTypeHash(); struct hash *termHash = hashFindVal(termTypeHash,term); if (termHash != NULL) { - char *label = hashFindVal(termHash,"label"); + char *label = hashFindVal(termHash,CV_LABEL); if (label != NULL) return label; } return term; }