a233918b8efa335e93b8432c08ac7d5dd2c019bd tdreszer Wed Mar 23 12:48:02 2011 -0700 Better management of mdb vars and cv terms string literals in anticipation of splitting out cv lib. Also removed the varType once and for all diff --git src/hg/inc/mdb.h src/hg/inc/mdb.h index 3dbe230..b1bfd8a 100644 --- src/hg/inc/mdb.h +++ src/hg/inc/mdb.h @@ -80,30 +80,64 @@ #define mdbTabOut(el,f) mdbOutput(el,f,'\t','\n'); /* Print out mdb as a line in a tab-separated file. */ #define mdbCommaOut(el,f) mdbOutput(el,f,',',','); /* Print out mdb as a comma separated list including final comma. */ void mdbJsonOutput(struct mdb *el, FILE *f); /* Print out mdb in JSON format. */ /* -------------------------------- End autoSql Generated Code -------------------------------- */ #include "trackDb.h" #define MDB_DEFAULT_NAME "metaDb" +// The three mdb tuples +#define MDB_OBJ "obj" +#define MDB_VAR "var" +#define MDB_VAL "val" + +// OBJECT TYPES +#define MDB_OBJ_TYPE "objType" +#define MDB_OBJ_TYPE_TABLE "table" +#define MDB_OBJ_TYPE_FILE "file" +#define MDB_OBJ_TYPE_COMPOSITE "composite" + +// WELL KNOWN MDB VARS +#define MDB_VAR_COMPOSITE MDB_OBJ_TYPE_COMPOSITE +#define MDB_VAR_ANTIBODY "antibody" +#define MDB_VAR_CELL "cell" +#define MDB_VAR_LAB "lab" +#define MDB_VAR_DATATYPE "dataType" +#define MDB_VAR_TABLENAME "tableName" +#define MDB_VAR_FILENAME "fileName" +#define MDB_VAR_FILEINDEX "fileIndex" +#define MDB_VAR_DCC_ACCESSION "dccAccession" +#define MDB_VAR_PROJECT "project" + +// SPECIAL MDB VALS +#define MDB_VAL_SURE_TO_NOT_MATCH "{nothing}" + +// ENCODE Specific (at least for now) +#define MDB_VAL_ENCODE_PROJECT "wgEncode" +#define MDB_VAR_ENCODE_SUBID "subId" +#define MDB_VAR_ENCODE_EDVS "expVars" +#define MDB_VAR_ENCODE_EXP_ID "expId" +#define MDB_VAL_ENCODE_EDV_NONE "None" + + // The mdb holds metadata primarily for tables. // Many types of objects could be supported, though currently files are the only other type. 
// It is easy to imagine using the current mdb to support hierarchical trees of metadata. // For example a composite type object called "myComposte" could have metadata that is valid for // all tables that have the var=composite val=myComposte metadata defined. // // There are 2 ways to look at the metadata: By Obj: obj->[var=val] and By Var: var->[val->[obj]]. // By Obj: an object has many var/val pairs but only one val for each unique var. Querying by // object creates a single (2 level) one to many structure. // By Var: a variable has many possible values and each value may be defined for more than one object. // Therefore, querying by var results in a (3 level) one to many to many structure. struct mdbVar // The metadata var=val construct. This is contained by mdbObj { @@ -333,34 +367,34 @@ // ----------------- Validateion and specialty APIs ----------------- int mdbObjsValidate(struct mdbObj *mdbObjs, boolean full); // Validates vars and vals against cv.ra. Returns count of errors found. // Full considers vars not defined in cv as invalids struct mdbObj *mdbObjsEncodeExperimentify(struct sqlConnection *conn,char *db,char *tableName,struct mdbObj **pMdbObjs, int warn,boolean createExpIfNecessary); // Organizes objects into experiments and validates experiment IDs. Will add/update the ids in the structures. // If warn=1, then prints to stdout all the experiments/obs with missing or wrong expIds; // warn=2, then print line for each obj with expId or warning. // createExpIfNecessary means go ahead and add to the hgFixed.encodeExp table to get an ID // Returns a new set of mdbObjs that is what can (and should) be used to update the mdb via mdbObjsSetToDb(). 
// -- Requested by Kate: -- -#define MDB_FIELD_LAB "lab" -#define MDB_FIELD_DATA_TYPE "dataType" -#define MDB_FIELD_CELL_TYPE "cell" -#define ENCODE_MDB_PROJECT "wgEncode" +#define MDB_FIELD_LAB MDB_VAR_LAB +#define MDB_FIELD_DATA_TYPE MDB_VAR_DATATYPE +#define MDB_FIELD_CELL_TYPE MDB_VAR_CELL +#define ENCODE_MDB_PROJECT MDB_VAL_ENCODE_PROJECT boolean mdbObjIsEncode(struct mdbObj *mdbObj); // Returns TRUE if MDB object is an ENCODE object (project=wgEncode) boolean mdbObjInComposite(struct mdbObj *mdb, char *composite); // Returns TRUE if metaDb object is in specified composite. // If composite is NULL, always return true // FIXME: KATE Why return true if composite not defined??? //struct encodeExp *encodeExps(char *composite,char *expTable); //struct mdbObjs *mdbObjsForDefinedExpId(int expId); // Returns the mdb objects belonging to a single encode experiment defined in the encodExp table // --------------- Free at last ---------------- void mdbObjsFree(struct mdbObj **mdbObjsPtr); @@ -392,30 +426,63 @@ // Search the metaDb table for objs by var and val. Can restrict by op "is" or "like" and accept (non-zero) limited string size // Search is via mysql, so it's case-insensitive. Return is sorted on obj. struct slName *mdbValSearch(struct sqlConnection *conn, char *var, int limit, boolean tables, boolean files); // Search the metaDb table for vals by var. Can impose (non-zero) limit on returned string size of val // Search is via mysql, so it's case-insensitive. Return is sorted on val. struct slPair *mdbValLabelSearch(struct sqlConnection *conn, char *var, int limit, boolean tags, boolean tables, boolean files); // Search the metaDb table for vals by var and returns val (as pair->name) and controlled vocabulary (cv) label // (if it exists) (as pair->val). Can impose (non-zero) limit on returned string size of name. // if requested, return cv tag instead of mdb val. 
If requested, limit to table objs or file objs // Return is case insensitive sorted on label (cv label or else val). #define mdbPairVal(pair) (pair)->name #define mdbPairLabel(pair) (pair)->val +// ------------ CONTROLLED VOCABULARY APIs -------------- + +#define CV_FILE_NAME "cv.ra" + +// CV Common settings +#define CV_TERM "term" +#define CV_TYPE "type" +#define CV_LABEL "label" +#define CV_TAG "tag" + +// Type of Terms defines +#define CV_TOT "typeOfTerm" +#define CV_TOT_HIDDEN "hidden" +#define CV_TOT_CV_DEFINED "cvDefined" + +// Validation Rules +#define CV_VALIDATE "validate" +#define CV_VALIDATE_CV "cv" +#define CV_VALIDATE_CV_OR_NONE "cv or None" +#define CV_VALIDATE_CV_OR_CONTROL "cv or control" +#define CV_VALIDATE_DATE "date" +#define CV_VALIDATE_EXISTS "exists" +#define CV_VALIDATE_FLOAT "float" +#define CV_VALIDATE_INT "integer" +#define CV_VALIDATE_LIST "list:" +#define CV_VALIDATE_REGEX "regex:" +#define CV_VALIDATE_NONE "none" + +// CV TERMS (NOTE: UGLY Terms in cv.ra are hidden inside cv.c APIS) +#define CV_TERM_CELL MDB_VAR_CELL +#define CV_TERM_ANTIBODY MDB_VAR_ANTIBODY +#define CV_TERM_CONTROL "control" + const struct hash *mdbCvTermHash(char *term); // returns a hash of hashes of a term which should be defined in cv.ra // NOTE: in static memory: DO NOT FREE const struct hash *mdbCvTermTypeHash(); // returns a hash of hashes of mdb and controlled vocabulary (cv) term types // Those terms should contain label,descrition,searchable,cvDefined,hidden // NOTE: in static memory: DO NOT FREE struct slPair *mdbCvWhiteList(boolean searchTracks, boolean cvLinks); // returns the official mdb/controlled vocabulary terms that have been whitelisted for certain uses. enum mdbCvSearchable // metadata Variavble are only certain declared types {