df4ddaf7bd0cb5daad0a3e12346355a60951dfc6 tdreszer Thu Mar 3 15:26:48 2011 -0800 Removed unused 'varType'. Changed longblob to varchar(2084). Changed the recreate load object to print to a temp file and use mysql 'LOAD DATA' command. Made secondary key non-unique and use disable/enable keys to speed up load. diff --git src/hg/inc/mdb.h src/hg/inc/mdb.h index 9ffdc12..5766e2f 100644 --- src/hg/inc/mdb.h +++ src/hg/inc/mdb.h @@ -2,31 +2,30 @@ * generated mdb.c and mdb.sql. This header links the database and * the RAM representation of objects. */ #ifndef MDB_H #define MDB_H #include "jksql.h" #define MDB_NUM_COLS 4 struct mdb /* This contains metadata for a table, file or other predeclared object type. */ { struct mdb *next; /* Next in singly linked list. */ char *obj; /* Object name or ID. */ char *var; /* Metadata variable name. */ - char *varType; /* txt | binary */ char *val; /* Metadata value. */ }; void mdbStaticLoad(char **row, struct mdb *ret); /* Load a row from mdb table into ret. The contents of ret will * be replaced at the next call to this function. */ struct mdb *mdbLoadByQuery(struct sqlConnection *conn, char *query); /* Load all mdb from table that satisfy the query given. * Where query is of the form 'select * from example where something=something' * or 'select example.* from example, anotherTable where example.something = * anotherTable.something'. * Dispose of this with mdbFreeList(). */ void mdbSaveToDb(struct sqlConnection *conn, struct mdb *el, char *tableName, int updateSize); @@ -93,44 +92,35 @@ #define MDB_DEFAULT_NAME "metaDb" // The mdb holds metadata primarily for tables. // Many types of objects could be supported, though currently files are the only other type. // It is easy to imagine using the current mdb to support hierarchical trees of metadata. // For example a composite type object called "myComposte" could have metadata that is valid for // all tables that have the var=composite val=myComposte metadata defined. 
// // There are 2 ways to look at the metadata: By Obj: obj->[var=val] and By Var: var->[val->[obj]]. // By Obj: an object has many var/val pairs but only one val for each unique var. Querying by // object creates a single (2 level) one to many structure. // By Var: a variable has many possible values and each value may be defined for more than one object. // Therefore, querying by var results in a (3 level) one to many to many structure. -enum mdbVarType -// metadata Variavble are only certain declared types - { - vtTxt =0, // Txt is default - vtBinary =1, // Could support binary blobs - vtUnknown =99 // Not determined. - }; - struct mdbVar // The metadata var=val construct. This is contained by mdbObj { struct mdbVar* next; // Next in singly linked list of variables char *var; // Metadata variable name. - enum mdbVarType varType; // txt | binary char *val; // Metadata value. }; struct mdbObj // The standard container of a single object's metadata. // Also: when searching metadata obj->var->val this is the top struct. { struct mdbObj* next; // Next in singly linked list of objects char *obj; // Object name or ID boolean deleteThis; // Used when loading formatted file which may contain delete requests struct mdbVar* vars; // if NOT NULL: list of variables belonging to this object struct hash* varHash; // if NOT NULL: variables are also hashed! (var str to mdbVar struct) }; struct mdbLeafObj @@ -142,59 +132,51 @@ struct mdbLimbVal // When searching var->val->obj this is the mid-level val->obj struct. { struct mdbLimbVal* next; // Next in singly linked list of variables char *val; // Metadata value. struct mdbLeafObj* objs; // if NOT NULL: list of Objects which have this variable struct hash* objHash; // if NOT NULL: hash of objects (val str to leafObj struct) }; struct mdbByVar // When searching metadata var->val->object this is the top struct { struct mdbByVar* next; // Next in singly linked list of variables char *var; // Metadata variable name. 
- enum mdbVarType varType; // txt | binary boolean notEqual; // For querying only struct mdbLimbVal* vals; // list of values associated with this var struct hash* valHash; // if NOT NULL: hash of vals (val str to limbVal struct) }; -// -------------- Enum to Strings -------------- -enum mdbVarType mdbVarTypeStringToEnum(char *varType); -// Convert metadata varType string to enum - -char *mdbVarTypeEnumToString(enum mdbVarType varType); -// Convert metadata varType enum string - // ------ Parsing lines ------ struct mdbObj *metadataLineParse(char *line); /* Parses a single formatted metadata line into mdbObj for updates or queries. */ struct mdbByVar *mdbByVarsLineParse(char *line); /* Parses a line of "var1=val1 var2=val2 into a mdbByVar object for queries. */ // ------ Loading from args, hashes ------ -struct mdbObj *mdbObjCreate(char *obj,char *var, char *varType,char *val); +struct mdbObj *mdbObjCreate(char *obj,char *var, char *val); /* Creates a singular mdbObj query object based on obj and all other optional params. */ -struct mdbByVar *mdbByVarCreate(char *var, char *varType,char *val); +struct mdbByVar *mdbByVarCreate(char *var, char *val); /* Creates a singular var=val pair struct for metadata queries. */ -boolean mdbByVarAppend(struct mdbByVar *mdbByVars,char *var, char *varType,char *val,boolean notEqual); +boolean mdbByVarAppend(struct mdbByVar *mdbByVars,char *var, char *val,boolean notEqual); /* Adds a another var to a list of mdbByVar pairs to be used in metadata queries. 
*/ struct mdbObj *mdbObjsLoadFromHashes(struct hash *objsHash); // Load all mdbObjs from a file containing metadata formatted lines // ------ Loading from files ------ struct mdbObj *mdbObjsLoadFromFormattedFile(char *fileName,boolean *validated); // Load all mdbObjs from a file containing metadata formatted lines // If requested, will determine if a magic number at the end of the file matches contents struct mdbObj *mdbObjsLoadFromRAFile(char *fileName,boolean *validated); // Load all mdbObjs from a file containing RA formatted 'metaObjects' // If requested, will determine if a magic number at the end of the file matches contents @@ -235,30 +217,33 @@ struct mdbObj *mdbObjsQueryByVars(struct sqlConnection *conn,char *table,struct mdbByVar *mdbByVars); // Query the metadata table by one or more var=val pairs to find the distinct set of objs that satisfy ALL conditions. // Returns new mdbObj struct fully populated and sorted in obj,var order. // ----------- Printing and Counting ----------- void mdbObjPrint(struct mdbObj *mdbObjs,boolean raStyle); // prints objs and var=val pairs as formatted metadata lines or ra style void mdbObjPrintToFile(struct mdbObj *mdbObjs,boolean raStyle, char *file); // prints (to file) objs and var=val pairs as formatted metadata lines or ra style void mdbObjPrintToStream(struct mdbObj *mdbObjs,boolean raStyle, FILE *outF); // prints (to stream) objs and var=val pairs as formatted metadata lines or ra style +int mdbObjPrintToTabFile(struct mdbObj *mdbObjs, char *file); +// prints all objs as tab delimited obj var val into file for SQL LOAD DATA. Returns count. + char *mdbObjVarValPairsAsLine(struct mdbObj *mdbObj,boolean objTypeExclude); // returns NULL or a line for a single mdbObj as "var1=val1; var2=val2 ...". Must be freed. 
void mdbByVarPrint(struct mdbByVar *mdbByVars,boolean raStyle); // prints var=val pairs and objs that go with them single lines or ra style int mdbObjCount(struct mdbObj *mdbObjs, boolean objs); // returns the count of vars belonging to this obj or objs; int mdbByVarCount(struct mdbByVar *mdbByVars,boolean vars, boolean vals); // returns the count of objs belonging to this set of vars; // ----------------- Utilities ----------------- struct mdbVar *mdbObjFind(struct mdbObj *mdbObj, char *var); // Finds the val associated with the var or returns NULL @@ -313,31 +298,31 @@ // Returns matched or unmatched items objects as requested, maintaining sort order struct mdbObj *mdbObjsFilterByVars(struct mdbObj **pMdbObjs,char *vars,boolean noneEqualsNotFound,boolean returnMatches); // Filters mdb objects to only those that include/exclude var=val pairs (e.g. "var1=val1 var2 var3!=val3 var4=None"). // Supports != ("var!=" means var not found). Optionally supports var=None equal to var is not found // Returns matched or unmatched items objects as requested. Multiple passes means sort order is destroyed. struct mdbObj *mdbObjsFilterTablesOrFiles(struct mdbObj **pMdbObjs,boolean table, boolean files); // Filters mdb objects to only those that have associated tables or files. Returns removed non-table/file objects // Note: Since table/file objects overlap, there are 3 possibilities: tables, files, table && files struct mdbObj *mdbObjIntersection(struct mdbObj **a, struct mdbObj *b); // return duplicate objs from an intersection of two mdbObj lists. // List b is untouched but pA will contain the resulting intersection -void mdbObjTransformToUpdate(struct mdbObj *mdbObjs, char *var, char *varType,char *val,boolean deleteThis); +void mdbObjTransformToUpdate(struct mdbObj *mdbObjs, char *var, char *val,boolean deleteThis); // Turns one or more mdbObjs into the structure needed to add/update or delete.
struct mdbObj *mdbObjClone(const struct mdbObj *mdbObj); // Clones a single mdbObj, including hash and maintaining order struct slName *mdbObjToSlName(struct mdbObj *mdbObjs); // Creates slNames list of mdbObjs->obj. mdbObjs remains untouched int mdbVarCmp(const void *va, const void *vb); /* Compare to sort on label. */ // ----------------- Validation and specialty APIs ----------------- int mdbObjsValidate(struct mdbObj *mdbObjs, boolean full); // Validates vars and vals against cv.ra. Returns count of errors found.