cadb863ddfbcc1f97b04f0d0b8ef73db862dd060 tdreszer Thu Jul 28 16:32:04 2011 -0700 Fixed some hash sizes, moved some validation code from mdb.c to cv.c, added numeric sort of hgFileUi filerBy and track/file search of terms defined in cv validation as integer. diff --git src/hg/lib/mdb.c src/hg/lib/mdb.c index f335e46..b696ecf 100644 --- src/hg/lib/mdb.c +++ src/hg/lib/mdb.c @@ -1,28 +1,27 @@ /* mdb.c was originally generated by the autoSql program, which also * generated mdb.h and mdb.sql. This module links the database and * the RAM representation of objects. */ #include "common.h" #include "linefile.h" #include "dystring.h" #include "jksql.h" #include "hdb.h" #include "cv.h" #include "mdb.h" #include "encode/encodeExp.h" -#include <regex.h> static char const rcsid[] = "$Id: mdb.c,v 1.8 2010/06/11 17:11:28 tdreszer Exp $"; void mdbStaticLoad(char **row, struct mdb *ret) /* Load a row from mdb table into ret. The contents of ret will * be replaced at the next call to this function. */ { ret->obj = row[0]; ret->var = row[1]; ret->val = row[2]; } struct mdb *mdbLoadByQuery(struct sqlConnection *conn, char *query) /* Load all mdb from table that satisfy the query given. @@ -283,31 +282,31 @@ struct mdbObj *mdbObj = NULL; struct mdbObj *mdbObjs = NULL; struct mdbVar *mdbVar; struct mdb *thisRow; while((thisRow = slPopHead(mdbPtr)) != NULL) { if (mdbObj == NULL || differentString(thisRow->obj,mdbObj->obj) ) { // Finish last object before starting next! 
if(mdbObj!= NULL) slReverse(&(mdbObjs->vars)); // Start new object AllocVar(mdbObj); mdbObj->obj = thisRow->obj; if ( buildHashes ) - mdbObj->varHash = hashNew(0); + mdbObj->varHash = hashNew(8); slAddHead(&mdbObjs,mdbObj); } else { freeMem(thisRow->obj); // Already got this from prev row } AllocVar(mdbVar); mdbVar->var = thisRow->var; mdbVar->val = thisRow->val; slAddHead(&(mdbObj->vars),mdbVar); if ( buildHashes ) hashAddUnique(mdbObj->varHash, mdbVar->var, mdbVar); // pointer to struct to resolve type freeMem(thisRow); @@ -333,51 +332,51 @@ while((thisRow = slPopHead(mdbPtr)) != NULL) { // Start at root if (rootVar == NULL || differentString(thisRow->var,rootVar->var) ) { // Finish last var before starting next! if(rootVars && rootVars->vals && rootVars->vals->objs) slReverse(&(rootVars->vals->objs)); if(rootVars && rootVars->vals) slReverse(&(rootVars->vals)); // Start new var AllocVar(rootVar); limbVal = NULL; // Very important! rootVar->var = thisRow->var; if ( buildHashes ) - rootVar->valHash = hashNew(0); + rootVar->valHash = hashNew(8); slAddHead(&rootVars,rootVar); } else { freeMem(thisRow->var); // Already got this from prev row } // Continue with limb if (limbVal == NULL || differentString(thisRow->val,limbVal->val) ) { // Finish last val before starting next! if(limbVal != NULL && limbVal->objs != NULL) slReverse(&(limbVal->objs)); // Start new val AllocVar(limbVal); limbVal->val = thisRow->val; if ( buildHashes ) { hashAddUnique(rootVar->valHash, limbVal->val, limbVal); // Pointer to struct to get to objHash - limbVal->objHash = hashNew(0); + limbVal->objHash = hashNew(10); } slAddHead(&(rootVar->vals),limbVal); } else freeMem(thisRow->val); // Already got this from prev row // End with leaf AllocVar(leafObj); leafObj->obj = thisRow->obj; if ( buildHashes ) hashAddUnique(limbVal->objHash, leafObj->obj, leafObj); // Pointer to struct to resolve type! 
slAddHead(&(limbVal->objs),leafObj); freeMem(thisRow); } @@ -446,31 +445,31 @@ // initial chop and determine if this looks like metadata int count = chopByWhiteRespectDoubleQuotes(cloneVars,NULL,0); char **words = needMem(sizeof(char *) * count); count = chopByWhiteRespectDoubleQuotes(cloneVars,words,count); if(count < 1 || words[0] == NULL) { errAbort("This is not formatted var=val pairs:\n\t%s\n",varPairs); } verbose(3, "mdbObjAddVarPairs() word count:%d\n\t%s\n",count,varPairs); if(mdbObj == NULL) AllocVar(mdbObj); if(mdbObj->varHash == NULL) - mdbObj->varHash = hashNew(0); + mdbObj->varHash = hashNew(8); int ix; for(ix = 0;ix<count;ix++) { if(*words[ix] == '#') break; if(strchr(words[ix], '=') == NULL) errAbort("This is not formatted var=val pairs: '%s'\n\t%s\n",words[ix],varPairs); AllocVar(mdbVar); mdbVar->var = cloneNextWordByDelimiter(&(words[ix]),'='); mdbVar->val = cloneString(words[ix]); verbose(3, "mdbObjAddVarPairs() var=val: %s=%s\n",mdbVar->var,mdbVar->val); struct mdbVar *oldVar = (struct mdbVar *)hashFindVal(mdbObj->varHash, mdbVar->var); if(oldVar) @@ -606,54 +605,52 @@ if(varPairs != NULL && strlen(varPairs) > 0 && *varPairs != '#') mdbObj = mdbObjAddVarPairs(mdbObj,varPairs); else if(mdbObj->deleteThis == FALSE) errAbort("This is not a formatted metadata line:\n\t%s\n",fromTheTop); return mdbObj; } struct mdbByVar *mdbByVarsLineParse(char *line) /* Parses a line of "var1=val1 var2=val2 into a mdbByVar object for queries. 
*/ { int thisWord = 0; struct mdbByVar *mdbByVars = NULL; struct mdbByVar *rootVar = NULL; struct mdbLimbVal *limbVal = NULL; char *cloneLine = cloneString(line); -struct hash* varHash; // There must not be multiple occurrances of the same var // initial chop and determine if this looks like metadata int count = chopByWhiteRespectDoubleQuotes(cloneLine,NULL,0); char **words = needMem(sizeof(char *) * count); count = chopByWhiteRespectDoubleQuotes(cloneLine,words,count); verbose(3, "mdbByVarsLineParse() word count:%d\n\t%s\n",count,line); - // Get obj and figure out if this is a delete line - varHash = hashNew(0); // All words are expected to be var=val pairs! for (thisWord=0; thisWord<count; thisWord++) { if (strchr(words[thisWord], '=') == NULL) errAbort("Expected '%s=%s' but found '%s'. This is not properly formatted metadata:\n\t%s\n",MDB_VAR,MDB_VAL,words[thisWord],line); // Set up var struct from 1st half of pair + // NOTE: Do not try to combine repeated vars because "fob=a fob=b" is 'AND' while "fob=a,b" is 'OR'. + // Does this make sense? Yes: select * ... where fob like 'Fr%' and fob != 'Frunk' AllocVar(rootVar); rootVar->var = cloneNextWordByDelimiter(&(words[thisWord]),'='); rootVar->notEqual = (rootVar->var[strlen(rootVar->var)-1] == '!'); // requested not equal if (rootVar->notEqual) rootVar->var[strlen(rootVar->var)-1] = 0; - // Do not try to combine repeated vars because "foo=a foo=b" is 'AND' while "foo=a,b" is 'OR'. // Fill in the val(s) from second half of pair char *val = NULL; if (words[thisWord][0] != '\0' && words[thisWord][0] != '?') // "var=?" or "var=" will query by var name only val = cloneString(words[thisWord]); if (val != NULL) { // Strip any single or double quotes first. 
char *end = val + strlen(val) - 1; if ((*val == '"' && *end == '"') || (*val == '\'' && *end == '\'')) { *end = '\0'; val++; } @@ -663,31 +660,30 @@ char * aVal = NULL; while((aVal = cloneNextWordByDelimiter(&val,',')) != NULL) { AllocVar(limbVal); limbVal->val = aVal; slAddTail(&rootVar->vals,limbVal); } } else { AllocVar(limbVal); limbVal->val = val; rootVar->vals = limbVal; } } - hashAdd(varHash, rootVar->var, rootVar); slAddHead(&mdbByVars,rootVar); } freeMem(words); slReverse(&mdbByVars); verbose(3, "mdbByVarsLineParse() parsed:%d first: %s%s='%s'.\n", slCount(mdbByVars),mdbByVars->var,(mdbByVars->notEqual?"!":""),(mdbByVars->vals?mdbByVars->vals->val:"")); return mdbByVars; } // ------ Loading from args, hashes and tdb ------ struct mdbByVar*mdbByVarCreate(char *var, char *val) /* Creates a singular var=val pair struct for metadata queries. */ { struct mdbByVar *mdbByVar = NULL; @@ -770,53 +766,53 @@ // An mdbObj requires and obj, so if one is not supplied it will be "[unknown]" { struct mdbObj *mdbObj = NULL; if (obj == NULL) errAbort("Need obj to create mdbObj object.\n"); if (mdbVars == NULL) { AllocVar(mdbObj); mdbObj->obj = cloneString(obj); return mdbObj; } else { mdbObj = mdbObjCreate(obj,mdbVars->var,mdbVars->val); - mdbObj->varHash = hashNew(0); + mdbObj->varHash = hashNew(8); hashAddUnique(mdbObj->varHash, mdbVars->var, mdbObj->vars); // pointer to struct to resolve type struct mdbVar *var = mdbVars->next; for(;var != NULL;var = var->next); mdbObjSetVar(mdbObj, var->var,var->val); } return mdbObj; } struct mdbObj *mdbObjsLoadFromHashes(struct hash *objsHash) // Load all mdbObjs from a file containing metadata formatted lines { struct mdbObj *mdbObjs = NULL; struct hashEl* objEl = NULL; struct hashCookie objCookie = hashFirst(objsHash); while((objEl = hashNext(&objCookie)) != NULL) { struct mdbObj *mdbObj; AllocVar(mdbObj); mdbObj->obj = cloneString(objEl->name); - mdbObj->varHash = hashNew(0); + mdbObj->varHash = hashNew(8); struct hash *hashedVars 
= objEl->val; struct hashCookie varCookie = hashFirst(hashedVars); struct hashEl* varEl = NULL; while((varEl = hashNext(&varCookie)) != NULL) { if(sameString(varEl->name,MDB_METAOBJ_RAKEY)) continue; struct mdbVar * mdbVar; AllocVar(mdbVar); mdbVar->var = cloneString(varEl->name); mdbVar->val = cloneString(varEl->val); hashAdd(mdbObj->varHash, mdbVar->var, mdbVar); // pointer to struct to resolve type slAddHead(&(mdbObj->vars),mdbVar); } @@ -2382,31 +2378,31 @@ struct mdbObj *mdbObjClone(const struct mdbObj *mdbObj) // Clones a single mdbObj, including hash and maintining order { if(mdbObj == NULL) return NULL; struct mdbObj *newObj; AllocVar(newObj); if(mdbObj->obj != NULL) newObj->obj = cloneString(mdbObj->obj); newObj->deleteThis = mdbObj->deleteThis; if(mdbObj->vars != NULL) { if(mdbObj->varHash != NULL) - newObj->varHash = hashNew(0); + newObj->varHash = hashNew(8); struct mdbVar *mdbVar = NULL; for(mdbVar = mdbObj->vars; mdbVar != NULL; mdbVar = mdbVar->next ) { struct mdbVar *newVar = NULL; AllocVar(newVar); if(mdbVar->var != NULL) newVar->var = cloneString(mdbVar->var); if(mdbVar->val != NULL) newVar->val = cloneString(mdbVar->val); if(newVar->var != NULL && newVar->val != NULL) hashAdd(newObj->varHash, newVar->var, newVar); // pointer to struct to resolve type slAddHead(&(newObj->vars),newVar); } slReverse(&(newObj->vars)); @@ -2458,178 +2454,45 @@ for( mdbObj=mdbObjs; mdbObj!=NULL; mdbObj=mdbObj->next ) { struct mdbVar *mdbVar = NULL; for(mdbVar = mdbObj->vars;mdbVar != NULL;mdbVar=mdbVar->next) { struct hash *termHash = hashFindVal(termTypeHash,mdbVar->var); if (termHash == NULL) // No cv definition for term so no validation can be done { if (!full) continue; if (sameString(mdbVar->var,MDB_OBJ_TYPE) && ( sameString(mdbVar->val,MDB_OBJ_TYPE_TABLE) || sameString(mdbVar->val,MDB_OBJ_TYPE_FILE) || sameString(mdbVar->val,MDB_OBJ_TYPE_COMPOSITE))) continue; - printf("INVALID '%s' not defined in %s: %s -> %s = 
%s\n",mdbVar->var,CV_FILE_NAME,mdbObj->obj,mdbVar->var,mdbVar->val); + printf("INVALID %s '%s' not defined in %s: %s = %s in %s: %s\n",CV_TERM, + mdbVar->var,CV_FILE_NAME,mdbVar->var,mdbVar->val,MDB_OBJ,mdbObj->obj); invalids++; continue; } - char *validationRule = hashFindVal(termHash,CV_VALIDATE); - if (validationRule == NULL) + char reason[256]; + boolean valid = cvValidateTerm(mdbVar->var,mdbVar->val,reason,sizeof(reason)); + if (!valid) { - verbose(1,"ERROR in %s: Term '%s' in typeOfTerms but has no '%s' setting.\n",CV_FILE_NAME,mdbVar->var,CV_VALIDATE); - continue; // Should we errAbort? - } - - // NOTE: Working on memory in hash but we are throwing away a comment and removing trailing spaces so that is okay - strSwapChar(validationRule,'#','\0'); // Chop off any comment in the setting - validationRule = trimSpaces(validationRule); - - // Validate should be or start with known word - if (startsWithWord(CV_VALIDATE_CV,validationRule)) - { - if (SETTING_NOT_ON(hashFindVal(termHash,CV_TOT_CV_DEFINED))) // Known type of term but no validation to be done - { - verbose(1,"ERROR in %s: Term '%s' says validate in cv but is not '%s'.\n",CV_FILE_NAME,mdbVar->var,CV_TOT_CV_DEFINED); - continue; - } - - // cvDefined so every val should be in cv - struct hash *cvHashForTerm = (struct hash *)cvTermHash(mdbVar->var); - if (cvHashForTerm == NULL) - { - verbose(1,"ERROR in %s: Term '%s' says validate in cv but not found as a cv term.\n",CV_FILE_NAME,mdbVar->var); - continue; - } - if (hashFindVal(cvHashForTerm,mdbVar->val) == NULL) // No cv definition for term so no validation can be done - { - if (sameString(validationRule,CV_VALIDATE_CV_OR_NONE) && sameString(mdbVar->val,MDB_VAL_ENCODE_EDV_NONE)) - continue; - else if (sameString(validationRule,CV_VALIDATE_CV_OR_CONTROL)) - { - cvHashForTerm = (struct hash *)cvTermHash(CV_TERM_CONTROL); - if (cvHashForTerm == NULL) - { - verbose(1,"ERROR in %s: Term '%s' says validate in cv but not found as a cv 
term.\n",CV_FILE_NAME,CV_TERM_CONTROL); - continue; - } - if (hashFindVal(cvHashForTerm,mdbVar->val) != NULL) - continue; - } - printf("INVALID cv lookup: %s -> %s = %s\n",mdbObj->obj,mdbVar->var,mdbVar->val); - invalids++; - } - } - else if (startsWithWord(CV_VALIDATE_DATE,validationRule)) - { - if (dateToSeconds(mdbVar->val,"%F") == 0) - { - printf("INVALID date: %s -> %s = %s\n",mdbObj->obj,mdbVar->var,mdbVar->val); - invalids++; - } - } - else if (startsWithWord(CV_VALIDATE_EXISTS,validationRule)) - continue; // (e.g. fileName exists) Nothing to be done at this time. - else if (startsWithWord(CV_VALIDATE_FLOAT,validationRule)) - { - char* end; - double notNeeded = strtod(mdbVar->val, &end); // Don't want float, just error (However, casting to void resulted in a comple error on Ubuntu Maveric and Lucid) - - if ((end == mdbVar->val) || (*end != '\0')) - { - printf("INVALID float: %s -> %s = %s (resulting double: %g)\n",mdbObj->obj,mdbVar->var,mdbVar->val,notNeeded); - invalids++; - } - } - else if (startsWithWord(CV_VALIDATE_INT,validationRule)) - { - char *p0 = mdbVar->val; - if (*p0 == '-') - p0++; - char *p = p0; - while ((*p >= '0') && (*p <= '9')) - p++; - if ((*p != '\0') || (p == p0)) - { - printf("INVALID integer: %s -> %s = %s\n",mdbObj->obj,mdbVar->var,mdbVar->val); - invalids++; - } - } - else if (startsWithWord(CV_VALIDATE_LIST,validationRule)) - { - validationRule = skipBeyondDelimit(validationRule,' '); - if (validationRule == NULL) - { - verbose(1,"ERROR in %s: Invalid '%s' for %s.\n",CV_FILE_NAME,CV_VALIDATE_LIST,mdbVar->var); - continue; - } - int count = chopByChar(validationRule, ',', NULL, 0); - if (count == 1) - { - if (differentString(mdbVar->val,validationRule)) - { - printf("INVALID list '%s' match: %s -> %s = '%s'.\n",validationRule, mdbObj->obj,mdbVar->var,mdbVar->val); - invalids++; - } - } - else if (count > 1) - { - char **array = needMem(count*sizeof(char*)); - chopByChar(cloneString(validationRule), ',', array, count); // Want to 
also trimSpaces()? No - - if (stringArrayIx(mdbVar->val, array, count) == -1) - { - printf("INVALID list '%s' match: %s -> %s = '%s'.\n",validationRule, mdbObj->obj,mdbVar->var,mdbVar->val); - invalids++; - } - } + if (startsWith("ERROR in ",reason)) + verbose(1,"%s\n",reason); else - verbose(1,"ERROR in %s: Invalid 'validate list: %s' for term %s,\n",CV_FILE_NAME,validationRule,mdbVar->var); - } - else if (startsWithWord(CV_VALIDATE_NONE,validationRule)) - continue; - else if (startsWithWord(CV_VALIDATE_REGEX,validationRule)) - { - validationRule = skipBeyondDelimit(validationRule,' '); - if (validationRule == NULL) - { - verbose(1,"ERROR in %s: Invalid '%s' for %s.\n",CV_FILE_NAME,CV_VALIDATE_REGEX,mdbVar->var); - continue; - } - // Real work ahead interpreting regex - regex_t regEx; - int err = regcomp(&regEx, validationRule, REG_NOSUB); - if(err != 0) // Compile the regular expression so that it can be used. Use: REG_EXTENDED ? - { - char buffer[128]; - regerror(err, &regEx, buffer, sizeof buffer); - verbose(1,"ERROR in %s: Invalid regular expression for %s - %s. 
%s\n",CV_FILE_NAME,mdbVar->var,validationRule,buffer); - continue; - } - err = regexec(&regEx, mdbVar->val, 0, NULL, 0); - if (err != 0) - { - //char buffer[128]; - //regerror(err, &regEx, buffer, sizeof buffer); - printf("INVALID regex '%s' match: %s -> %s = '%s'.\n",validationRule, mdbObj->obj,mdbVar->var,mdbVar->val); + verbose(1,"%s in %s: %s\n",reason,MDB_OBJ,mdbObj->obj); invalids++; } - regfree(&regEx); - } - else - verbose(1,"ERROR in %s: Unknown validationRule rule '%s' for term %s.\n",CV_FILE_NAME,validationRule,mdbVar->var); } } return invalids; } static struct slName *mdbObjGetNamedEncodeEdvs(struct mdbObj *compObj) // returns NULL or the list of EDVs defined for this composite { char *edvs = mdbObjFindValue(compObj,MDB_VAR_ENCODE_EDVS); if (edvs == NULL) return NULL; edvs = cloneString(edvs); if (strchr( edvs,',') != NULL) // Tolerate delimit by commas strSwapChar(edvs,',',' '); @@ -3100,31 +2963,31 @@ #define MDB_NOT_FOUND ((struct mdbObj *)-666) #define METADATA_NOT_FOUND ((struct mdbObj *)-999) static struct mdbObj *metadataForTableFromTdb(struct trackDb *tdb) // Returns the metadata for a table from a tdb setting. { char *setting = trackDbSetting(tdb, MDB_METADATA_KEY); if(setting == NULL) return NULL; struct mdbObj *mdbObj; AllocVar(mdbObj); mdbObj->obj = cloneString(tdb->table?tdb->table:tdb->track); AllocVar(mdbObj->vars); mdbObj->vars->var = cloneString(MDB_OBJ_TYPE); mdbObj->vars->val = cloneString(MDB_OBJ_TYPE_TABLE); -mdbObj->varHash = hashNew(0); +mdbObj->varHash = hashNew(8); hashAdd(mdbObj->varHash, mdbObj->vars->var, mdbObj->vars); mdbObj = mdbObjAddVarPairs(mdbObj,setting); mdbObjRemoveVars(mdbObj,MDB_VAR_TABLENAME); // NOTE: Special hint that the tdb metadata is used since no mdb metadata is found return mdbObj; } const struct mdbObj *metadataForTable(char *db,struct trackDb *tdb,char *table) // Returns the metadata for a table. NEVER FREE THIS STRUCT! 
{ struct mdbObj *mdbObj = NULL; // See of the mdbObj was already built if(tdb != NULL) { mdbObj = tdbExtrasMdb(tdb); @@ -3370,31 +3233,39 @@ label = cloneString(hashOptionalVal(valHash,CV_LABEL,row[0])); if (tags) { char *tag = hashFindVal(valHash,CV_TAG); if (tag != NULL) val = tag; } } } if (label == NULL); label = cloneString(row[0]); label = strSwapChar(label,'_',' '); // vestigial _ meaning space slPairAdd(&pairs,val,label); } sqlFreeResult(&sr); +if (slCount(pairs) > 0) + { + // should have a list sorted on the label + enum cvDataType eCvDataType = cvDataType(var); + if (eCvDataType == cvInteger) + slPairValAtoiSort(&pairs); + else slPairValSortCase(&pairs); + } return pairs; } struct slPair *mdbVarsSearchable(struct sqlConnection *conn, boolean hasTableName, boolean hasFileName) // returns a white list of mdb vars that actually exist in the current DB. // Searchable vars are only for table or file objects. Further restrict to vars associated with tableName, fileName or both. { if (!hasTableName && !hasFileName) errAbort("mdbVarsSearchable requests vals associated with neither table nor files.\n"); char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first char letter = 'A'; struct slPair *cvApproved = cvWhiteList(TRUE,FALSE); struct slPair *relevant = NULL;