5dcfe2b1322ee5d4c989006b6161fcdac490580e
tdreszer
Fri Jan 28 16:13:11 2011 -0800
Added -validate option to mdbPrint, which will validate mdb vars against cv.ra.
diff --git src/hg/lib/mdb.c src/hg/lib/mdb.c
index 6baeb4f..693ceda 100644
--- src/hg/lib/mdb.c
+++ src/hg/lib/mdb.c
@@ -1,27 +1,28 @@
 /* mdb.c was originally generated by the autoSql program, which also
  * generated mdb.h and mdb.sql. This module links the database and
  * the RAM representation of objects. */
 
 #include "common.h"
 #include "linefile.h"
 #include "dystring.h"
 #include "jksql.h"
 #include "hdb.h"
 #include "cheapcgi.h"
 #include "hui.h"
 #include "mdb.h"
+#include <regex.h>
 
 static char const rcsid[] = "$Id: mdb.c,v 1.8 2010/06/11 17:11:28 tdreszer Exp $";
 
 void mdbStaticLoad(char **row, struct mdb *ret)
 /* Load a row from mdb table into ret. The contents of ret will
  * be replaced at the next call to this function. */
 {
 
 ret->obj = row[0];
 ret->var = row[1];
 ret->varType = row[2];
 ret->val = row[3];
 }
 
 struct mdb *mdbLoadByQuery(struct sqlConnection *conn, char *query)
@@ -2446,31 +2447,35 @@
     dyStringPrintf(dyQuery,"and exists (select l2.obj from %s l2 where l2.obj = l1.obj and l2.var='objType' and l2.val='%s')",
                    tableName,tables?"table":"file");
 
 dyStringAppend(dyQuery," order by val");
 retVal = sqlQuickList(conn, dyStringCannibalize(&dyQuery));
 slNameSortCase(&retVal);
 return retVal;
 }
 
 // TODO: decide to make this public or hide it away inside the one function so far that uses it.
 static struct hash *cvHash = NULL;
 static char *cv_file()
 // return default location of cv.ra
 {
 static char filePath[PATH_LEN];
-safef(filePath, sizeof(filePath), "%s/encode/cv.ra", hCgiRoot());
+char *root = hCgiRoot();
+if (root == NULL || *root == 0)
+    root = "/usr/local/apache/cgi-bin/"; // Make this check out sandboxes?
+//    root = "/cluster/home/tdreszer/kent/src/hg/makeDb/trackDb/cv/alpha/"; // Make this check out sandboxes?
+safef(filePath, sizeof(filePath), "%s/encode/cv.ra", root);
 if(!fileExists(filePath))
     errAbort("Error: can't locate cv.ra; %s doesn't exist\n", filePath);
 return filePath;
 }
 
 struct slPair *mdbValLabelSearch(struct sqlConnection *conn, char *var, int limit, boolean tables, boolean files)
 // Search the metaDb table for vals by var and returns controlled vocabulary (cv) label
 // (if it exists) and val as a pair. Can impose (non-zero) limit on returned string size of name.
 // Return is case insensitive sorted on name (label or else val).
 { // TODO: Change this to use normal mdb struct routines?
 if (!tables && !files)
     errAbort("mdbValSearch requests values for neither table nor file objects.\n");
 
 char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first
 
@@ -2516,34 +2521,58 @@
         if (label != NULL)
             {
             freeMem(pair->name); // Allocated when pair was created
             pair->name = strSwapChar(cloneString(label),'_',' '); // vestigial _ meaning space
             if (limit > 0 && strlen(pair->name) > limit)
                 pair->name[limit] = '\0';
             }
         }
     slAddHead(&pairs, pair);
     }
 sqlFreeResult(&sr);
 slPairSortCase(&pairs);
 return pairs;
 }
 
+struct hash *mdbCvTermHash(char *term)
+// returns a hash of hashes of a term which should be defined in cv.ra
+{
+static struct hash *cvHashOfHashOfHashes = NULL;
+if (sameString(term,"cell"))
+    term = "Cell Line";
+else if (sameString(term,"antibody"))
+    term = "Antibody";
+
+if (cvHashOfHashOfHashes == NULL)
+    cvHashOfHashOfHashes = hashNew(0);
+
+struct hash *cvTermHash = hashFindVal(cvHashOfHashOfHashes,term);
+// Establish cv hash of Term Types if it doesn't already exist
+if (cvTermHash == NULL)
+    {
+    cvTermHash = raReadWithFilter(cv_file(), "term","type",term);
+    if (cvTermHash != NULL)
+        hashAdd(cvHashOfHashOfHashes,term,cvTermHash);
+    }
+
+return cvTermHash;
+}
+
 struct hash *mdbCvTermTypeHash()
 // returns a hash of hashes of mdb and controlled vocabulary (cv) term types
 // Those terms should contain label,description,searchable,cvDefined,hidden
-{
+{ // NOTE: "typeOfTerm" is specialized, so don't use mdbCvTermHash
 static struct hash *cvHashOfTermTypes = NULL;
 
 // Establish cv hash of Term Types if it doesn't already exist
 if (cvHashOfTermTypes == NULL)
     {
     cvHashOfTermTypes = raReadWithFilter(cv_file(), "term","type","typeOfTerm");
     // Patch up an ugly inconsistency with 'cell'
     struct hash *cellHash = hashRemove(cvHashOfTermTypes,"cellType");
     if (cellHash)
         {
         hashAdd(cvHashOfTermTypes,"cell",cellHash);
         hashReplace(cellHash, "term", cloneString("cell")); // spilling memory of 'cellType' val
         }
     struct hash *abHash = hashRemove(cvHashOfTermTypes,"Antibody");
     if (abHash)
@@ -2624,15 +2653,186 @@
 
 const char *cvLabel(char *term)
 // returns cv label if term found or else just term
 {
 // Get the list of term types from thew cv
 struct hash *termTypeHash = mdbCvTermTypeHash();
 struct hash *termHash = hashFindVal(termTypeHash,term);
 if (termHash != NULL)
     {
     char *label = hashFindVal(termHash,"label");
     if (label != NULL)
         return label;
     }
 return term;
 }
+
+int mdbObjsValidate(struct mdbObj *mdbObjs)
+// Validates every var/val of every object in mdbObjs against the 'validate' rule that
+// cv.ra declares for that var (cv, date, exists, float, integer, list:, none, regex:).
+// Vars with no typeOfTerm entry are skipped; problems in cv.ra itself are reported via
+// verbose(1,...) and are not counted. Each invalid val is reported on stdout.
+// Returns count of invalid vals found.
+{
+struct hash *termTypeHash = mdbCvTermTypeHash();
+struct mdbObj *mdbObj = NULL;
+int invalids = 0;
+for( mdbObj=mdbObjs; mdbObj!=NULL; mdbObj=mdbObj->next )
+    {
+    struct mdbVar *mdbVar = NULL;
+    for(mdbVar = mdbObj->vars;mdbVar != NULL;mdbVar=mdbVar->next)
+        {
+        struct hash *termHash = hashFindVal(termTypeHash,mdbVar->var);
+        if (termHash == NULL) // No cv definition for term so no validation can be done
+            continue;
+        char *validationRule = hashFindVal(termHash,"validate");
+        if (validationRule == NULL)
+            {
+            verbose(1,"ERROR in cv.ra: Term %s in typeOfTerms but has no 'validate' setting.\n",mdbVar->var);
+            continue; // Should we errAbort?
+            }
+
+        // NOTE: Working on memory in hash but we are throwing away a comment and removing trailing spaces so that is okay
+        strSwapChar(validationRule,'#','\0'); // Chop off any comment in the setting
+        validationRule = trimSpaces(validationRule);
+
+        // Validate should be or start with known word
+        if (startsWithWord("cv",validationRule))
+            {
+            if (SETTING_NOT_ON(hashFindVal(termHash,"cvDefined"))) // Known type of term but no validation to be done
+                {
+                verbose(1,"ERROR in cv.ra: Term %s says validate in cv but is not 'cvDefined'.\n",mdbVar->var);
+                continue;
+                }
+
+            // cvDefined so every val should be in cv
+            struct hash *cvTermHash = mdbCvTermHash(mdbVar->var);
+            if (cvTermHash == NULL)
+                {
+                verbose(1,"ERROR in cv.ra: Term %s says validate in cv but not found as a cv term.\n",mdbVar->var);
+                continue;
+                }
+            if (hashFindVal(cvTermHash,mdbVar->val) == NULL) // No cv definition for term so no validation can be done
+                {
+                char * orControl = skipBeyondDelimit(validationRule,' ');
+                if (orControl && sameString(orControl,"or None") && sameString(mdbVar->val,"None"))
+                    continue;
+                else if (orControl && sameString(orControl,"or control"))
+                    {
+                    cvTermHash = mdbCvTermHash("control");
+                    if (cvTermHash == NULL)
+                        {
+                        verbose(1,"ERROR in cv.ra: Term control says validate in cv but not found as a cv term.\n");
+                        continue;
+                        }
+                    if (hashFindVal(cvTermHash,mdbVar->val) != NULL)
+                        continue;
+                    }
+                printf("INVALID cv lookup: %s -> %s = %s\n",mdbObj->obj,mdbVar->var,mdbVar->val);
+                invalids++;
+                }
+            }
+        else if (startsWithWord("date",validationRule))
+            {
+            if (dateToSeconds(mdbVar->val,"%F") == 0)
+                {
+                printf("INVALID date: %s -> %s = %s\n",mdbObj->obj,mdbVar->var,mdbVar->val);
+                invalids++;
+                }
+            }
+        else if (startsWithWord("exists",validationRule))
+            continue; // (e.g. fileName exists) Nothing to be done at this time.
+        else if (startsWithWord("float",validationRule))
+            {
+            char* end;
+            (void)strtod(mdbVar->val, &end); // Don't want float, just error
+
+            if ((end == mdbVar->val) || (*end != '\0'))
+                {
+                printf("INVALID float: %s -> %s = %s\n",mdbObj->obj,mdbVar->var,mdbVar->val);
+                invalids++;
+                }
+            }
+        else if (startsWithWord("integer",validationRule))
+            {
+            char *p0 = mdbVar->val;
+            if (*p0 == '-')
+                p0++;
+            char *p = p0;
+            while ((*p >= '0') && (*p <= '9'))
+                p++;
+            if ((*p != '\0') || (p == p0))
+                {
+                printf("INVALID integer: %s -> %s = %s\n",mdbObj->obj,mdbVar->var,mdbVar->val);
+                invalids++;
+                }
+            }
+        else if (startsWithWord("list:",validationRule))
+            {
+            validationRule = skipBeyondDelimit(validationRule,' ');
+            if (validationRule == NULL)
+                {
+                verbose(1,"ERROR in cv.ra: Invalid 'list:' for %s.\n",mdbVar->var);
+                continue;
+                }
+            int count = chopByChar(validationRule, ',', NULL, 0);
+            if (count == 1)
+                {
+                if (differentString(mdbVar->val,validationRule))
+                    {
+                    printf("INVALID list '%s' match: %s -> %s = '%s'.\n",validationRule, mdbObj->obj,mdbVar->var,mdbVar->val);
+                    invalids++;
+                    }
+                }
+            else if (count > 1)
+                {
+                char **array = needMem(count*sizeof(char*));
+                char *ruleCopy = cloneString(validationRule); // chopByChar() writes into its input, so chop a copy
+                chopByChar(ruleCopy, ',', array, count); // Want to also trimSpaces()? No
+
+                if (stringArrayIx(mdbVar->val, array, count) == -1)
+                    {
+                    printf("INVALID list '%s' match: %s -> %s = '%s'.\n",validationRule, mdbObj->obj,mdbVar->var,mdbVar->val);
+                    invalids++;
+                    }
+                freeMem(array); // Both allocations are only needed for this one comparison
+                freeMem(ruleCopy);
+                }
+            else
+                verbose(1,"ERROR in cv.ra: Invalid 'validate list: %s' for term %s,\n",validationRule,mdbVar->var);
+            }
+        else if (startsWithWord("none",validationRule))
+            continue;
+        else if (startsWithWord("regex:",validationRule))
+            {
+            validationRule = skipBeyondDelimit(validationRule,' ');
+            if (validationRule == NULL)
+                {
+                verbose(1,"ERROR in cv.ra: Invalid 'regex:' for %s.\n",mdbVar->var);
+                continue;
+                }
+            // Real work ahead interpreting regex
+            regex_t regEx;
+            int err = regcomp(&regEx, validationRule, REG_NOSUB);
+            if(err != 0) // Compile the regular expression so that it can be used. Use: REG_EXTENDED ?
+                {
+                char buffer[128];
+                regerror(err, &regEx, buffer, sizeof buffer);
+                verbose(1,"ERROR in cv.ra: Invalid regular expression for %s - %s. %s\n",mdbVar->var,validationRule,buffer);
+                continue;
+                }
+            err = regexec(&regEx, mdbVar->val, 0, NULL, 0);
+            if (err != 0)
+                {
+                //char buffer[128];
+                //regerror(err, &regEx, buffer, sizeof buffer);
+                printf("INVALID regex '%s' match: %s -> %s = '%s'.\n",validationRule, mdbObj->obj,mdbVar->var,mdbVar->val);
+                invalids++;
+                }
+            regfree(&regEx);
+            }
+        else
+            verbose(1,"ERROR in cv.ra: Unknown validationRule rule '%s' for term %s.\n",validationRule,mdbVar->var);
+        }
+    }
+return invalids;
+}