src/hg/lib/cv.c cadb863ddfbcc1f97b04f0d0b8ef73db862dd060

cadb863ddfbcc1f97b04f0d0b8ef73db862dd060
tdreszer
  Thu Jul 28 16:32:04 2011 -0700
Fixed some hash sizes, moved some validation code from mdb.c to cv.c, added numeric sort of hgFileUi filterBy and track/file search of terms defined in cv validation as integer.
diff --git src/hg/lib/cv.c src/hg/lib/cv.c
index d46c89b..b0ec8c5 100644
--- src/hg/lib/cv.c
+++ src/hg/lib/cv.c
@@ -1,20 +1,21 @@
 
 
 // cv.c stands for Controlled Vocabulary and this file contains the
 // library APIs for reading and making sense of the contents of cv.ra.
 
+#include <regex.h>
 #include "common.h"
 #include "linefile.h"
 #include "dystring.h"
 #include "hdb.h"
 #include "ra.h"
 #include "hui.h"
 #include "mdb.h"
 
 // CV Defines that should not necessarily be public
 
 // CV UGLY TERMS (NOTE: These should be hidden inside cv.c APIs and callers should use non-UGLY terms)
 #define CV_UGLY_TOT_CELLTYPE    "cellType"
 #define CV_UGLY_TERM_CELL_LINE  "Cell Line"
 #define CV_UGLY_TERM_ANTIBODY   "Antibody"
 
@@ -101,31 +102,31 @@
     errAbort("Error: can't locate %s; %s doesn't exist\n", CV_FILE_NAME, filePath);
 return filePath;
 }
 
 const struct hash *cvTermHash(const char *term)
 // Returns a hash of hashes for a term which should be defined in cv.ra.
 // The hash is loaded by raReadWithFilter() from cvFile() using CV_TERM, CV_TYPE and the term,
 // then cached per term in a function-static hash so later calls for the same term reuse it.
 // Returns NULL when raReadWithFilter() returns NULL for the term (nothing is cached then).
 // NOTE: in static memory: DO NOT FREE
 {
 static struct hash *cvHashOfHashOfHashes = NULL;
 // Callers pass the public term names; the lookup below uses the "ugly" names for these two
 if (sameString(term,CV_TERM_CELL))
     term = CV_UGLY_TERM_CELL_LINE;
 else if (sameString(term,CV_TERM_ANTIBODY))
     term = CV_UGLY_TERM_ANTIBODY;
 
 // Create the per-term cache on first use
 if (cvHashOfHashOfHashes == NULL)
-    cvHashOfHashOfHashes = hashNew(4);
+    cvHashOfHashOfHashes = hashNew(9);
 
 struct hash *cvHashForTerm = hashFindVal(cvHashOfHashOfHashes,(char *)term);
 // Establish cv hash of Term Types if it doesn't already exist in the cache
 if (cvHashForTerm == NULL)
     {
     cvHashForTerm = raReadWithFilter((char *)cvFile(), CV_TERM,CV_TYPE,(char *)term);
     if (cvHashForTerm != NULL)
         hashAdd(cvHashOfHashOfHashes,(char *)term,cvHashForTerm);
     }
 
 return cvHashForTerm;
 }
 
 const struct hash *cvOneTermHash(const char *type,const char *term)
 // returns a hash for a single term of a given type
@@ -255,30 +256,65 @@
         if (sameWord(searchable,CV_SEARCHABLE_SINGLE_SELECT))
             return cvSearchBySingleSelect;
         if (sameWord(searchable,CV_SEARCHABLE_MULTI_SELECT))
             return cvSearchByMultiSelect;
         if (sameWord(searchable,CV_SEARCHABLE_FREE_TEXT))
             return cvSearchByFreeText;
         if (sameWord(searchable,"date"))
             return cvSearchByDateRange;
         if (sameWord(searchable,"numeric"))
             return cvSearchByIntegerRange;
         }
     }
 return cvNotSearchable;
 }
 
+const char *cvValidationRule(const char *term)
+// Returns the CV_VALIDATE setting for a term from the term type hash, with any '#' comment
+// chopped off and the result passed through trimSpaces().
+// Returns NULL if the term is not in the term type hash or has no CV_VALIDATE setting.
+// NOTE: the returned string is the memory held by the hash: DO NOT FREE
+{
+// Get the list of term types from the cv
+struct hash *termTypeHash = (struct hash *)cvTermTypeHash();
+struct hash *termHash = hashFindVal(termTypeHash,(char *)term);
+if (termHash == NULL)
+    return NULL;
+
+char *validationRule = hashFindVal(termHash,CV_VALIDATE);
+if (validationRule == NULL)
+    return NULL;  // No validate setting: callers test for NULL, so do not hand NULL to the string helpers
+
+// NOTE: Working on memory in hash but we are throwing away a comment and removing spaces so that is okay
+strSwapChar(validationRule,'#','\0'); // Chop off any comment in the setting
+return trimSpaces(validationRule);
+}
+
+enum cvDataType cvDataType(const char *term)
+// Classifies a term by the first word of its validation rule (see cvValidationRule()):
+// cvInteger, cvFloat or cvDate when the rule starts with the matching CV_VALIDATE_* word,
+// cvString for any other rule, and cvIndeterminant when the term has no rule at all.
+{
+char *rule = (char *)cvValidationRule(term);
+if (rule == NULL)
+    return cvIndeterminant;
+
+if (startsWithWord(CV_VALIDATE_INT,rule))
+    return cvInteger;
+if (startsWithWord(CV_VALIDATE_FLOAT,rule))
+    return cvFloat;
+if (startsWithWord(CV_VALIDATE_DATE,rule))
+    return cvDate;
+return cvString;
+}
+
 const char *cvLabel(const char *term)
 // Returns the CV_LABEL setting of a term found in the term type hash.
 // Falls back to returning term itself when the term is unknown or has no label.
 {
 struct hash *typeOfTerms = (struct hash *)cvTermTypeHash();
 struct hash *settings = hashFindVal(typeOfTerms,(char *)term);
 if (settings == NULL)
     return term;
 
 const char *label = hashFindVal(settings,CV_LABEL);
 return (label != NULL) ? label : term;
 }
 
@@ -341,15 +377,192 @@
 struct hash *termTypeHash = (struct hash *)cvTermTypeHash();
 struct hash *termHash = hashFindVal(termTypeHash,(char *)term);
 if (termHash != NULL)
     {
     char *validationRule = hashFindVal(termHash,CV_VALIDATE);
     if (validationRule != NULL)
         {           // Currently only supporting special case for "None"
         if (sameString(validationRule,CV_VALIDATE_CV_OR_NONE)
         && sameString(val,MDB_VAL_ENCODE_EDV_NONE))
             return TRUE;
         }
     }
 return FALSE;
 }
 
+boolean cvValidateTerm(const char *term,const char *val,char *reason,int len)
+// Returns TRUE if val is acceptable for term according to the term's validation rule
+// (as returned by cvValidationRule()).
+// reason may be NULL.  Otherwise it is a caller supplied buffer of len bytes which is emptied
+// on entry and filled (via safef) with an explanation whenever FALSE is returned.
+{
+if (reason != NULL)
+    *reason = '\0';
+
+char *validationRule = (char *)cvValidationRule(term);
+if (validationRule == NULL)
+    {
+    if (reason != NULL)
+        safef(reason,len,"ERROR in %s: Term '%s' in typeOfTerms but has no '%s' setting.",CV_FILE_NAME,(char *)term,CV_VALIDATE);
+    return FALSE;
+    }
+
+// Validate should be or start with known word
+if (startsWithWord(CV_VALIDATE_CV,validationRule))
+    {
+    struct hash *termTypeHash = (struct hash *)cvTermTypeHash();
+    struct hash *termHash = hashFindVal(termTypeHash,(char *)term);
+    if (SETTING_NOT_ON(hashFindVal(termHash,CV_TOT_CV_DEFINED))) // Rule says validate in cv but the term type is not flagged as cv defined
+        {
+        if (reason != NULL)
+            safef(reason,len,"ERROR in %s: Term '%s' says validate in cv but is not '%s'.",CV_FILE_NAME,(char *)term,CV_TOT_CV_DEFINED);
+        return FALSE;
+        }
+
+    // cvDefined so every val should be in cv
+    struct hash *cvHashForTerm = (struct hash *)cvTermHash((char *)term);
+    if (cvHashForTerm == NULL)
+        {
+        if (reason != NULL)
+            safef(reason,len,"ERROR in %s: Term '%s' says validate in cv but not found as a cv term.",CV_FILE_NAME,(char *)term);
+        return FALSE;
+        }
+    if (hashFindVal(cvHashForTerm,(char *)val) == NULL) // val is not defined in cv for this term: try the exceptions the rule allows
+        {
+        if (sameString(validationRule,CV_VALIDATE_CV_OR_NONE) && sameString((char *)val,MDB_VAL_ENCODE_EDV_NONE))
+            return TRUE;
+        else if (sameString(validationRule,CV_VALIDATE_CV_OR_CONTROL))
+            {
+            // val may instead be defined as a control term
+            cvHashForTerm = (struct hash *)cvTermHash(CV_TERM_CONTROL);
+            if (cvHashForTerm == NULL)
+                {
+                if (reason != NULL)
+                    safef(reason,len,"ERROR in %s: Term '%s' says validate in cv but not found as a cv term.",CV_FILE_NAME,CV_TERM_CONTROL);
+                return FALSE;
+                }
+            if (hashFindVal(cvHashForTerm,(char *)val) != NULL)
+                return TRUE;
+            }
+        if (reason != NULL)
+            safef(reason,len,"INVALID cv lookup: %s = '%s'",(char *)term,(char *)val);
+        return FALSE;
+        }
+    }
+else if (startsWithWord(CV_VALIDATE_DATE,validationRule))
+    {
+    if (dateToSeconds((char *)val,"%F") == 0)
+        {
+        if (reason != NULL)
+            safef(reason,len,"INVALID date: %s = %s",(char *)term,(char *)val);
+        return FALSE;
+        }
+    }
+else if (startsWithWord(CV_VALIDATE_EXISTS,validationRule))
+    {
+    return TRUE;  // (e.g. fileName exists) Nothing to be done at this time.
+    }
+else if (startsWithWord(CV_VALIDATE_FLOAT,validationRule))
+    {
+    char *end;
+    double notNeeded = strtod((char *)val, &end); // Don't want float, just error (However, casting to void resulted in a compile error on Ubuntu Maveric and Lucid)
+
+    // Nothing parsed, or trailing characters left over, means val is not a float
+    if ((end == (char *)val) || (*end != '\0'))
+        {
+        if (reason != NULL)
+            safef(reason,len,"INVALID float: %s = %s (resulting double: %g)",(char *)term,(char *)val,notNeeded);
+        return FALSE;
+        }
+    }
+else if (startsWithWord(CV_VALIDATE_INT,validationRule))
+    {
+    // Accept an optional leading '-' followed by one or more decimal digits and nothing else
+    char *p0 = (char *)val;
+    if (*p0 == '-')
+        p0++;
+    char *p = p0;
+    while ((*p >= '0') && (*p <= '9'))
+        p++;
+    if ((*p != '\0') || (p == p0))
+        {
+        if (reason != NULL)
+            safef(reason,len,"INVALID integer: %s = %s",(char *)term,(char *)val);
+        return FALSE;
+        }
+    }
+else if (startsWithWord(CV_VALIDATE_LIST,validationRule))
+    {
+    // The allowed values follow the first space of the rule, separated by commas
+    validationRule = skipBeyondDelimit(validationRule,' ');
+    if (validationRule == NULL)
+        {
+        if (reason != NULL)
+            safef(reason,len,"ERROR in %s: Invalid '%s' for %s.",CV_FILE_NAME,CV_VALIDATE_LIST,(char *)term);
+        return FALSE;
+        }
+    int count = chopByChar(validationRule, ',', NULL, 0);  // NULL array: only counting here
+    if (count == 1)
+        {
+        if (differentString((char *)val,validationRule))
+            {
+            if (reason != NULL)
+                safef(reason,len,"INVALID list '%s' match: %s = '%s'",validationRule,(char *)term,(char *)val);
+            return FALSE;
+            }
+        }
+    else if (count > 1)
+        {
+        // Chop a private copy so that the rule held in the hash stays intact for later calls
+        char *listCopy = cloneString(validationRule);
+        char **array = needMem(count*sizeof(char*));
+        chopByChar(listCopy, ',', array, count); // Want to also trimSpaces()? No
+        boolean found = (stringArrayIx((char *)val, array, count) != -1);
+        // Release the scratch memory before any return so nothing leaks per call
+        freeMem(array);
+        freeMem(listCopy);
+
+        if (!found)
+            {
+            if (reason != NULL)
+                safef(reason,len,"INVALID list '%s' match: %s = '%s'",validationRule,(char *)term,(char *)val);
+            return FALSE;
+            }
+        }
+    else
+        {
+        if (reason != NULL)
+            safef(reason,len,"ERROR in %s: Invalid 'validate list: %s' for term %s.",CV_FILE_NAME,validationRule,(char *)term);
+        return FALSE;
+        }
+    }
+else if (startsWithWord(CV_VALIDATE_NONE,validationRule))
+    {
+    return TRUE;
+    }
+else if (startsWithWord(CV_VALIDATE_REGEX,validationRule))
+    {
+    // The pattern follows the first space of the rule
+    validationRule = skipBeyondDelimit(validationRule,' ');
+    if (validationRule == NULL)
+        {
+        if (reason != NULL)
+            safef(reason,len,"ERROR in %s: Invalid '%s' for %s.",CV_FILE_NAME,CV_VALIDATE_REGEX,(char *)term);
+        return FALSE;
+        }
+    // Real work ahead interpreting regex
+    regex_t regEx;
+    int err = regcomp(&regEx, validationRule, REG_NOSUB);  // Compile the regular expression so that it can be used.  Use: REG_EXTENDED ?
+    if (err != 0)
+        {
+        char buffer[128];
+        regerror(err, &regEx, buffer, sizeof buffer);
+        if (reason != NULL)
+            safef(reason,len,"ERROR in %s: Invalid regular expression for %s - %s.  %s.",CV_FILE_NAME,(char *)term,validationRule,buffer);
+        return FALSE;
+        }
+    err = regexec(&regEx, (char *)val, 0, NULL, 0);
+    regfree(&regEx);  // Compiled pattern is no longer needed whether or not val matched
+    if (err != 0)
+        {
+        if (reason != NULL)
+            safef(reason,len,"INVALID regex '%s' match: %s = '%s'",validationRule,(char *)term,(char *)val);
+        return FALSE;
+        }
+    }
+else
+    {
+    if (reason != NULL)
+        safef(reason,len,"ERROR in %s: Unknown validationRule rule '%s' for term %s.",CV_FILE_NAME,validationRule,(char *)term);
+    return FALSE;
+    }
+return TRUE;
+}
+