386b5d76ae4c1625e3da1421d4f2a73628ef6048
kent
  Wed Feb 21 14:06:22 2024 -0800
Making cdwSubmit validate tag names against a schema file in the cdwSettings rather than against hard-coded (and currently ifdefed out) tables.

diff --git src/hg/cirm/cdw/lib/cdwValid.c src/hg/cirm/cdw/lib/cdwValid.c
index ecd50f5..0eb54a5 100644
--- src/hg/cirm/cdw/lib/cdwValid.c
+++ src/hg/cirm/cdw/lib/cdwValid.c
@@ -199,30 +199,31 @@
     if (sameString(name, cdwBedTypeTable[i].name))
         return &cdwBedTypeTable[i];
     }
 return NULL;
 }
 
 struct cdwBedType *cdwBedTypeFind(char *name)
 /* Return cdwBedType of given name.  Abort if not found */
 {
 struct cdwBedType *bedType = cdwBedTypeMayFind(name);
 if (bedType == NULL)
     errAbort("Couldn't find bed format %s", name);
 return bedType;
 }
 
+#ifdef OLD
 char *cdwAllowedTags[] = {
     "access",
     "analyte",
     "analyte_detector",
     "analyte_reporter_fluorochrome",
     "assay",
     "assay_method",
     "assay_platform",
     "assay_seq",
     "average_insert_size",
     "biomaterial_provider",
     "biosample_ancestry_population",
     "biosample_cell_type",
     "biosample_characterization_protocol_id",
     "biosample_collectors_email",
@@ -352,78 +353,30 @@
 
 struct hash *cdwAllowedTagsHash()
 /* Get hash of all allowed tags */
 {
 static struct hash *allowedHash = NULL;
 if (allowedHash == NULL)
     {
     allowedHash = hashNew(7);
     int i;
     for (i=0; i<ArraySize(cdwAllowedTags); ++i)
 	hashAdd(allowedHash, cdwAllowedTags[i], NULL);
     }
 return allowedHash;
 }
 
-void cdwValidateTagName(char *tag)
-/* Make sure that tag is one of the allowed ones. */
-{
-char *geoPrefix = "GEO_";
-// If it's not a legal C symbol, don't let it be a tag
-if (!isSymbolString(tag))
-    errAbort("Bad tag symbol %s.", tag);
-// First see if it is in hash of allowed tags.
-struct hash *allowedHash = cdwAllowedTagsHash();
-if (hashLookup(allowedHash, tag) != NULL)
-    return;
-// Otherwise see if it's one of the prefixes that allows anything afterwords 
-else if (startsWith("lab_", tag) || startsWith("user_", tag) )
-    {
-    return;
-    }
-else if (startsWith(geoPrefix, tag) || startsWith("SRA_", tag))
-    {
-    // Generally just pass GEO_ and SRA_ tags through, but do check that
-    // the case is what we expect to avoid duplicate symbol conflicts between
-    // differently cased versions of GEO_ tags in particular.
-
-    // We have a couple of built-in geo_ tags for the major GEO database identifiers.
-    int tagLen = strlen(tag);
-    char lowerTag[tagLen+1];
-    strcpy(lowerTag, tag);
-    tolowers(lowerTag);
-    if (hashLookup(allowedHash, lowerTag))
-        errAbort("Please change %s tag to %s", tag, lowerTag);
-
-    // This will detect a misguided attempt to change case on bits after GEO_ that
-    // bit us once.
-    int geoPrefixSize = strlen(geoPrefix);
-    if (!isupper(tag[geoPrefixSize]))
-        errAbort("Looks like %s has been altered, expecting upper case letter after GEO_.", tag);
-    return;
-    }
-// Otherwise see if it's one of our reserved but unimplemented things
-else if (sameString("mixin", tag) || sameString("deprecated", tag) 
-    || sameString("deprecated_acc", tag) || sameString("children", tag)
-    || sameString("replaces_reason", tag) || sameString("replaces_file", tag))
-    {
-    errAbort("%s not implemented", tag);
-    }
-// Otherwise, nope, doesn't validate.
-errAbort("Unknown tag '%s'", tag);
-}
-
 static struct hash *makeStringHash(char **array, int size)
 /* Make a hash that contains all elements of array strings of given size */
 {
 struct hash *hash = hashNew(0);
 int i;
 for (i=0; i<size; ++i)
     hashAdd(hash, array[i], NULL);
 return hash;
 }
 
 static struct hash *makeCvHash()
 /* Turn a bunch of lists of words into hashes for fast lookup of whether
  * something is in a controlled vocabulary. */
 {
 /* These are just code generate things pasted in for now.  May do something more elegant and
@@ -576,43 +529,92 @@
 hashAdd(hash, "biosample_source_health_status", 
     makeStringHash(biosample_source_health_status, ArraySize(biosample_source_health_status)));
 hashAdd(hash, "enriched_in", makeStringHash(enriched_in, ArraySize(enriched_in)));
 hashAdd(hash, "format", makeStringHash(format, ArraySize(format)));
 hashAdd(hash, "fluidics_chip", makeStringHash(fluidics_chip, ArraySize(fluidics_chip)));
 hashAdd(hash, "assay_platform", makeStringHash(assay_platform, ArraySize(assay_platform)));
 hashAdd(hash, "species", makeStringHash(species, ArraySize(species)));
 hashAdd(hash, "strain", makeStringHash(strain, ArraySize(strain)));
 hashAdd(hash, "subcellular_localization", 
     makeStringHash(subcellular_localization, ArraySize(subcellular_localization)));
 hashAdd(hash, "immunoprecipitation_target", 
     makeStringHash(immunoprecipitation_target, ArraySize(immunoprecipitation_target)));
 return hash;
 }
 
-void cdwValidateTagVal(char *tag, char *val)
+void old_cdwValidateTagVal(char *tag, char *val)
 /* Make sure that tag is one of the allowed ones and that
  * val is compatible */
 {
 cdwValidateTagName(tag);
 static struct hash *cvHash = NULL;
 if (cvHash == NULL)
     cvHash = makeCvHash();
 struct hash *hash = hashFindVal(cvHash, tag);
 if (hash != NULL)
     if (!hashLookup(hash, val))
        errAbort("%s is not a valid value for tag %s\n", val, tag);
 }
+#endif /* OLD */
+
+void cdwValidateTagName(char *tag, struct hash *schemaHash)
+/* Make sure that tag is one of the allowed ones.  Tags found in schemaHash pass, as do
+ * tags starting with lab_ or user_.  GEO_ and SRA_ tags pass after a case check.
+ * Anything else, including reserved but unimplemented tags, is reported with errAbort. */
+{
+// If it's not a legal C symbol, don't let it be a tag
+if (!isSymbolString(tag))
+    errAbort("Bad tag symbol %s.", tag);
+// First see if it is in hash of allowed tags.
+if (hashLookup(schemaHash, tag) != NULL)
+    return;
+// Otherwise see if it's one of the prefixes that allows anything afterwards
+if (startsWith("lab_", tag) || startsWith("user_", tag))
+    return;
+// GEO_ and SRA_ share handling; note which matched so errors name the right prefix.
+char *dbPrefix = NULL;
+if (startsWith("GEO_", tag))
+    dbPrefix = "GEO_";
+else if (startsWith("SRA_", tag))
+    dbPrefix = "SRA_";
+if (dbPrefix != NULL)
+    {
+    // Generally just pass GEO_ and SRA_ tags through, but do check that
+    // the case is what we expect to avoid duplicate symbol conflicts between
+    // differently cased versions of GEO_ tags in particular.
+    // If the lower-cased tag is in the schema, ask for that spelling instead.
+    char lowerTag[strlen(tag)+1];
+    strcpy(lowerTag, tag);
+    tolowers(lowerTag);
+    if (hashLookup(schemaHash, lowerTag))
+        errAbort("Please change %s tag to %s", tag, lowerTag);
+    // This will detect a misguided attempt to change case on bits after the
+    // prefix that bit us once.  Cast keeps isupper() defined for any char value.
+    if (!isupper((unsigned char)tag[strlen(dbPrefix)]))
+        errAbort("Looks like %s has been altered, expecting upper case letter after %s.",
+                 tag, dbPrefix);
+    return;
+    }
+// Otherwise see if it's one of our reserved but unimplemented things
+if (sameString("mixin", tag) || sameString("deprecated", tag)
+    || sameString("deprecated_acc", tag) || sameString("children", tag)
+    || sameString("replaces_reason", tag) || sameString("replaces_file", tag))
+    errAbort("%s not implemented", tag);
+// Otherwise, nope, doesn't validate.
+errAbort("Unknown tag '%s'", tag);
+}
+
 
 struct slPair *cdwFormatList()
 /* Return list of formats.  The name of the list items are the format names.
  * The vals are short descriptions. */
 {
 static struct slPair *list = NULL;
 if (list == NULL)
     {
     static char *array[] = 
 	{
 	"2bit Two bit per base DNA format",
 	"bam Short read mapping format",
 	"bed Genome browser compatible format for genes and other discrete elements",
 	"bigBed	Compressed BED recommended for files with more than 100,000 elements",
 	"bigWig	Compressed base by base signal graphs",