386b5d76ae4c1625e3da1421d4f2a73628ef6048 kent Wed Feb 21 14:06:22 2024 -0800 Making cdwSubmit tag names against a schema file in the cdwSettings rather than hard-coded (and currently ifdefed out) tables. diff --git src/hg/cirm/cdw/cdwSubmit/cdwSubmit.c src/hg/cirm/cdw/cdwSubmit/cdwSubmit.c index c6bb05d..588cfd1 100644 --- src/hg/cirm/cdw/cdwSubmit/cdwSubmit.c +++ src/hg/cirm/cdw/cdwSubmit/cdwSubmit.c @@ -16,30 +16,31 @@ #include "cheapcgi.h" #include "net.h" #include "hmac.h" #include "paraFetch.h" #include "md5.h" #include "portable.h" #include "obscure.h" #include "hex.h" #include "filePath.h" #include "fieldedTable.h" #include "cdw.h" #include "cdwValid.h" #include "cdwLib.h" #include "mailViaPipe.h" #include "tagStorm.h" +#include "tagSchema.h" boolean doUpdate = FALSE; boolean noRevalidate = FALSE; boolean noBackup = FALSE; boolean justTest = FALSE; void usage() /* Explain usage and exit. */ { errAbort( "cdwSubmit - Submit URL with validated.txt to warehouse.\n" "usage:\n" " cdwSubmit email manifest.txt meta.txt\n" "where email is the email address associated with the data set, typically from the lab, not the\n" "wrangler. 
You need to run cdwCreateUser with the email address if it's the first submission \n" @@ -747,61 +748,61 @@ newFile->submitFileName, oldFile->cdwFileName, name, oldVal, newVal); } verbose(1, "updating tags for %s\n", newFile->submitFileName); } if (updateMeta || updateTags) cdwFileResetTags(conn, oldFile, newFile->tags, !noRevalidate, submitId); if (updateTags || updateName || updateMeta) ++updateCount; cgiDictionaryFree(&oldTags); cgiDictionaryFree(&newTags); } return updateCount; } -static void rCheckTagValid(struct tagStorm *tagStorm, struct tagStanza *list) +static void rCheckTagValid(struct tagStorm *tagStorm, struct tagStanza *list, struct hash *schemaHash) /* Check tagStorm tags */ { struct tagStanza *stanza; for (stanza = list; stanza != NULL; stanza = stanza->next) { struct slPair *pair; for (pair = stanza->tagList; pair != NULL; pair = pair->next) { - cdwValidateTagName(pair->name); + cdwValidateTagName(pair->name, schemaHash); } - rCheckTagValid(tagStorm, stanza->children); + rCheckTagValid(tagStorm, stanza->children, schemaHash); } } -void checkMetaTags(struct tagStorm *tagStorm) +void checkMetaTags(struct tagStorm *tagStorm, struct hash *schemaHash) /* Check tags are all good. */ { -rCheckTagValid(tagStorm, tagStorm->forest); +rCheckTagValid(tagStorm, tagStorm->forest, schemaHash); } boolean isEnrichedInFormat(char *format) /* Return TRUE if it is a genomic format */ { char *formats[] = {"bed", "bigBed", "bigWig", "fastq", "gtf",}; return stringArrayIx(format, formats, ArraySize(formats)) >= 0; } void checkManifestAndMetadata( struct fieldedTable *table, int fileIx, int formatIx, int metaIx, int enrichedInIx, - struct tagStorm *tagStorm, struct hash *metaHash) + struct tagStorm *tagStorm, struct hash *metaHash, struct hash *schemaHash) /* Make sure that all file names are unique, all metadata tags are unique, and that * meta tags in table exist in tagStorm. Some of the replace a file logic is here. 
*/ { /* Check files for uniqueness, formats for being supported, and meta for existance. */ struct fieldedRow *row; struct hash *fileHash = hashNew(0); for (row = table->rowList; row != NULL; row = row->next) { char *file = row->row[fileIx]; char *format = row->row[formatIx]; char *meta = row->row[metaIx]; /* Make sure that files are all unique */ struct fieldedRow *oldRow = hashFindVal(fileHash, file); if (oldRow != NULL) @@ -830,35 +831,35 @@ int i; for (i=0; i<table->fieldCount; ++i) { char *field = table->fields[i]; - cdwValidateTagName(field); + cdwValidateTagName(field, schemaHash); } /* Check meta.txt tags */ -checkMetaTags(tagStorm); +checkMetaTags(tagStorm, schemaHash); } char *nullForNaOrEmpty(char *s) /* If s is NULL, "", or "n/a" return NULL, otherwise return s */ { if (s == NULL || s[0] == 0 || sameWord(s, "n/a")) return NULL; else return s; } int storeSubmissionFile(struct sqlConnection *conn, char *submitFileName, int submitId, int submitDirId, struct cdwUser *user, char *access) /* Save file to warehouse and make a record for it. This is for tagless files, * just the ones that make up the submission metadata. 
*/ @@ -1031,33 +1032,41 @@ struct fieldedTable *table = fieldedTableFromTabFile(manifestFile, manifestFile, requiredFields, ArraySize(requiredFields)); int fileIx = stringArrayIx("file", table->fields, table->fieldCount); int formatIx = stringArrayIx("format", table->fields, table->fieldCount); int metaIx = stringArrayIx("meta", table->fields, table->fieldCount); int enrichedInIx = stringArrayIx("enriched_in", table->fields, table->fieldCount); verbose(1, "Got %d fields and %d rows in %s\n", table->fieldCount, slCount(table->rowList), manifestFile); struct tagStorm *tagStorm = tagStormFromFile(metaFile); struct hash *metaHash = tagStormIndexExtended(tagStorm, "meta", TRUE, FALSE); verbose(1, "Got %d items in metaHash\n", metaHash->elCount); struct sqlConnection *conn = cdwConnectReadWrite(); +char *schemaFile = cdwRequiredSetting(conn, "schema"); +verbose(2, "Reading schema from %s\n", schemaFile); +struct tagSchema *schemaList = tagSchemaFromFile(schemaFile); +struct hash *schemaHash = tagSchemaHash(schemaList); +verbose(1, "Got %d items in tagSchema\n", schemaHash->elCount); + struct cdwUser *user = cdwMustGetUserFromEmail(conn, email); +uglyf("about to checkManifest\n"); checkManifestAndMetadata(table, fileIx, formatIx, metaIx, enrichedInIx, - tagStorm, metaHash); + tagStorm, metaHash, schemaHash); +uglyf("done checkManifest\n"); /* Convert to data structure that has more fields. If submission contains * replacement files, check that the accessions being replaced are legitimate. 
*/ int md5Ix = stringArrayIx("md5", table->fields, table->fieldCount); int replacesIx = stringArrayIx(replacesTag, table->fields, table->fieldCount); int replaceReasonIx = stringArrayIx(replaceReasonTag, table->fields, table->fieldCount); struct submitFileRow *sfrList = submitFileRowFromFieldedTable(conn, table, fileIx, md5Ix, replacesIx, replaceReasonIx); verbose(2, "Parsed manifest and metadata into %d files\n", slCount(sfrList)); /* Fake URL - system was built initially for remote files. */ char submitUrl[PATH_LEN]; safef(submitUrl, sizeof(submitUrl), "%s%s/%s", localPrefix, submitDir, manifestFile); if (startsWith("/", manifestFile)) errAbort("Please don't include full path to manifest file path. This is no longer needed.");