114397ec2020824f4cdd0eae3ddce83c86f5c4e7 kent Sat Dec 19 11:33:09 2020 -0800 Finally committing this small fix. This should be viewed as one-shot code as it is just a way to pull down data from a server no longer maintained. diff --git src/hca/hcat/hcatParseParth/hcatParseParth.c src/hca/hcat/hcatParseParth/hcatParseParth.c index 5020df6..d5e4f52 100644 --- src/hca/hcat/hcatParseParth/hcatParseParth.c +++ src/hca/hcat/hcatParseParth/hcatParseParth.c @@ -281,64 +281,75 @@ { /* Each project is an object/hash/dictionary depending on your fave language. * Here we get that level object into a C hash, and extract the project_uuid */ struct hash *projectHash = jsonObjectVal(projectRef->val, "project"); struct jsonElement *uuidEl = hashMustFindVal(projectHash, "project_uuid"); char *projectUuid = uuidEl->val.jeString; /* Get the ingest-info subobject and make sure it's complete. */ struct jsonElement *ingestList = hashFindVal(projectHash, "ingest-info"); if (ingestList == NULL) errAbort("Can't find ingest-info for project_uuid %s", projectUuid); if (ingestList->type != jsonList) errAbort("Expecting list value for ingest-info"); int ingestListSize = slCount(ingestList->val.jeList); if (ingestListSize != 1) - verbose(2, "ingest-info[] has %d members\n", ingestListSize); + verbose(1, "ingest-info[] has %d members\n", ingestListSize); int subBunCount = 0; struct slRef *ingestRef; char *submissionId = NULL; char *shortName = NULL; + boolean gotReal = FALSE; for (ingestRef = ingestList->val.jeList; ingestRef != NULL; ingestRef = ingestRef->next) { struct jsonElement *ingestEl = ingestRef->val; char *primaryState = jsonStringField(ingestEl, "primary_state"); if (!isComplete(primaryState)) continue; /* God help us even among the completes there are multiple projects associated * with the same thing. So far project_short_name is unique. We'll just take * the first (complete) one and warn about the rest. Some of the dupes have the * same uuid, some different. Yes, it's a little messy this input . */ shortName = jsonStringField(ingestEl, "project_short_name"); + if (shortName == NULL) + { + verbose(1, "Skipping project without shortName '%s'\n", shortName); + continue; + } // Abbreviate what is really and truly not a short name! if (startsWith("Single cell RNAseq characterization of cell types produced over time in an in ", shortName)) { shortName = "Single cell RNAseq characterization of cell types produced over time"; verbose(2, "Abbreviated shortName to %s\n", shortName); } if (hashLookup(uniqShortNameHash, shortName)) { verbose(2, "Skipping duplicate project named '%s'\n", shortName); continue; } hashAdd(uniqShortNameHash, shortName, NULL); /* Grab more string fields we like from ingest-info. */ submissionId = jsonStringField(ingestEl, "submission_id"); + if (submissionId == NULL) + { + warn("submissionId for %s is NULL", projectUuid); + continue; + } char *title = jsonStringField(ingestEl, "project_title"); char *wrangler = jsonStringField(ingestEl, "data_curator"); char *contributors = jsonStringField(ingestEl, "primary_investigator"); char *submissionDateTime = jsonStringField(ingestEl, "submission_date"); /* Turn dateTime into just date */ char *tStart = strchr(submissionDateTime, 'T'); if (tStart == NULL) errAbort("No T separator in submission_date %s", submissionDateTime); char *submissionDate = cloneStringZ(submissionDateTime, tStart - submissionDateTime); /* Get species list, maybe.... */ struct jsonElement *speciesEl = jsonMustFindNamedField(ingestEl, "ingest-info", "species"); struct slRef *speciesRefList = jsonListVal(speciesEl, "species"); char *species = sciNameRefsToSpecies(speciesRefList, scratch); @@ -353,36 +364,38 @@ char *techs = ingestConstructionRefsToAssayTech(constructList, scratch); /* Still more error checking */ hashAddUnique(uniqHash, projectUuid, NULL); hashAddUnique(uniqTitleHash, title, NULL); /* Update contributors table */ dyStringClear(contribCsv); outputContributors(fContrib, contributors, "contributor", contribCsv, scratch); outputContributors(fContrib, wrangler, "wrangler", contribCsv, scratch); /* Update project table */ fprintf(fProject, "%s\t%s\t", shortName, title); fprintf(fProject, "%s\t%s\t%s\t", species, techs, contribCsv->string); fprintf(fProject, "%s\n", submissionDate); + gotReal = TRUE; break; // Still figuring out if this loop is here to stay } /* We processed the heck out of the ingest-info, and this routine is so long, * pass along what we parsed out that goes into the tracker table, and have it * deal with the azul-info, matrix-info, etc, which are read-only to wranglers. */ + if (gotReal) outputTracker(fTracker, shortName, submissionId, projectUuid, projectHash, subBunCount, scratch); } } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc != 3) usage(); hcatParseParth(argv[1], argv[2]); return 0; }