114397ec2020824f4cdd0eae3ddce83c86f5c4e7
kent
  Sat Dec 19 11:33:09 2020 -0800
Finally committing this small fix.  This should be viewed as one-shot code, since it is just a way to pull down data from a server that is no longer maintained.

diff --git src/hca/hcat/hcatParseParth/hcatParseParth.c src/hca/hcat/hcatParseParth/hcatParseParth.c
index 5020df6..d5e4f52 100644
--- src/hca/hcat/hcatParseParth/hcatParseParth.c
+++ src/hca/hcat/hcatParseParth/hcatParseParth.c
@@ -281,64 +281,75 @@
     {
     /* Each project is an object/hash/dictionary depending on your fave language.
      * Here we get that level object into a C hash, and extract the project_uuid */
     struct hash *projectHash = jsonObjectVal(projectRef->val, "project");
     struct jsonElement *uuidEl = hashMustFindVal(projectHash, "project_uuid");
     char *projectUuid = uuidEl->val.jeString;
 
     /* Get the ingest-info subobject and make sure it's complete. */
     struct jsonElement *ingestList = hashFindVal(projectHash, "ingest-info");
     if (ingestList == NULL)
         errAbort("Can't find ingest-info for project_uuid %s", projectUuid);
     if (ingestList->type != jsonList)
         errAbort("Expecting list value for ingest-info");
     int ingestListSize = slCount(ingestList->val.jeList);
     if (ingestListSize != 1)
-        verbose(2, "ingest-info[] has %d members\n", ingestListSize);
+        verbose(1, "ingest-info[] has %d members\n", ingestListSize);
 
     int subBunCount = 0;
     struct slRef *ingestRef;
     char *submissionId = NULL;
     char *shortName = NULL;
+    boolean gotReal = FALSE;
     for (ingestRef = ingestList->val.jeList; ingestRef != NULL; ingestRef = ingestRef->next)
 	{
 	struct jsonElement *ingestEl = ingestRef->val;
 	char *primaryState = jsonStringField(ingestEl, "primary_state");
 	if (!isComplete(primaryState))
 	     continue;
 
 	/* God help us even among the completes there are multiple projects associated
 	 * with the same thing.  So far project_short_name is unique.  We'll just take
 	 * the first (complete) one and warn about the rest.  Some of the dupes have the
 	 * same uuid, some different.  Yes, it's a little messy this input . */
 	shortName = jsonStringField(ingestEl, "project_short_name");
+	if (shortName == NULL)
+	    {
+	    verbose(1, "Skipping project without shortName '%s'\n", shortName);
+	    continue;
+	    }
 	// Abbreviate what is really and truly not a short name!
 	if (startsWith("Single cell RNAseq characterization of cell types produced over time in an in ",
 	     shortName))
 	    {
 	    shortName = "Single cell RNAseq characterization of cell types produced over time";
 	    verbose(2, "Abbreviated shortName to %s\n", shortName);
 	    }
 	if (hashLookup(uniqShortNameHash, shortName))
 	    {
 	    verbose(2, "Skipping duplicate project named '%s'\n", shortName);
 	    continue;
 	    }
 	hashAdd(uniqShortNameHash, shortName, NULL);
 
 	/* Grab more string fields we like from ingest-info. */
 	submissionId = jsonStringField(ingestEl, "submission_id");
+	if (submissionId == NULL)
+	   {
+	   warn("submissionId for %s is NULL", projectUuid);
+	   continue;
+	   }
 	char *title = jsonStringField(ingestEl, "project_title");
 	char *wrangler = jsonStringField(ingestEl, "data_curator");
 	char *contributors = jsonStringField(ingestEl, "primary_investigator");
 	char *submissionDateTime = jsonStringField(ingestEl, "submission_date");
 
 	/* Turn dateTime into just date */
 	char *tStart = strchr(submissionDateTime, 'T');
 	if (tStart == NULL)
 	    errAbort("No T separator in submission_date %s", submissionDateTime);
 	char *submissionDate = cloneStringZ(submissionDateTime, tStart - submissionDateTime);
 
 	/* Get species list, maybe.... */
 	struct jsonElement *speciesEl = jsonMustFindNamedField(ingestEl, "ingest-info", "species");
 	struct slRef *speciesRefList = jsonListVal(speciesEl, "species");
 	char *species = sciNameRefsToSpecies(speciesRefList, scratch);
@@ -353,36 +364,38 @@
 	char *techs = ingestConstructionRefsToAssayTech(constructList, scratch);
 
 	/* Still more error checking */
 	hashAddUnique(uniqHash, projectUuid, NULL);
 	hashAddUnique(uniqTitleHash, title, NULL);
 
 	/* Update contributors table */
 	dyStringClear(contribCsv);
 	outputContributors(fContrib, contributors, "contributor", contribCsv, scratch);
 	outputContributors(fContrib, wrangler, "wrangler", contribCsv, scratch);
 
 	/* Update project table */
 	fprintf(fProject, "%s\t%s\t", shortName, title);
 	fprintf(fProject, "%s\t%s\t%s\t", species, techs, contribCsv->string);
 	fprintf(fProject, "%s\n", submissionDate);
+	gotReal = TRUE;
 
 	break;	    // Still figuring out if this loop is here to stay
 	}
 
     /* We processed the heck out of the ingest-info, and this routine is so long,
      * pass along what we parsed out that goes into the tracker table, and have it
      * deal with the azul-info, matrix-info, etc,  which are read-only to wranglers. */
+    if (gotReal)
 	outputTracker(fTracker, shortName, submissionId, projectUuid, projectHash, 
 	    subBunCount, scratch);
     }
 }
 
 int main(int argc, char *argv[])
 /* Program entry point.  Calls usage() unless argc is exactly 3 once optionInit()
  * has run, then passes argv[1] and argv[2], in that order, to hcatParseParth(). */
 {
 enum { requiredArgc = 3 };   /* argv[0] plus two positional arguments */
 optionInit(&argc, argv, options);
 if (argc != requiredArgc)
     usage();
 hcatParseParth(argv[1], argv[2]);
 return 0;
 }