src/hca/hcat/hcatTabUpdate/hcatTabUpdate.c 2fe474a25e247a186d1ba8890c9c70028fbad0e2

2fe474a25e247a186d1ba8890c9c70028fbad0e2
kent
  Wed Sep 18 20:19:17 2019 -0700
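Adding contributors to contact in hopes of unrolling them all and avoiding interns cutting and pasting long author lists. In this file the "contributors" project column becomes optional (it is unrolled into contributor rows, and emitted as a project field, only when present), contact and contributor values are no longer CSV re-escaped, the lab table code is wrapped in #ifdef TOO_FLAKEY, and a leftover uglyf debug message is removed.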

diff --git src/hca/hcat/hcatTabUpdate/hcatTabUpdate.c src/hca/hcat/hcatTabUpdate/hcatTabUpdate.c
index 86325d9..b91f730 100644
--- src/hca/hcat/hcatTabUpdate/hcatTabUpdate.c
+++ src/hca/hcat/hcatTabUpdate/hcatTabUpdate.c
@@ -141,53 +141,55 @@
 /* Make contributor output table.  The first row of it will be seeded with the contact.
  * We can fill out names, but not other info on the other contributors, who will make
  * up the rest of the rows. */
 struct fieldedTable *contributors = fieldedTableNew("contributor", contactFields, 
     realFieldCount);
 contributors->startsSharp = inProject->startsSharp;
 
 /* Make up first row from contacts */
 char *outVals[realFieldCount];
 int outIx;
 struct dyString *scratch = dyStringNew(0);
 for (outIx=0; outIx<realFieldCount-1; ++outIx)
     {
     char *inTsv = projectRow[contactIx[outIx]];
     char *inVal = emptyForNull(cloneString(csvParseNext(&inTsv, scratch)));
-    outVals[outIx] = cloneString(csvEscapeToDyString(csvScratch, inVal));
+    outVals[outIx] = inVal;
     }
 outVals[outIx] = "lab contact";
 char *contactName = cloneString(outVals[0]);
 fieldedTableAdd(contributors, outVals, realFieldCount, 1);
 
-/* Unroll the contributors field  into further rows*/
+/* Unroll the contributors field  into further rows if it exists. */
+int inContribIx = fieldedTableFindFieldIx(inProject, "contributors");
+if (inContribIx >= 0)
+    {
     for (outIx=0; outIx<realFieldCount; ++outIx)
 	outVals[outIx] = "";	// Empty out all rows.
-int inContribIx = fieldedTableMustFindFieldIx(inProject, "contributors");
     int outContribIx = fieldedTableMustFindFieldIx(contributors, "?name");
     char *inTsv = projectRow[inContribIx];
     char *oneVal;
     while ((oneVal = csvParseNext(&inTsv, scratch)) != NULL)
 	{
-    char *escaped = csvEscapeToDyString(csvScratch, oneVal);
-    if (differentString(escaped, contactName))  // We already got the contact as a contributor
+	if (differentString(oneVal, contactName))  // We already got the contact as a contributor
 	    {
-	outVals[outContribIx] = escaped;
+	    outVals[outContribIx] = oneVal;
 	    outVals[realFieldCount-1] = "contributor";
 	    fieldedTableAdd(contributors, outVals, realFieldCount, contributors->rowCount+1);
 	    }
 	}
+    }
 dyStringFree(&csvScratch);
 return contributors;
 }
 
 char *lookupSpecies(char *taxon)
 /* Some day we may query a decent database, for now
  * just have some of the most common */
 {
 if (sameString(taxon, "9606")) return "human";
 if (sameString(taxon, "10090")) return "mouse";
 if (sameString(taxon, "10116")) return "rat";
 if (sameString(taxon, "7955")) return "zebrafish";
 if (sameString(taxon, "7227")) return "fly";
 if (sameString(taxon, "6239")) return "worm";
 if (sameString(taxon, "4932")) return "yeast";
@@ -322,75 +324,80 @@
     if (sameString("state_reached", inName) || sameString("cur_state", inName))
         {
 	safef(nameBuf, sizeof(nameBuf), "@%s_id@hcat_projectstate@state@id", inName);
 	inName = cloneString(nameBuf);
 	}
     else if (sameString("consent", inName))
         {
 	safef(nameBuf, sizeof(nameBuf), "@%s_id@hcat_%s@short_name@id", inName, inName);
 	inName = cloneString(nameBuf);
 	}
     else if (sameString("effort", inName))
         {
 	safef(nameBuf, sizeof(nameBuf), "@%s_id@hcat_efforttype@short_name@id", inName);
 	inName = cloneString(nameBuf);
 	}
+#ifdef TOO_FLAKEY
     else if (sameString("lab", inName))
         {
 	safef(nameBuf, sizeof(nameBuf), "%s",
 	    "@@lab@id@hcat_project_labs@project_id@lab_id@hcat_lab@short_name@id");
 	inName = cloneString(nameBuf);
 	}
+#endif /* TOO_FLAKEY */
     else if (sameString("publications", inName))
         {
 	safef(nameBuf, sizeof(nameBuf), "%s",
 	    "@@publications@id@hcat_project_publications@project_id@publication_id@hcat_publication@short_name@id");
 	inName = cloneString(nameBuf);
 	}
 
     /* Output all the ones we haven't dealt with already or will deal with later */
     if (!startsWith("contact_", inName) && !sameString("contributors", inName))
         {
 	outFields[outFieldCount] = inName;
 	outRow[outFieldCount] = inVal;
 	++outFieldCount;
 	}
     }
 
 /* Add in contributors as a multi to multi field */
+char *contributors = fieldedTableLookupNamedFieldInRow(inProject, "contributors", inRow);
+if (contributors != NULL)
+    {
     outFields[outFieldCount] = "@@contributors@id@hcat_project_contributors@project_id@contributor_id@hcat_contributor@name@id";
-outRow[outFieldCount] = fieldedTableLookupNamedFieldInRow(inProject, "contributors", inRow);
+    outRow[outFieldCount] = contributors;
     outFieldCount += 1;
+    }
 
 /* Add in contacts as a multi to multi field too */
 outFields[outFieldCount] = "@@contacts@id@hcat_project_contacts@project_id@contributor_id@hcat_contributor@name@id";
 outRow[outFieldCount] = fieldedTableLookupNamedFieldInRow(inProject, "contact_name", inRow);
 outFieldCount += 1;
 
 /* Add the fields we scan and merge from sample at end */
 projectVocabField(inProject, inSample, "organ", outDir, 
     outFields, outRow, outFieldMax, &outFieldCount);
 projectVocabField(inProject, inSample, "organ_part", outDir, 
     outFields, outRow, outFieldMax, &outFieldCount);
 projectVocabField(inProject, inSample, "assay_type", outDir, 
     outFields, outRow, outFieldMax, &outFieldCount);
 projectVocabField(inProject, inSample, "assay_tech", outDir, 
     outFields, outRow, outFieldMax, &outFieldCount);
 projectVocabField(inProject, inSample, "disease", outDir, 
     outFields, outRow, outFieldMax, &outFieldCount);
 
-uglyf("making project table with %d fields\n", outFieldCount);
 struct fieldedTable *outTable = fieldedTableNew("project", outFields, outFieldCount);
 outTable->startsSharp = inProject->startsSharp;
 fieldedTableAdd(outTable, outRow, outFieldCount, 2);
 dyStringFree(&scratch);
 return outTable;
 }
 
 
 struct fieldedTable *makePublication(struct fieldedTable *inProject)
 /* If there's a publication field we make a publication table and seed it with the pmid 
  * and stuff. */
 {
 int pubIx = fieldedTableFindFieldIx(inProject, "publications");
 if (pubIx >= 0)
     {
@@ -406,102 +413,107 @@
         {
 	char name[64];
 	safef(name, sizeof(name), "pmid: %s", pmid);
 	char *outRow[2] = {name, pmid};
 	fieldedTableAdd(pubTable, outRow, ArraySize(outRow), 0);
 	csvEscapeAndAppend(newPubNames, name);
 	}
     inRow[pubIx] = dyStringCannibalize(&newPubNames);  // Other people need to use new value too
     dyStringFree(&csvScratch);
     return pubTable;
     }
 else
     return NULL;
 }
 
+#ifdef TOO_FLAKEY
 struct fieldedTable *makeLab(struct fieldedTable *inProject)
 /* If there's a lab field we make a lab table and seed it with the contacts. */
 {
 int labIx = fieldedTableFindFieldIx(inProject, "lab");
 if (labIx >= 0)
     {
     char **inRow = inProject->rowList->row;
     char *short_name = inRow[labIx];
-    char *contributors = fieldedTableLookupNamedFieldInRow(inProject, "contributors", inRow);
+    char *contributors = emptyForNull(fieldedTableLookupNamedFieldInRow(
+							    inProject, "contributors", inRow));
     char *institute = fieldedTableLookupNamedFieldInRow(inProject, "contact_institute", inRow);
     char labName[256];
     if (strlen(short_name) < 20)  // Unlikely to be unique, may cause trouble
 	safef(labName, sizeof(labName), "%s %s", short_name, emptyForNull(institute));
     else
         safef(labName, sizeof(labName), "%s", short_name);
     labName[50] = 0;  // not too long
     inRow[labIx] = cloneString(labName);  /* Other people need to know about this too. */
 
     char *outFields[3] = {"?short_name", "institution", 
 	"@@contributors@id@hcat_lab_contributors@lab_id@contributor_id@hcat_contributor@name@id"};
     struct fieldedTable *labTable = fieldedTableNew("lab", outFields, ArraySize(outFields));
     char *outRow[3] = {labName, institute, contributors};
     fieldedTableAdd(labTable, outRow, ArraySize(outRow), 1);
     return labTable;
     }
 else
     return NULL;
 }
+#endif /* TOO_FLAKEY */
 
 void hcatTabUpdate(char *inDir, char *outDir)
 /* hcatTabUpdate - take the tabToTabDir result of the geo/sra import.
  * Put results in an output dir in a format sqlUpdateRelated understands. */
 {
 // We are actually just looking for specific files in inDir. */
 
 /* Load up input projects table */
 char *projectFile = "hcat_project.tsv";
 char inPath[PATH_LEN];
 safef(inPath, sizeof(inPath), "%s/%s", inDir, projectFile);
-char *projectRequired[] = {"short_name", "contact_name", "contributors"};
+char *projectRequired[] = {"short_name", "contact_name", };
 struct fieldedTable *inProject = fieldedTableFromTabFile(inPath, inPath, 
     projectRequired, ArraySize(projectRequired));
 
 /* Load up samples table */
 char *sampleFile = "hcat_sample.tsv";
 safef(inPath, sizeof(inPath), "%s/%s", inDir, sampleFile);
 char *sampleRequired[] = {"short_name",};
 struct fieldedTable *inSample = fieldedTableFromTabFile(inPath, inPath, 
     sampleRequired, ArraySize(sampleRequired));
-
+verbose(2, "Got %d fields %d rows in %s\n", inSample->fieldCount, inSample->rowCount, inPath);
 
 /* Make sure inProject table makes sense by having exactly one row */
 if (inProject->rowCount != 1)
     errAbort("Expected one row in %s, got %d\n", projectFile, inProject->rowCount);
 
 /* Write output from lowest level to highest level tables. */
 makeDirsOnPath(outDir);
 
 
 /* Contributors table - it's always there */
 struct fieldedTable *outContributor = makeContributors(inProject);
 char outPath[PATH_LEN];
 safef(outPath, sizeof(outPath), "%s/hcat_%s", outDir, "contributor.tsv");
 fieldedTableToTabFile(outContributor, outPath);
 
+#ifdef TOO_FLAKEY
 /* Make lab table if there is a lab field */
 struct fieldedTable *outLab = makeLab(inProject);
 if (outLab != NULL)
     {
     safef(outPath, sizeof(outPath), "%s/hcat_%s", outDir, "lab.tsv");
     fieldedTableToTabFile(outLab, outPath);
     }
+#endif /* TOO_FLAKEY */
 
 /* Make pubs table if there are pubs fields */
 struct fieldedTable *outPub = makePublication(inProject);
 if (outPub != NULL)
     {
     safef(outPath, sizeof(outPath), "%s/hcat_%s", outDir, "publication.tsv");
     fieldedTableToTabFile(outPub, outPath);
     }
 
 
 struct fieldedTable *outProject = makeProject(inProject, inSample, outDir);
 safef(outPath, sizeof(outPath), "%s/hcat_%s", outDir, "project.tsv");
 fieldedTableToTabFile(outProject, outPath);
 }