5db640d61b5495fb5a644885753ea37ff61dbb31 kent Mon Sep 2 08:46:09 2019 -0700 Fixing field names in output. diff --git src/hca/hcat/hcatTabUpdate/hcatTabUpdate.c src/hca/hcat/hcatTabUpdate/hcatTabUpdate.c index e3b9bf1..39a548b 100644 --- src/hca/hcat/hcatTabUpdate/hcatTabUpdate.c +++ src/hca/hcat/hcatTabUpdate/hcatTabUpdate.c @@ -160,33 +160,34 @@ outVals[outIx] = cloneString(csvEscapeToDyString(csvScratch, inVal)); } outVals[outIx] = "lab contact"; char *contactName = cloneString(outVals[0]); fieldedTableAdd(contributors, outVals, realFieldCount, 1); /* Unroll the contributors field into further rows*/ for (outIx=0; outIx<realFieldCount; ++outIx) outVals[outIx] = ""; // Empty out all rows. int inContribIx = fieldedTableMustFindFieldIx(inProject, "contributors"); int outContribIx = fieldedTableMustFindFieldIx(contributors, "?name"); char *inTsv = projectRow[inContribIx]; char *oneVal; while ((oneVal = csvParseNext(&inTsv, scratch)) != NULL) { - if (differentString(oneVal, contactName)) // We already got the contact as a contributor + char *escaped = csvEscapeToDyString(csvScratch, oneVal); + if (differentString(escaped, contactName)) // We already got the contact as a contributor { - outVals[outContribIx] = csvEscapeToDyString(csvScratch, oneVal); + outVals[outContribIx] = escaped; outVals[realFieldCount-1] = "contributor"; fieldedTableAdd(contributors, outVals, realFieldCount, contributors->rowCount+1); } } dyStringFree(&csvScratch); return contributors; } char *lookupSpecies(char *taxon) /* Some day we may query a decent database, for now * just have some of the most common */ { if (sameString(taxon, "9606")) return "human"; if (sameString(taxon, "10090")) return "mouse"; if (sameString(taxon, "10116")) return "rat"; @@ -214,31 +215,31 @@ } void addListFieldIfNonempty(char *field, struct slName *list, char *newFields[], char *newVals[], int maxNewCount,int *pCurCount) /* Add field to newFields if list is non-empty, taking care not to go past end. */ { if (list != NULL) { int curCount = *pCurCount; if (curCount >= maxNewCount) errAbort("Too many fields in addListFieldIfNonempty on %s, %d max", field, curCount); char fieldName[256]; char *strippedField = cloneString(field); stripChar(strippedField, '_'); safef(fieldName, sizeof(fieldName), - "@@%s@hcat_project_%s@project_id@%s_id@hcat_%s.short_name@id", + "@@%s@id@hcat_project_%s@project_id@%s_id@hcat_%s.short_name@id", field, strippedField, field, strippedField); newFields[curCount] = cloneString(fieldName); newVals[curCount] = slNameToCsv(list); *pCurCount = curCount+1; freez(&strippedField); } } struct fieldedTable *makeProject(struct fieldedTable *inProject, struct fieldedTable *inSample) /* Make output project table. This is the big one - 35 fields now * probably twice that by the time HCA is done. Fortunately we only need * to deal with some of the fields and it only has one row. */ { char **inFields = inProject->fields; @@ -252,58 +253,58 @@ int outFieldCount = 0; /* First we make up the basics of the outProject table. Mostly this is just * passing through from the inProject, but there's exceptions like contacts. */ int inIx; for (inIx=0; inIx<inFieldCount; ++inIx) { /* Fetch input name and value */ char *inName = inFields[inIx]; char *inVal = inRow[inIx]; /* Go through list of input fields we tweak slightly */ if (sameString("taxons", inName)) { // its many-to-many, whoot! - inName = "@@species@hcat_project_species@project_id@species_id@hcat_species@common_name@id"; + inName = "@@species@id@hcat_project_species@project_id@species_id@hcat_species@common_name@id"; inVal = taxonsToSpecies(inVal, scratch); } /* We might modify names of some fields */ char nameBuf[128]; if (sameString("state_reached", inName) || sameString("cur_state", inName)) { safef(nameBuf, sizeof(nameBuf), "@%s_id@hcat_project_state.state@id", inName); inName = cloneString(nameBuf); } else if (sameString("consent", inName) || sameString("effort", inName)) { safef(nameBuf, sizeof(nameBuf), "@%s_id@hcat_%s.short_name@id", inName, inName); inName = cloneString(nameBuf); } /* Output all the ones we haven't dealt with already or will deal with later */ if (!startsWith("contact_", inName) && !sameString("contributors", inName)) { outFields[outFieldCount] = inName; outRow[outFieldCount] = inVal; ++outFieldCount; } } /* Add in contributors as a multi to multi field */ -outFields[outFieldCount] = "@@contributors@hcat_project_contributors@project_id@contributor_id@hcat_contributor.name@id"; +outFields[outFieldCount] = "@@contributors@hcat_project_contributors@id@project_id@contributor_id@hcat_contributor@name@id"; outRow[outFieldCount] = fieldedTableLookupNamedFieldInRow(inProject, "contributors", inRow); /* Add the fields we scan and merge from sample at end */ struct slName *organList = uniqVals(inSample, "organ"); struct slName *organPartList = uniqVals(inSample, "organ_part"); struct slName *assayTypeList = uniqVals(inSample, "assay_type"); struct slName *diseaseList = uniqVals(inSample, "disease"); addListFieldIfNonempty("organ", organList, outFields, outRow, outFieldMax, &outFieldCount); addListFieldIfNonempty("organ_part", organPartList, outFields, outRow, outFieldMax, &outFieldCount); addListFieldIfNonempty("assay_type", assayTypeList, outFields, outRow, outFieldMax, &outFieldCount); addListFieldIfNonempty("disease", diseaseList, outFields, outRow, outFieldMax, &outFieldCount); struct fieldedTable *outTable = fieldedTableNew("project", outFields, outFieldCount); outTable->startsSharp = inProject->startsSharp; fieldedTableAdd(outTable, outRow, outFieldCount, 1); @@ -317,32 +318,32 @@ int labIx = fieldedTableFindFieldIx(inProject, "lab"); if (labIx >= 0) { char **inRow = inProject->rowList->row; char *short_name = inRow[labIx]; char *contact = fieldedTableLookupNamedFieldInRow(inProject, "contact_name", inRow); char *contributors = fieldedTableLookupNamedFieldInRow(inProject, "contributors", inRow); char *institute = emptyForNull(fieldedTableLookupNamedFieldInRow(inProject, "contact_institute", inRow)); char labName[256]; if (strlen(short_name) < 20) // Unlikely to be unique, may cause trouble safef(labName, sizeof(labName), "%s %s", short_name, institute); else safef(labName, sizeof(labName), "%s", short_name); labName[50] = 0; // not too long - char *outFields[4] = {"?short_name", "institute", "@contact@hcat_contributor.name@id", - "@@contributors@hcat_lab_contributors@lab_id@contributor_id@hcat_contributor.name@id"}; + char *outFields[4] = {"?short_name", "institution", "@contact_id@hcat_contributor@name@id", + "@@contributors@id@hcat_lab_contributors@lab_id@contributor_id@hcat_contributor@name@id"}; struct fieldedTable *labTable = fieldedTableNew("lab", outFields, ArraySize(outFields)); char *outRow[4] = {labName, institute, contact, contributors}; fieldedTableAdd(labTable, outRow, ArraySize(outRow), 1); return labTable; } else return NULL; } void hcatTabUpdate(char *inDir, char *outDir) /* hcatTabUpdate - Update the hcat database given a tab seperated input and output dir. */ { // We are actually just looking for specific files in inDir. */ /* Load up input projects table */