72ce237deec6e71abc9588bee0a94caba887e052 kent Mon Sep 2 07:19:55 2019 -0700 Getting rid of some fields of contributor that HCAT doesn't support yet. Unkinking some of the CSV handling. Coping with an inconsistency in Django underbars. diff --git src/hca/hcat/hcatTabUpdate/hcatTabUpdate.c src/hca/hcat/hcatTabUpdate/hcatTabUpdate.c index 6850d5d..e3b9bf1 100644 --- src/hca/hcat/hcatTabUpdate/hcatTabUpdate.c +++ src/hca/hcat/hcatTabUpdate/hcatTabUpdate.c @@ -92,104 +92,106 @@ char *slNameToCsv(struct slName *list) /* Convert slNames to a long string */ { struct dyString *dy = dyStringNew(0); struct slName *el; for (el = list; el != NULL; el = el->next) csvEscapeAndAppend(dy, el->name); return dyStringCannibalize(&dy); } struct fieldedTable *makeContributors(struct fieldedTable *inProject) /* Make a fielded table from project contact info and contributors list */ { char **projectRow = inProject->rowList->row; +struct dyString *csvScratch = dyStringNew(0); /* Make the contributors list in two pieces first off of the contact */ int contact_email = fieldedTableFindFieldIx(inProject, "contact_email"); int contact_phone = fieldedTableFindFieldIx(inProject, "contact_phone"); -int contact_department = fieldedTableFindFieldIx(inProject, "contact_department"); -int contact_institute = fieldedTableFindFieldIx(inProject, "contact_institute"); -int contact_address = fieldedTableFindFieldIx(inProject, "contact_address"); -int contact_city = fieldedTableFindFieldIx(inProject, "contact_city"); -int contact_country = fieldedTableFindFieldIx(inProject, "contact_country"); -int contact_zip_postal_code = fieldedTableFindFieldIx(inProject, "contact_zip_postal_code"); +//int contact_department = fieldedTableFindFieldIx(inProject, "contact_department"); +//int contact_institute = fieldedTableFindFieldIx(inProject, "contact_institute"); +//int contact_address = fieldedTableFindFieldIx(inProject, "contact_address"); +//int contact_city = fieldedTableFindFieldIx(inProject, "contact_city"); 
+//int contact_country = fieldedTableFindFieldIx(inProject, "contact_country"); +//int contact_zip_postal_code = fieldedTableFindFieldIx(inProject, "contact_zip_postal_code"); /* Figure out which contact data we actually have */ const int maxContacts = 32; char *contactFields[maxContacts+1]; // An extra for the project_role int contactIx[maxContacts]; char **oldFields = inProject->fields; // Add contact name field separately from the rest. We know it's there since it's a // required field, and also we need to decorate it's name contactFields[0] = "?name"; contactIx[0] = fieldedTableMustFindFieldIx(inProject, "contact_name"); int realFieldCount = 1; // The rest of the contact pieces are added just conditionally addIfReal(contact_email, oldFields, contactFields, contactIx, maxContacts, &realFieldCount); addIfReal(contact_phone, oldFields, contactFields, contactIx, maxContacts, &realFieldCount); -addIfReal(contact_department, oldFields, contactFields, contactIx, maxContacts, &realFieldCount); -addIfReal(contact_institute, oldFields, contactFields, contactIx, maxContacts, &realFieldCount); -addIfReal(contact_address, oldFields, contactFields, contactIx, maxContacts, &realFieldCount); -addIfReal(contact_city, oldFields, contactFields, contactIx, maxContacts, &realFieldCount); -addIfReal(contact_country, oldFields, contactFields, contactIx, maxContacts, &realFieldCount); -addIfReal(contact_zip_postal_code, oldFields, - contactFields, contactIx, maxContacts, &realFieldCount); +//addIfReal(contact_department, oldFields, contactFields, contactIx, maxContacts, &realFieldCount); +//#addIfReal(contact_institute, oldFields, contactFields, contactIx, maxContacts, &realFieldCount); +//#addIfReal(contact_address, oldFields, contactFields, contactIx, maxContacts, &realFieldCount); +//addIfReal(contact_city, oldFields, contactFields, contactIx, maxContacts, &realFieldCount); +//addIfReal(contact_country, oldFields, contactFields, contactIx, maxContacts, &realFieldCount); 
+//addIfReal(contact_zip_postal_code, oldFields, +// contactFields, contactIx, maxContacts, &realFieldCount); contactFields[realFieldCount] = "project_role"; realFieldCount += 1; /* Make contributor output table. The first row of it will be seeded with the contact. * We can fill out names, but not other info on the other contributors, who will make * up the rest of the rows. */ struct fieldedTable *contributors = fieldedTableNew("contributor", contactFields, realFieldCount); contributors->startsSharp = inProject->startsSharp; /* Make up first row from contacts */ char *outVals[realFieldCount]; int outIx; struct dyString *scratch = dyStringNew(0); for (outIx=0; outIx<realFieldCount-1; ++outIx) { char *inTsv = projectRow[contactIx[outIx]]; char *inVal = emptyForNull(cloneString(csvParseNext(&inTsv, scratch))); - outVals[outIx] = inVal; + outVals[outIx] = cloneString(csvEscapeToDyString(csvScratch, inVal)); } outVals[outIx] = "lab contact"; char *contactName = cloneString(outVals[0]); fieldedTableAdd(contributors, outVals, realFieldCount, 1); /* Unroll the contributors field into further rows*/ for (outIx=0; outIx<realFieldCount; ++outIx) outVals[outIx] = ""; // Empty out all rows. 
int inContribIx = fieldedTableMustFindFieldIx(inProject, "contributors"); int outContribIx = fieldedTableMustFindFieldIx(contributors, "?name"); char *inTsv = projectRow[inContribIx]; char *oneVal; while ((oneVal = csvParseNext(&inTsv, scratch)) != NULL) { if (differentString(oneVal, contactName)) // We already got the contact as a contributor { - outVals[outContribIx] = cloneString(oneVal); + outVals[outContribIx] = csvEscapeToDyString(csvScratch, oneVal); outVals[realFieldCount-1] = "contributor"; fieldedTableAdd(contributors, outVals, realFieldCount, contributors->rowCount+1); } } +dyStringFree(&csvScratch); return contributors; } char *lookupSpecies(char *taxon) /* Some day we may query a decent database, for now * just have some of the most common */ { if (sameString(taxon, "9606")) return "human"; if (sameString(taxon, "10090")) return "mouse"; if (sameString(taxon, "10116")) return "rat"; if (sameString(taxon, "7955")) return "zebrafish"; if (sameString(taxon, "7227")) return "fly"; if (sameString(taxon, "6239")) return "worm"; if (sameString(taxon, "4932")) return "yeast"; errAbort("Unknown taxon %s", taxon); @@ -209,36 +211,39 @@ csvEscapeAndAppend(result, species); } return dyStringCannibalize(&result); } void addListFieldIfNonempty(char *field, struct slName *list, char *newFields[], char *newVals[], int maxNewCount,int *pCurCount) /* Add field to newFields if list is non-empty, taking care not to go past end. 
*/ { if (list != NULL) { int curCount = *pCurCount; if (curCount >= maxNewCount) errAbort("Too many fields in addListFieldIfNonempty on %s, %d max", field, curCount); char fieldName[256]; + char *strippedField = cloneString(field); + stripChar(strippedField, '_'); safef(fieldName, sizeof(fieldName), "@@%s@hcat_project_%s@project_id@%s_id@hcat_%s.short_name@id", - field, field, field, field); + field, strippedField, field, strippedField); newFields[curCount] = cloneString(fieldName); newVals[curCount] = slNameToCsv(list); *pCurCount = curCount+1; + freez(&strippedField); } } struct fieldedTable *makeProject(struct fieldedTable *inProject, struct fieldedTable *inSample) /* Make output project table. This is the big one - 35 fields now * probably twice that by the time HCA is done. Fortunately we only need * to deal with some of the fields and it only has one row. */ { char **inFields = inProject->fields; char **inRow = inProject->rowList->row; int inFieldCount = inProject->fieldCount; struct dyString *scratch = dyStringNew(0); int outFieldMax = inFieldCount + 16; // Mostly we remove fields but we do add a few. Gets checked