a6f3ed94b533d5ffa08d7ff8f7c87a2956848e11 kent Mon May 10 13:37:24 2021 -0700 A program that takes tab separated files with column labels in the 1st row that we generate to templates with column labels in the 4th row like HCA likes. diff --git src/hca/toHca/hcaAffixTsvToTemplate/hcaAffixTsvToTemplate.c src/hca/toHca/hcaAffixTsvToTemplate/hcaAffixTsvToTemplate.c new file mode 100644 index 0000000..39eb67b --- /dev/null +++ src/hca/toHca/hcaAffixTsvToTemplate/hcaAffixTsvToTemplate.c @@ -0,0 +1,172 @@ +/* hcaAffixTsvToTemplate - Given a template of an HCA spreadsheet with the machine readable field + * names in the 4th row, and a sheet wit some of the fields in with the labels in the top row, + * make a new tsv that looks like the template with selected columns filled in.. */ + +#include "common.h" +#include "linefile.h" +#include "hash.h" +#include "options.h" +#include "fieldedTable.h" +#include "portable.h" + +boolean gFull = FALSE; +boolean gDir = FALSE; +boolean gAppend = FALSE; + +void usage() +/* Explain usage and exit. */ +{ +errAbort( + "hcaAffixTsvToTemplate - Given a template of an HCA spreadsheet with the machine readable field\n" + "names in the 4th row, and a sheet wit some of the fields in with the labels in the top row,\n" + "make a new tsv that looks like the template with selected columns filled in.\n" + "usage:\n" + " hcaAffixTsvToTemplate inSheet.tsv inTemplate.tsv outSheet.tsv\n" + "options:\n" + " -dir recurse treat inSheet, template, and outSheet as directories. There must be\n" + " a file in template named the same as each file in outSheet.\n" + " -full - include columns in template with no data in inSheet.\n" + " -append - appends to full template rather than just taking the first 4 lines" + ); +} + +/* Command line validation table. */ +static struct optionSpec options[] = { + {"dir", OPTION_BOOLEAN}, + {"full", OPTION_BOOLEAN}, + {"append", OPTION_BOOLEAN}, + {NULL, 0}, +}; + +void affixToTemplate(char *inSheet, char *inTemplate, char *outSheet) +/* affixToTemplate - Affix a single file to template. */ +{ +/* Read in input sheet and template */ +struct fieldedTable *smallSheet = fieldedTableFromTabFile(inSheet, inSheet, NULL, 0); +struct fieldedTable *template = fieldedTableFromTabFile(inTemplate, inTemplate, NULL, 0); +if (template->rowCount < 4) + errAbort("Expecting at least three rows in template %s, got %d", inTemplate, template->rowCount); +struct fieldedRow *computerNamesFr = slElementFromIx(template->rowList, 2); +assert(computerNamesFr != NULL); +char **computerNames = computerNamesFr->row; +int computerNamesCount = template->fieldCount; + +/* Figure out how inSheet field indexes relate to inTemplate field indexes */ +int translate[smallSheet->fieldCount]; +int i; +for (i=0; ifieldCount; ++i) + { + char *fieldName = smallSheet->fields[i]; + int tx = stringArrayIx(fieldName, computerNames, computerNamesCount); + if (tx < 0) + errAbort("field %s not found in row 4 of %s", fieldName, inTemplate); + translate[i] = tx; + } + +/* Create an output row that is prefilled with empty strings. */ +char *outRow[template->fieldCount]; +for (i=0; ifieldCount; ++i) + outRow[i] = ""; + +/* Create output table with same fields and same first three rows as template */ +struct fieldedTable *outTable = fieldedTableNew(outSheet, template->fields, template->fieldCount); +struct fieldedRow *fr; +for (i=0, fr=template->rowList; i<4; ++i, fr = fr->next) + fieldedTableAdd(outTable, fr->row, template->fieldCount, i); +if (gAppend) + { + // Keep going if they ask us to. + for (; fr != NULL; ++i, fr = fr->next) + fieldedTableAdd(outTable, fr->row, template->fieldCount, i); + } + +/* Add remaining rows from inTable spread over outRow */ +int smallCount = smallSheet->fieldCount; +for (fr = smallSheet->rowList; fr != NULL; fr = fr->next) + { + int i; + char **row = fr->row; + for (i=0; ifieldCount, outTable->rowCount); + } + +if (gFull) + fieldedTableToTabFile(outTable, outSheet); +else + { + /* Reduce back to small fields */ + char *smallRow[smallCount]; + int smallIx; + for (smallIx=0; smallIxfields[translate[smallIx]]; + struct fieldedTable *smallOut = fieldedTableNew(outSheet, smallRow, smallCount); + + /* Copy in our fields only. */ + for (fr = outTable->rowList; fr != NULL; fr = fr->next) + { + int i; + char **row = fr->row; + for (i=0; irowCount); + } + fieldedTableToTabFile(smallOut, outSheet); + fieldedTableFree(&smallOut); + } + +fieldedTableFree(&smallSheet); +fieldedTableFree(&template); +fieldedTableFree(&outTable); +} + +void hcaAffixTsvToTemplate(char *input, char *template, char *output) +/* hcaAffixTsvToTemplate - Given a template of an HCA spreadsheet with the machine readable field + * names in the 4th row, and a sheet wit some of the fields in with the labels in the top row, + * make a new tsv that looks like the template with selected columns filled in.. */ +{ +if (gDir) + { + struct slName *inList = listDir(input, "*"); + + /* Make a pass just to verify all template files are there to fail fast. */ + struct slName *in; + for (in = inList; in != NULL; in = in->next) + { + char templatePath[PATH_LEN]; + safef(templatePath, sizeof(templatePath), "%s/%s", template, in->name); + if (!fileExists(templatePath)) + errAbort("%s/%s exists but %s/%s does not", input, in->name, template, in->name); + } + + /* Make output directory if need be */ + makeDirsOnPath(output); + + /* A second pass to convert */ + for (in = inList; in != NULL; in = in->next) + { + char inPath[PATH_LEN], outPath[PATH_LEN], templatePath[PATH_LEN]; + safef(inPath, sizeof(inPath), "%s/%s", input, in->name); + safef(outPath, sizeof(outPath), "%s/%s", output, in->name); + safef(templatePath, sizeof(templatePath), "%s/%s", template, in->name); + affixToTemplate(inPath, templatePath, outPath); + } + } +else /* Simple non-recursive directory case. */ + { + affixToTemplate(input, template, output); + } +} + +int main(int argc, char *argv[]) +/* Process command line. */ +{ +optionInit(&argc, argv, options); +if (argc != 4) + usage(); +gFull = optionExists("full"); +gDir = optionExists("dir"); +gAppend = optionExists("append"); +hcaAffixTsvToTemplate(argv[1], argv[2], argv[3]); +return 0; +}