c4c03a112fa516468b6ce875108c5b1749e11104 kent Fri Sep 6 14:08:20 2019 -0700 Making it so that a newField can have a ? before it, in which case if the corresponding oldField is missing it is not an error, it simply is skipped. diff --git src/tabFile/tabToTabDir/tabToTabDir.c src/tabFile/tabToTabDir/tabToTabDir.c index 6820d01..e3fc031 100644 --- src/tabFile/tabToTabDir/tabToTabDir.c +++ src/tabFile/tabToTabDir/tabToTabDir.c @@ -30,30 +30,35 @@ "usage:\n" " in.tsv is a tab-separated input file. The first line is the label names and may start with #\n" " spec.txt is a file that says what columns to put into the output, described in more detail below\n" " outDir is a directory that will be populated with tab-separated files\n" "The spec.txt file contains one blank line separated stanza per output table.\n" "Each stanza should look like:\n" " table tableName key-column\n" " columnName1 sourceField1\n" " columnName2 sourceField2\n" " ...\n" "if the sourceField is missing it is assumed to be a column of the same name in in.tsv\n" "The sourceField can either be a column name in the in.tsv, or a string enclosed literal\n" "or an @ followed by a table name, in which case it refers to the key of that table.\n" "If the source column is in comma-separated-values format then the sourceField can include a\n" "constant array index to pick out an item from the csv list.\n" +"\n" +"If there is a '?' in front of the column name it is taken to mean an optional field.\n" +"if the corresponding source field does not exist then there's no error (and no output)\n" +"for that column\n" +"\n" "You can also use strex expressions for more complicated situations.\n" " See src/lib/strex.doc\n" "In addition to the table stanza there can be a 'define' stanza that defines variables\n" "that can be used in sourceFields for tables. This looks like:\n" " define\n" " variable1 sourceField1\n" " variable2 sourceField2\n" ); } /* Command line validation table. */ static struct optionSpec options[] = { {"id", OPTION_STRING}, {"startId", OPTION_INT}, {NULL, 0}, @@ -76,30 +81,31 @@ fvVar, fvLink, fvExp, }; struct newFieldInfo /* An expression that can define what fits in a field */ { struct newFieldInfo *next; /* Might want to hang these on a list. */ char *name; /* Name of field in new table */ enum fieldValType type; /* Constant, link, or variable */ int oldIx; /* For variable and link ones where field is in old table */ int newIx; /* Where field is in new table. */ char *val; /* For constant ones the string value */ int arrayIx; /* If it's an array then the value */ struct newFieldInfo *link; /* If it's fvLink then pointer to the linked field */ struct strexParse *exp; /* A parsed out string expression */ + boolean optional; /* If true, then skip rather than stop if old field doesn't exist */ }; struct newFieldInfo *findField(struct newFieldInfo *list, char *name) /* Find named element in list, or NULL if not found. */ { struct newFieldInfo *el; for (el = list; el != NULL; el = el->next) if (sameString(name, el->name)) return el; return NULL; } struct newTableInfo /* Info on a new table we are making */ { @@ -181,66 +187,77 @@ return FALSE; } return TRUE; } int gTotalFields = 0, gStrexFields = 0, gLinkFields = 0; struct newFieldInfo *parseFieldVal(char *name, char *input, char *fileName, int fileLineNumber, struct symRec *symbols, StrexLookup lookup) /* return a newFieldInfo based on the contents of input, which are not destroyed */ { /* Make up return structure. */ struct newFieldInfo *fv; AllocVar(fv); +char c = name[0]; +if (c == '?') + { + fv->optional = TRUE; + name += 1; + } +else if (!isalpha(c) && (c != '_')) + { + errAbort("Strange character %c starting line %d of %s", c, fileLineNumber, fileName); + } fv->name = cloneString(name); char *s = trimSpaces(input); if (isEmpty(s)) { fv->type = fvVar; s = fv->val = cloneString(name); } -char c = s[0]; +c = s[0]; if (c == '@') { char *val = fv->val = cloneString(skipLeadingSpaces(s+1)); if (isEmpty(val)) errAbort("Nothing following %c", c); fv->type = fvLink; ++gLinkFields; } else { if (isTotallySimple(s) && hashLookup(symbols->varHash, s) == NULL) { fv->val = cloneString(skipLeadingSpaces(s)); eraseTrailingSpaces(fv->val); fv->type = fvVar; } else { fv->val = cloneString(s); fv->exp = strexParseString(fv->val, fileName, fileLineNumber-1, symbols, lookup); fv->type = fvExp; gStrexFields += 1; } } gTotalFields += 1; return fv; } + static void symRecSetupPrecomputes(struct symRec *symbols) /* Clear out any precomputed variable values - should be * executed on each new line of table. */ { /* Clear up any old precomputes - sort of sad these can't currently * be shared between output tables. Probably not enough of a time * bottleneck to be worth fixing though. */ struct varVal *v; for (v = symbols->varList; v != NULL; v = v->next) { freez(&v->val); } } static void warnHandler(void *record, char *message) @@ -455,33 +472,37 @@ struct newFieldInfo *fvList = NULL; char *fieldName, *fieldSpec; int fieldCount = 0; while (raNextTagVal(lf, &fieldName, &fieldSpec, NULL)) { verbose(2, " fieldName %s fieldSpec (%s)\n", fieldName, fieldSpec); struct newFieldInfo *fv = parseFieldVal(fieldName, fieldSpec, lf->fileName, lf->lineIx, symbols, symExists); if (fv->type == fvVar) { char *oldName = fieldSpec; if (isEmpty(oldName)) oldName = fieldName; int oldIx = stringArrayIx(oldName, inTable->fields, inTable->fieldCount); if (oldIx < 0) + { + if (fv->optional) + continue; // Just skip optional ones we don't have errAbort("%s doesn't exist in the %d fields of %s line %d of %s", oldName, inTable->fieldCount, inTable->name, lf->lineIx, lf->fileName); + } fv->oldIx = oldIx; } fv->newIx = fieldCount++; slAddHead(&fvList, fv); } slReverse(&fvList); /* Create array of field names for output. */ char *fieldNames[fieldCount]; int i; struct newFieldInfo *fv = NULL; for (i=0, fv=fvList; i<fieldCount; ++i, fv=fv->next) fieldNames[i] = fv->name; /* Create empty output table and track which fields of input go to output. */