c4c03a112fa516468b6ce875108c5b1749e11104
kent
  Fri Sep 6 14:08:20 2019 -0700
Making it so that a newField can have a '?' before it, in which case a missing corresponding oldField is not an error; the field is simply skipped.

diff --git src/tabFile/tabToTabDir/tabToTabDir.c src/tabFile/tabToTabDir/tabToTabDir.c
index 6820d01..e3fc031 100644
--- src/tabFile/tabToTabDir/tabToTabDir.c
+++ src/tabFile/tabToTabDir/tabToTabDir.c
@@ -30,30 +30,35 @@
 "usage:\n"
 "   in.tsv is a tab-separated input file.  The first line is the label names and may start with #\n"
 "   spec.txt is a file that says what columns to put into the output, described in more detail below\n"
 "   outDir is a directory that will be populated with tab-separated files\n"
 "The spec.txt file contains one blank line separated stanza per output table.\n"
 "Each stanza should look like:\n"
 "        table tableName    key-column\n"
 "        columnName1	sourceField1\n"
 "        columnName2	sourceField2\n"
 "              ...\n"
 "if the sourceField is missing it is assumed to be a column of the same name in in.tsv\n"
 "The sourceField can either be a column name in the in.tsv, or a string enclosed literal\n"
 "or an @ followed by a table name, in which case it refers to the key of that table.\n"
 "If the source column is in comma-separated-values format then the sourceField can include a\n"
 "constant array index to pick out an item from the csv list.\n"
+"\n"
+"If there is a '?' in front of the column name it is taken to mean an optional field.\n"
+"if the corresponding source field does not exist then there's no error (and no output)\n"
+"for that column\n"
+"\n"
 "You can also use strex expressions for more complicated situations.\n"
 "            See src/lib/strex.doc\n"
 "In addition to the table stanza there can be a 'define' stanza that defines variables\n"
 "that can be used in sourceFields for tables.  This looks like:\n"
 "         define\n"
 "         variable1 sourceField1\n"
 "         variable2 sourceField2\n"
 );
 }
 
 /* Command line validation table. */
 static struct optionSpec options[] = {
    {"id", OPTION_STRING},
    {"startId", OPTION_INT},
    {NULL, 0},
@@ -76,30 +81,31 @@
     fvVar, fvLink, fvExp,
     };
 
 struct newFieldInfo
 /* An expression that can define what fits in a field */
     {
     struct newFieldInfo *next;	/* Might want to hang these on a list. */
     char *name;			/* Name of field in new table */
     enum fieldValType type;	/* Constant, link, or variable */
     int oldIx;			/* For variable and link ones where field is in old table */
     int newIx;			/* Where field is in new table. */
     char *val;			/* For constant ones the string value */
     int arrayIx;		/* If it's an array then the value */
     struct newFieldInfo *link;	/* If it's fvLink then pointer to the linked field */
     struct strexParse *exp;	/* A parsed out string expression */
+    boolean optional;		/* If true, then skip rather than stop if old field doesn't exist */
     };
 
 struct newFieldInfo *findField(struct newFieldInfo *list, char *name)
 /* Find named element in list, or NULL if not found. */
 {
 struct newFieldInfo *el;
 for (el = list; el != NULL; el = el->next)
     if (sameString(name, el->name))
         return el;
 return NULL;
 }
 
 struct newTableInfo
 /* Info on a new table we are making */
     {
@@ -181,66 +187,77 @@
 	return FALSE;
     }
 return TRUE;
 }
 
 int gTotalFields = 0, gStrexFields = 0, gLinkFields = 0;
 
 struct newFieldInfo *parseFieldVal(char *name, 
     char *input, char *fileName, int fileLineNumber, struct symRec  *symbols, StrexLookup lookup)
 /* return a newFieldInfo based on the contents of input, which are not destroyed */
 {
 /* Make up return structure. */
 
 struct newFieldInfo *fv;
 AllocVar(fv);
+char c = name[0];
+if (c == '?')
+    {
+    fv->optional = TRUE;
+    name += 1;
+    }
+else if (!isalpha(c) && (c != '_'))
+    {
+    errAbort("Strange character %c starting line %d of %s", c, fileLineNumber, fileName);
+    }
 fv->name = cloneString(name);
 
 char *s = trimSpaces(input);
 if (isEmpty(s))
     {
     fv->type = fvVar;
     s = fv->val = cloneString(name);
     }
-char c = s[0];
+c = s[0];
 if (c == '@')
     {
     char *val = fv->val = cloneString(skipLeadingSpaces(s+1));
     if (isEmpty(val))
 	errAbort("Nothing following %c", c);
     fv->type = fvLink;
     ++gLinkFields;
     }
 else 
     {
     if (isTotallySimple(s) && hashLookup(symbols->varHash, s) == NULL)
 	{
 	fv->val = cloneString(skipLeadingSpaces(s));
 	eraseTrailingSpaces(fv->val);
 	fv->type = fvVar;
 	}
     else
 	{
 	fv->val = cloneString(s);
 	fv->exp = strexParseString(fv->val, fileName, fileLineNumber-1, symbols, lookup);
 	fv->type = fvExp;
 	gStrexFields += 1;
 	}
     }
 gTotalFields += 1;
 return fv;
 }
+
 static void symRecSetupPrecomputes(struct symRec *symbols)
 /* Clear out any precomputed variable values - should be
  * executed on each new line of table. */
 {
 /* Clear up any old precomputes - sort of sad these can't currently
  * be shared between output tables. Probably not enough of a time
  * bottleneck to be worth fixing though. */
 struct varVal *v;
 for (v = symbols->varList; v != NULL; v = v->next)
     {
     freez(&v->val);
     }
 }
 
 static void warnHandler(void *record, char *message)
@@ -455,33 +472,37 @@
     struct newFieldInfo *fvList = NULL;
     char *fieldName, *fieldSpec;
     int fieldCount = 0;
     while (raNextTagVal(lf, &fieldName, &fieldSpec, NULL))
         {
 	verbose(2, "  fieldName %s fieldSpec (%s)\n", fieldName, fieldSpec);
 	struct newFieldInfo *fv = parseFieldVal(fieldName, 
 	    fieldSpec, lf->fileName, lf->lineIx, symbols, symExists);
 	if (fv->type == fvVar)
 	    {
 	    char *oldName = fieldSpec;
 	    if (isEmpty(oldName))
 	       oldName = fieldName;
 	    int oldIx = stringArrayIx(oldName, inTable->fields, inTable->fieldCount);
 	    if (oldIx < 0)
+	       {
+	       if (fv->optional)
+	           continue;	    // Just skip optional ones we don't have
 	       errAbort("%s doesn't exist in the %d fields of %s line %d of %s", 
 		oldName, inTable->fieldCount, inTable->name,
 		    lf->lineIx, lf->fileName);
+	       }
 	    fv->oldIx = oldIx;
 	    }
 	fv->newIx = fieldCount++;
 	slAddHead(&fvList, fv);
 	}
     slReverse(&fvList);
 
     /* Create array of field names for output. */
     char *fieldNames[fieldCount];
     int i;
     struct newFieldInfo *fv = NULL;
     for (i=0, fv=fvList; i<fieldCount; ++i, fv=fv->next)
 	fieldNames[i] = fv->name;
 
     /* Create empty output table and track which fields of input go to output. */