a3c905cd9655de851ad5288797aacade677b1a57
kent
  Mon Aug 12 21:42:35 2019 -0700
Made the non-unique key error message much more informative. Rows whose key (index) value is empty are now skipped for the table being indexed. Fixed an off-by-one bug in the line number used for error reporting. Updated the usage statement a little.

diff --git src/tabFile/tabToTabDir/tabToTabDir.c src/tabFile/tabToTabDir/tabToTabDir.c
index d9ff0ce..89ba41f 100644
--- src/tabFile/tabToTabDir/tabToTabDir.c
+++ src/tabFile/tabToTabDir/tabToTabDir.c
@@ -14,31 +14,31 @@
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
 "tabToTabDir - Convert a large tab-separated table to a directory full of such tables according\n"
 "to a specification.\n"
 "usage:\n"
 "   tabToTabDir in.tsv spec.txt outDir\n"
 "where:\n"
 "   in.tsv is a tab-separated input file.  The first line is the label names and may start with #\n"
 "   spec.txt is a file that says what columns to put into the output, described in more detail below\n"
 "   outDir is a directory that will be populated with tab-separated files\n"
 "The spec.txt file contains one blank line separated stanza per output table.\n"
 "Each stanza should look like:\n"
-"        tableName    key-column\n"
+"        table tableName    key-column\n"
 "        columnName1	sourceField1\n"
 "        columnName2	sourceField2\n"
 "              ...\n"
 "if the sourceField is missing it is assumed to be a column of the same name in in.tsv\n"
 "The sourceField can either be a column name in the in.tsv, or a string enclosed literal\n"
 "or an @ followed by a table name, in which case it refers to the key of that table.\n"
 "If the source column is in comma-separated-values format then the sourceField can include a\n"
 "constant array index to pick out an item from the csv list.\n"
 "You can also use strex expressions for more complicated situations.  See src/lib/strex.doc\n"
 );
 }
 
 /* Command line validation table. */
 static struct optionSpec options[] = {
    {NULL, 0},
@@ -144,31 +144,31 @@
 	if (isEmpty(val))
 	    errAbort("Nothing following %c", c);
 	fv->type = fvLink;
 	}
     else 
         {
 	if (isTotallySimple(s))
 	    {
 	    fv->val = cloneString(skipLeadingSpaces(s));
 	    eraseTrailingSpaces(fv->val);
 	    fv->type = fvVar;
 	    }
 	else
 	    {
 	    fv->val = cloneString(s);
-	    fv->exp = strexParseString(fv->val, fileName, fileLineNumber);
+	    fv->exp = strexParseString(fv->val, fileName, fileLineNumber-1);
 	    fv->type = fvExp;
 	    }
 	}
     }
 return fv;
 }
 
 struct symRec
 /* Something we pass as a record to symLookup */
     {
     struct hash *hash;	    /* The hash with symbol to row index */
     char **row;		    /* The row we are working on */
     };
 
 static char *symLookup(void *record, char *key)
@@ -212,45 +212,49 @@
 	struct newFieldInfo *fv = unlinkedFv;
 	while (fv->type == fvLink)
 	    fv = fv->link;
 	
 	if (fv->type == fvVar)
 	    outRow[i] = inRow[fv->oldIx];
 	else if (fv->type == fvExp)
 	    {
 	    struct symRec symRec = {inFieldHash, inRow};
 	    outRow[i] = strexEvalAsString(fv->exp, &symRec, symLookup);
 	    verbose(2, "evaluated %s to %s\n", fv->val, outRow[i]);
 	    }
 	}
 
     char *key = outRow[keyFieldIx];
+    if (!isEmpty(key))
+	{
 	struct fieldedRow *uniqFr = hashFindVal(uniqHash, key);
 	if (uniqFr == NULL)
 	    {
 	    uniqFr = fieldedTableAdd(outTable, outRow, outFieldCount, 0);
 	    hashAdd(uniqHash, key, uniqFr);
 	    }
 	else    /* Do error checking for true uniqueness of key */
 	    {
 	    int differentIx = firstDifferentIx(outRow, uniqFr->row, outFieldCount);
 	    if (differentIx >= 0)
 		{
 		warn("There is a problem with the key to table %s in %s", outTable->name, specFile);
 		warn("%s %s", uniqFr->row[keyFieldIx], uniqFr->row[differentIx]);
 		warn("%s %s", outRow[keyFieldIx], outRow[differentIx]);
-	    errAbort("both exist, so they key is not unique to all values");
+		errAbort("both exist, so key is not unique for all values of %s", 
+		    outTable->fields[differentIx]);
+		}
 	    }
 	}
     }
 dyStringFree(&csvScratch);
 }
 
 
 
 struct hash *hashFieldIx(char **fields, int fieldCount)
 /* Create a hash filled with fields with integer valued indexes */
 {
 int i;
 struct hash *hash = hashNew(0);
 for (i=0; i<fieldCount; ++i)
    hashAdd(hash, fields[i], intToPt(i));
@@ -318,31 +322,31 @@
     /* Create array of field names for output. */
     char *fieldNames[fieldCount];
     int i;
     struct newFieldInfo *fv = NULL;
     for (i=0, fv=fvList; i<fieldCount; ++i, fv=fv->next)
 	fieldNames[i] = fv->name;
 
     /* Create empty output table and track which fields of input go to output. */
     struct fieldedTable *outTable = fieldedTableNew(tableName, fieldNames, fieldCount);
     outTable->startsSharp = inTable->startsSharp;
 
     /* Make sure that key field is actually in field list */
     struct newFieldInfo *keyField = findField(fvList, keyFieldName);
     if (keyField == NULL)
        errAbort("key field %s is not found in field list for %s in %s\n", 
-	tableName, keyFieldName, lf->fileName);
+	keyFieldName, tableName, lf->fileName);
 
     /* Allocate structure to save results of this pass in and so so. */
     newTable->keyField = keyField;
     newTable->fieldList = fvList;
     newTable->table = outTable;
     slAddHead(&newTableList, newTable);
 
     /* Clean up */
     freez(&keyFieldName);
     }
 slReverse(&newTableList);
 
 /* Do links between tables */
 for (newTable = newTableList; newTable != NULL; newTable = newTable->next)
     {