dd06af8866cf7caca22463089b2c78a6cfa60dee kent Sat Aug 31 18:26:33 2019 -0700 Adding -id and -startId options to fieldedTable to get steadily incrementing numerical id's for django. diff --git src/tabFile/tabToTabDir/tabToTabDir.c src/tabFile/tabToTabDir/tabToTabDir.c index 597fc73..787a338 100644 --- src/tabFile/tabToTabDir/tabToTabDir.c +++ src/tabFile/tabToTabDir/tabToTabDir.c @@ -1,64 +1,73 @@ /* tabToTabDir - Convert a large tab-separated table to a directory full of such tables according * to a specification.. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "obscure.h" #include "sqlNum.h" #include "portable.h" #include "ra.h" #include "csv.h" #include "fieldedTable.h" #include "strex.h" +char *clId = NULL; // Name of id column to add, set by -id option on command line +int clStartId = 1; // What number id column should start with + void usage() /* Explain usage and exit. */ { errAbort( "tabToTabDir - Convert a large tab-separated table to a directory full of such tables according\n" "to a specification.\n" -"usage:\n" +"command line:\n" " tabToTabDir in.tsv spec.txt outDir\n" -"where:\n" +"options:\n" +" -id=fieldName - Add a numeric id field of given name that starts at 1 and autoincrements \n" +" for each table\n" +" -startId=N - sets starting ID to be something other than 1\n" +"usage:\n" " in.tsv is a tab-separated input file. 
The first line is the label names and may start with #\n" " spec.txt is a file that says what columns to put into the output, described in more detail below\n" " outDir is a directory that will be populated with tab-separated files\n" "The spec.txt file contains one blank line separated stanza per output table.\n" "Each stanza should look like:\n" " table tableName key-column\n" " columnName1 sourceField1\n" " columnName2 sourceField2\n" " ...\n" "if the sourceField is missing it is assumed to be a column of the same name in in.tsv\n" "The sourceField can either be a column name in the in.tsv, or a string enclosed literal\n" "or an @ followed by a table name, in which case it refers to the key of that table.\n" "If the source column is in comma-separated-values format then the sourceField can include a\n" "constant array index to pick out an item from the csv list.\n" "You can also use strex expressions for more complicated situations.\n" " See src/lib/strex.doc\n" "In addition to the table stanza there can be a 'define' stanza that defines variables\n" "that can be used in sourceFields for tables. This looks like:\n" " define\n" " variable1 sourceField1\n" " variable2 sourceField2\n" ); } /* Command line validation table. 
*/ static struct optionSpec options[] = { + {"id", OPTION_STRING}, + {"startId", OPTION_INT}, {NULL, 0}, }; static int firstDifferentIx(char **aa, char **bb, int count) /* Return true if first count of strings between aa and bb are the same */ { int i; for (i=0; i<count; ++i) if (!sameString(aa[i], bb[i])) return i; return -1; } enum fieldValType @@ -517,31 +526,33 @@ /* Output tables */ verbose(1, "Outputting %d tables to %s\n", slCount(newTableList), outDir); for (newTable = newTableList; newTable != NULL; newTable = newTable->next) { /* Populate table */ struct fieldedTable *outTable = newTable->table; selectUniqueIntoTable(inTable, symbols, specFile, newTable->fieldList, newTable->keyField->newIx, outTable); /* Create output file name and save file. */ char outTabName[FILENAME_LEN]; safef(outTabName, sizeof(outTabName), "%s/%s.tsv", outDir, newTable->name); verbose(1, "Writing %s of %d columns %d rows\n", outTabName, outTable->fieldCount, outTable->rowCount); - fieldedTableToTabFile(outTable, outTabName); + fieldedTableToTabFileWithId(outTable, outTabName, clId, clStartId); } verbose(1, "%d fields, %d (%g%%) evaluated with strex, %d (%.2f) links\n", gTotalFields, gStrexFields, 100.0 * gStrexFields / gTotalFields, gLinkFields, 100.0 * gLinkFields/gTotalFields); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); +clId = optionVal("id", clId); +clStartId = optionInt("startId", clStartId); if (argc != 4) usage(); tabToTabDir(argv[1], argv[2], argv[3]); return 0; }