41f5cd680816b869e17b611553b498fb4c2d859d kent Fri Aug 2 14:42:54 2019 -0700 Making the index for a table be expressed in that table's fields rather than the big input table's fields. diff --git src/tabFile/tabToTabDir/tabToTabDir.c src/tabFile/tabToTabDir/tabToTabDir.c index 689fa07..464aabb 100644 --- src/tabFile/tabToTabDir/tabToTabDir.c +++ src/tabFile/tabToTabDir/tabToTabDir.c @@ -56,30 +56,40 @@ /* A type */ { vtVar, vtLink, vtConst, }; struct fieldVal /* An expression that can define what fits in a field */ { struct fieldVal *next; /* Might want to hang these on a list. */ char *name; enum valType type; /* Constant, link, or variable */ int oldIx; /* For variable and link ones where field is in old table */ char *val; /* For constant ones the string value */ }; +struct fieldVal *fieldValFind(struct fieldVal *list, char *name) +/* Find named element in list, or NULL if not found. */ +{ +struct fieldVal *el; +for (el = list; el != NULL; el = el->next) + if (sameString(name, el->name)) + return el; +return NULL; +} + struct fieldVal *parseFieldVal(char *name, char *input) /* return a fieldVal based on the contents of input, which are not destroyed */ { /* Make up return structure. */ struct fieldVal *fv; AllocVar(fv); fv->name = cloneString(name); char *s = skipLeadingSpaces(input); char c = s[0]; if (c == 0) { fv->type = vtVar; fv->val = cloneString(name); @@ -162,63 +172,66 @@ inTabFile); struct lineFile *lf = lineFileOpen(specFile, TRUE); makeDirsOnPath(outDir); struct slPair *specStanza = NULL; while ((specStanza = raNextStanzAsPairs(lf)) != NULL) { /* Parse out table name and key field name. 
*/ verbose(2, "Processing spec stanza of %d lines\n", slCount(specStanza)); struct slPair *table = specStanza; char *tableName = table->name; char *keyFieldName = trimSpaces(table->val); if (isEmpty(keyFieldName)) errAbort("No key field for table %s.", tableName); - /* Make sure that key field is actually in field list */ - struct slPair *fieldList = table->next; - int keyFieldIx = fieldedTableMustFindFieldIx(inTable, keyFieldName); - if (keyFieldIx < 0) - errAbort("key field %s is not found in field list for %s\n", tableName, keyFieldName); - + /* Have dealt with first line of stanza, which is about table, rest of lines are fields */ + struct slPair *fieldList = specStanza->next; + int fieldCount = slCount(fieldList); /* Create empty output table and track which fields of input go to output. */ - int fieldCount = slCount(fieldList); char *fieldNames[fieldCount]; int i; struct slPair *field; struct fieldVal *fvList = NULL; for (i=0, field=fieldList; i<fieldCount; ++i, field=field->next) { char *newName = field->name; struct fieldVal *fv = parseFieldVal(newName, field->val); if (fv->type == vtVar) fv->oldIx = fieldedTableMustFindFieldIx(inTable, fv->val); else if (fv->type == vtLink) errAbort("Can't handle links yet for %s", fv->val); fieldNames[i] = newName; slAddHead(&fvList, fv); } slReverse(&fvList); struct fieldedTable *outTable = fieldedTableNew(tableName, fieldNames, fieldCount); outTable->startsSharp = inTable->startsSharp; + /* Make sure that key field is actually in field list */ + struct fieldVal *keyField = fieldValFind(fvList, keyFieldName); + if (keyField == NULL) + errAbort("key field %s is not found in field list for %s\n", tableName, keyFieldName); + int keyFieldIx = keyField->oldIx; + /* Populate table */ selectUniqueIntoTable(inTable, fvList, keyFieldIx, outTable); /* Create output file name and save file. 
*/ char outTabName[FILENAME_LEN]; safef(outTabName, sizeof(outTabName), "%s/%s.tsv", outDir, tableName); - verbose(1, "Writing %s of %d fields %d rows\n", outTabName, outTable->fieldCount, outTable->rowCount); + verbose(1, "Writing %s of %d fields %d rows\n", + outTabName, outTable->fieldCount, outTable->rowCount); fieldedTableToTabFile(outTable, outTabName); } } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc != 4) usage(); tabToTabDir(argv[1], argv[2], argv[3]); return 0; }