9dcd0edee0049d4cf6aca9e5cce5c4a521cce1d2 kent Wed Aug 14 19:09:10 2019 -0700 Passing symbol table to parse for better error checking. diff --git src/tabFile/tabToTabDir/tabToTabDir.c src/tabFile/tabToTabDir/tabToTabDir.c index 89ba41f..3299bba 100644 --- src/tabFile/tabToTabDir/tabToTabDir.c +++ src/tabFile/tabToTabDir/tabToTabDir.c @@ -107,31 +107,32 @@ boolean isTotallySimple(char *s) /* We are only alphanumerical and dotty things, we even begin with a alnum or _*/ { char c = *s++; if (!isalpha(c) && (c != '_')) return FALSE; while ((c = *s++) != 0) { if (!(isalnum(c) || (c == '_') || (c == '.'))) return FALSE; } return TRUE; } -struct newFieldInfo *parseFieldVal(char *name, char *input, char *fileName, int fileLineNumber) +struct newFieldInfo *parseFieldVal(char *name, struct hash *inFieldHash, + char *input, char *fileName, int fileLineNumber, void *symbols, StrexLookup lookup) /* return a newFieldInfo based on the contents of input, which are not destroyed */ { /* Make up return structure. */ struct newFieldInfo *fv; AllocVar(fv); fv->name = cloneString(name); char *s = skipLeadingSpaces(input); if (isEmpty(s)) { fv->type = fvVar; fv->val = cloneString(name); } else @@ -144,31 +145,31 @@ if (isEmpty(val)) errAbort("Nothing following %c", c); fv->type = fvLink; } else { if (isTotallySimple(s)) { fv->val = cloneString(skipLeadingSpaces(s)); eraseTrailingSpaces(fv->val); fv->type = fvVar; } else { fv->val = cloneString(s); - fv->exp = strexParseString(fv->val, fileName, fileLineNumber-1); + fv->exp = strexParseString(fv->val, fileName, fileLineNumber-1, symbols, lookup); fv->type = fvExp; } } } return fv; } struct symRec /* Something we pass as a record to symLookup */ { struct hash *hash; /* The hash with symbol to row index */ char **row; /* The row we are working on */ }; static char *symLookup(void *record, char *key) @@ -254,66 +255,72 @@ struct hash *hashFieldIx(char **fields, int fieldCount) /* Create a hash filled with fields with integer valued indexes */ { int i; struct hash *hash = hashNew(0); for (i=0; i<fieldCount; ++i) hashAdd(hash, fields[i], intToPt(i)); return hash; } void tabToTabDir(char *inTabFile, char *specFile, char *outDir) /* tabToTabDir - Convert a large tab-separated table to a directory full of such tables * according to a specification.. */ { +/* Read input table */ struct fieldedTable *inTable = fieldedTableFromTabFile(inTabFile, inTabFile, NULL, 0); verbose(1, "Read %d columns, %d rows from %s\n", inTable->fieldCount, inTable->rowCount, inTabFile); -struct lineFile *lf = lineFileOpen(specFile, TRUE); + +/* Compute info on the fields */ +struct hash *inFieldHash = hashFieldIx(inTable->fields, inTable->fieldCount); +struct symRec symbols = {inFieldHash, inTable->fields}; // Sym lookup just returns symbol name during parsing /* Read in spec file as ra file stanzas that we convert into tableInfos. */ +struct lineFile *lf = lineFileOpen(specFile, TRUE); struct newTableInfo *newTableList = NULL, *newTable; while (raSkipLeadingEmptyLines(lf, NULL)) { /* Read first tag, which we know is there because it's right after raSkipLeadingEmptyLines. * Make sure the tag is table, and that there is a following table name and key field name. */ char *tableString, *tableSpec; raNextTagVal(lf, &tableString, &tableSpec, NULL); verbose(2, "Processing table %s '%s' line %d of %s\n", tableString, tableSpec, lf->lineIx, lf->fileName); if (!sameString(tableString, "table")) errAbort("stanza that doesn't start with 'table' ending line %d of %s", lf->lineIx, lf->fileName); char *tableName = nextWord(&tableSpec); char *keyFieldName = cloneString(nextWord(&tableSpec)); if (isEmpty(keyFieldName)) errAbort("No key field for table %s line %d of %s", tableName, lf->lineIx, lf->fileName); /* Start filling out newTable with these fields */ AllocVar(newTable); newTable->name = cloneString(tableName); tableName = newTable->name; /* Keep this handy variable. */ /* Make up field list out of rest of the stanza */ struct newFieldInfo *fvList = NULL; char *fieldName, *fieldSpec; int fieldCount = 0; while (raNextTagVal(lf, &fieldName, &fieldSpec, NULL)) { verbose(2, " fieldName %s fieldSpec ((%s))\n", fieldName, fieldSpec); - struct newFieldInfo *fv = parseFieldVal(fieldName, fieldSpec, lf->fileName, lf->lineIx); + struct newFieldInfo *fv = parseFieldVal(fieldName, inFieldHash, + fieldSpec, lf->fileName, lf->lineIx, &symbols, symLookup); if (fv->type == fvVar) { char *oldName = fieldSpec; if (isEmpty(oldName)) oldName = fieldName; int oldIx = stringArrayIx(oldName, inTable->fields, inTable->fieldCount); if (oldIx < 0) errAbort("%s doesn't exist in the %d fields of %s line %d of %s", oldName, inTable->fieldCount, inTable->name, lf->lineIx, lf->fileName); fv->oldIx = oldIx; } fv->newIx = fieldCount++; slAddHead(&fvList, fv); } @@ -351,44 +358,43 @@ for (newTable = newTableList; newTable != NULL; newTable = newTable->next) { struct newFieldInfo *field; for (field = newTable->fieldList; field != NULL; field = field->next) { if (field->type == fvLink) { struct newTableInfo *linkedTable = findTable(newTableList, field->val); if (linkedTable == NULL) errAbort("@%s doesn't exist", field->name); field->link = linkedTable->keyField; } } } -struct hash *inFieldHash = hashFieldIx(inTable->fields, inTable->fieldCount); makeDirsOnPath(outDir); /* Output tables */ for (newTable = newTableList; newTable != NULL; newTable = newTable->next) { /* Populate table */ struct fieldedTable *outTable = newTable->table; selectUniqueIntoTable(inTable, inFieldHash, specFile, newTable->fieldList, newTable->keyField->newIx, outTable); /* Create output file name and save file. */ char outTabName[FILENAME_LEN]; safef(outTabName, sizeof(outTabName), "%s/%s.tsv", outDir, newTable->name); - verbose(1, "Writing %s of %d fields %d rows\n", + verbose(1, "Writing %s of %d columns %d rows\n", outTabName, outTable->fieldCount, outTable->rowCount); fieldedTableToTabFile(outTable, outTabName); } } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc != 4) usage(); tabToTabDir(argv[1], argv[2], argv[3]); return 0; }