6322c0c620672f1c1a4529110fdc8fabef4ce7fe kent Sun Aug 25 20:01:50 2019 -0700 Keeping track of strex calls and fixing problem where it was called when not needed. diff --git src/tabFile/tabToTabDir/tabToTabDir.c src/tabFile/tabToTabDir/tabToTabDir.c index fef3e28..597fc73 100644 --- src/tabFile/tabToTabDir/tabToTabDir.c +++ src/tabFile/tabToTabDir/tabToTabDir.c @@ -99,139 +99,139 @@ struct newFieldInfo *keyField; /* Key field within table */ struct newFieldInfo *fieldList; /* List of fields */ struct fieldedTable *table; /* Table to fill in. */ }; struct newTableInfo *findTable(struct newTableInfo *list, char *name) /* Find named element in list, or NULL if not found. */ { struct newTableInfo *el; for (el = list; el != NULL; el = el->next) if (sameString(name, el->name)) return el; return NULL; } +struct varVal +/* A variable, what we need to compute it, and it's value */ + { + struct varVal *next; /* Next in list */ + char *name; /* Variable name */ + struct strexParse *exp; /* Parsed out expression. */ + char *val; /* Computed value - not owned by us. */ + }; + +struct varVal *varValNew(char *name, struct strexParse *exp) +/* Allocate new varVal structure */ +{ +struct varVal *v; +AllocVar(v); +v->name = cloneString(name); +v->exp = exp; +return v; +} + + +struct symRec +/* Something we pass as a record to symLookup */ + { + struct hash *rowHash; /* The hash with symbol to row index */ + char **tableRow; /* The input row we are working on. You own.*/ + struct hash *varHash; /* Variables with varVal values */ + struct varVal *varList; /* List of all variables, same info as in hash above. */ + struct lm *lm; /* Local memory to use during eval phase */ + char *fileName; /* File name of big input tab file */ + int lineIx; /* Line number of big input tab file */ + }; + +struct symRec *symRecNew(struct hash *rowHash, struct hash *varHash, char *fileName, int lineIx) +/* Return a new symRec. The rowHash is required and contains a hash with + * values that are indexes into the table row. The varHash is optional, + * and if present should have variable names keying parseExp values. */ +{ +struct symRec *rec; +AllocVar(rec); +rec->rowHash = rowHash; +if (varHash != NULL) + { + rec->varHash = varHash; + rec->fileName = fileName; + rec->lineIx = lineIx; + } +return rec; +} + boolean isTotallySimple(char *s) /* We are only alphanumerical and dotty things, we even begin with a alnum or _*/ { char c = *s++; if (!isalpha(c) && (c != '_')) return FALSE; while ((c = *s++) != 0) { if (!(isalnum(c) || (c == '_') || (c == '.'))) return FALSE; } return TRUE; } +int gTotalFields = 0, gStrexFields = 0, gLinkFields = 0; + struct newFieldInfo *parseFieldVal(char *name, - char *input, char *fileName, int fileLineNumber, void *symbols, StrexLookup lookup) + char *input, char *fileName, int fileLineNumber, struct symRec *symbols, StrexLookup lookup) /* return a newFieldInfo based on the contents of input, which are not destroyed */ { /* Make up return structure. */ struct newFieldInfo *fv; AllocVar(fv); fv->name = cloneString(name); -char *s = skipLeadingSpaces(input); +char *s = trimSpaces(input); if (isEmpty(s)) { fv->type = fvVar; - fv->val = cloneString(name); + s = fv->val = cloneString(name); } -else - { char c = s[0]; if (c == '@') { char *val = fv->val = cloneString(skipLeadingSpaces(s+1)); - trimSpaces(val); if (isEmpty(val)) errAbort("Nothing following %c", c); fv->type = fvLink; + ++gLinkFields; } else { - if (isTotallySimple(s) && lookup(symbols, s) == NULL) + if (isTotallySimple(s) && hashLookup(symbols->varHash, s) == NULL) { fv->val = cloneString(skipLeadingSpaces(s)); eraseTrailingSpaces(fv->val); fv->type = fvVar; } else { fv->val = cloneString(s); fv->exp = strexParseString(fv->val, fileName, fileLineNumber-1, symbols, lookup); fv->type = fvExp; + gStrexFields += 1; } } - } +gTotalFields += 1; return fv; } - -struct varVal -/* A variable, what we need to compute it, and it's value */ - { - struct varVal *next; /* Next in list */ - char *name; /* Variable name */ - struct strexParse *exp; /* Parsed out expression. */ - char *val; /* Computed value - not owned by us. */ - }; - -struct varVal *varValNew(char *name, struct strexParse *exp) -/* Allocate new varVal structure */ -{ -struct varVal *v; -AllocVar(v); -v->name = cloneString(name); -v->exp = exp; -return v; -} - - -struct symRec -/* Something we pass as a record to symLookup */ - { - struct hash *rowHash; /* The hash with symbol to row index */ - char **tableRow; /* The input row we are working on. You own.*/ - struct hash *varHash; /* Variables with varVal values */ - struct varVal *varList; /* List of all variables, same info as in hash above. */ - struct lm *lm; /* Local memory to use during eval phase */ - char *fileName; /* File name of big input tab file */ - int lineIx; /* Line number of big input tab file */ - }; - -struct symRec *symRecNew(struct hash *rowHash, struct hash *varHash, char *fileName, int lineIx) -/* Return a new symRec. The rowHash is required and contains a hash with - * values that are indexes into the table row. The varHash is optional, - * and if present should have variable names keying parseExp values. */ -{ -struct symRec *rec; -AllocVar(rec); -rec->rowHash = rowHash; -if (varHash != NULL) - { - rec->varHash = varHash; - rec->fileName = fileName; - rec->lineIx = lineIx; - } -return rec; -} - static void symRecSetupPrecomputes(struct symRec *symbols) /* Clear out any precomputed variable values - should be * executed on each new line of table. */ { /* Clear up any old precomputes - sort of sad these can't currently * be shared between output tables. Probably not enough of a time * bottleneck to be worth fixing though. */ struct varVal *v; for (v = symbols->varList; v != NULL; v = v->next) { freez(&v->val); } } static void warnHandler(void *record, char *message) @@ -519,26 +519,29 @@ verbose(1, "Outputting %d tables to %s\n", slCount(newTableList), outDir); for (newTable = newTableList; newTable != NULL; newTable = newTable->next) { /* Populate table */ struct fieldedTable *outTable = newTable->table; selectUniqueIntoTable(inTable, symbols, specFile, newTable->fieldList, newTable->keyField->newIx, outTable); /* Create output file name and save file. */ char outTabName[FILENAME_LEN]; safef(outTabName, sizeof(outTabName), "%s/%s.tsv", outDir, newTable->name); verbose(1, "Writing %s of %d columns %d rows\n", outTabName, outTable->fieldCount, outTable->rowCount); fieldedTableToTabFile(outTable, outTabName); } +verbose(1, "%d fields, %d (%g%%) evaluated with strex, %d (%.2f) links\n", + gTotalFields, gStrexFields, 100.0 * gStrexFields / gTotalFields, + gLinkFields, 100.0 * gLinkFields/gTotalFields); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc != 4) usage(); tabToTabDir(argv[1], argv[2], argv[3]); return 0; }