c1702f4d7e442e45db8d6f40d581f70b975d1340 kent Fri May 26 10:59:08 2017 -0700 Creating a new directory for tab-separated-file utilities and seeding it with tabQuery formerly in the utils directory. diff --git src/utils/tabQuery/tabQuery.c src/utils/tabQuery/tabQuery.c deleted file mode 100644 index 4aed2d0..0000000 --- src/utils/tabQuery/tabQuery.c +++ /dev/null @@ -1,157 +0,0 @@ -/* tabQuery - Run sql-like query on a tab separated file.. */ -#include "common.h" -#include "linefile.h" -#include "hash.h" -#include "localmem.h" -#include "dystring.h" -#include "fieldedTable.h" -#include "rql.h" - -void usage() -/* Explain usage and exit. */ -{ -errAbort( - "tabQuery - Run sql-like query on a tab separated file.\n" - "usage:\n" - " tabQuery rqlStatement\n" - "where rqlStatement is much like a SQL statement, but with no joins and no commands\n" - "other than select allowed. The input file name is taken from the 'from' clause.\n" - "examples\n" - " tabQuery select file,date from manifest.tsv\n" - "This will output the file and date fields from the manifest.tsv file\n" - " tabQuery select file,date,lab from manifest.tsv where lab like 'myLab%%'\n" - "This will output the selected three fields from the file where the lab starts with myLab\n" - " tabQuery select file,data from manifest.tsv where lab='myLab'\n" - "This will output the selected two fields where the lab field is exactly myLab.\n" - " tabQuery select * from manifest.tsv where lab='myLab'\n" - "This will output all fields where the lab field is exactly myLab.\n" - " tabQuery select a*,b* from manifest.tsv where lab='myLab'\n" - "This will output all fields starting with a or b where the lab field is exactly myLab.\n" - " tabQuery select count(*) from manifest.tsv where type='fastq' and size < 1000\n" - "This will count the number of records where type is fastq and size less than 1000\n" - ); -} - -struct fieldedTable *gTable; -struct hash *gFieldHash; - -char *lookup(void *record, char *key) -/* Lookup key in 
record */ -{ -struct fieldedRow *row = record; -int fieldIx = hashIntValDefault(gFieldHash, key, -1); -if (fieldIx < 0) - errAbort("Field %s isn't found in %s", key, gTable->name); -return row->row[fieldIx]; -} - -void tabQuery(char *query) -/* tabQuery - Run sql-like query on a tab separated file.. */ -{ -/* Parse statement and make sure that it just references one table */ -struct rqlStatement *rql = rqlStatementParseString(query); -int tableCount = slCount(rql->tableList); -if (tableCount != 1) - errAbort("One and only one file allowed in the from clause\n"); - -boolean doCount = FALSE; -if (sameWord(rql->command, "count")) - doCount = TRUE; -else if (sameWord(rql->command, "select")) - doCount = FALSE; -else - errAbort("Unrecognized rql command %s", rql->command); - -/* Read in tab separated value file */ -char *tabFile = rql->tableList->name; -gTable = fieldedTableFromTabFile(tabFile, tabFile, NULL, 0); - -/* Make an integer valued hash of field indexes */ -gFieldHash = hashNew(0); -int i; -for (i=0; i<gTable->fieldCount; ++i) - hashAddInt(gFieldHash, gTable->fields[i], i); - -/* Make sure all fields in query exist */ -struct slName *field; -for (field = rql->fieldList; field != NULL; field = field->next) - if (!hashLookup(gFieldHash, field->name)) - { - if (!anyWild(field->name)) - errAbort("field %s doesn't exist in %s", field->name, tabFile); - } - -/* Make list of fields as opposed to array */ -struct slName *allFieldList = NULL; -for (i=0; i<gTable->fieldCount; ++i) - slNameAddHead(&allFieldList, gTable->fields[i]); -slReverse(&allFieldList); - -/* Expand any field names with wildcards. */ -rql->fieldList = wildExpandList(allFieldList, rql->fieldList, TRUE); - - -/* Print out label row. 
*/ -if (!doCount) - { - printf("#"); - char *sep = ""; - for (field = rql->fieldList; field != NULL; field = field->next) - { - printf("%s%s", sep, field->name); - sep = "\t"; - } - printf("\n"); - } - -/* Print out or just count selected fields that match query */ -int matchCount = 0; -struct lm *lm = lmInit(0); -struct fieldedRow *row; -for (row = gTable->rowList; row != NULL; row = row->next) - { - boolean pass = TRUE; - if (rql->whereClause != NULL) - { - struct rqlEval res = rqlEvalOnRecord(rql->whereClause, row, lookup, lm); - res = rqlEvalCoerceToBoolean(res); - pass = res.val.b; - } - if (pass) - { - if (doCount) - ++matchCount; - else - { - char *sep = ""; - for (field = rql->fieldList; field != NULL; field = field->next) - { - int fieldIx = hashIntVal(gFieldHash, field->name); - printf("%s%s", sep, row->row[fieldIx]); - sep = "\t"; - } - printf("\n"); - } - } - } - -if (doCount) - printf("%d\n", matchCount); -} - -int main(int argc, char *argv[]) -/* Process command line. */ -{ -if (argc < 2) - usage(); -struct dyString *query = dyStringNew(0); -int i; -for (i=1; i<argc; ++i) - { - if (i != 1) - dyStringAppendC(query, ' '); - dyStringAppend(query, argv[i]); - } -tabQuery(query->string); -return 0; -}