src/utils/raSqlQuery/raSqlQuery.c 1.16
1.16 2009/11/22 01:37:41 kent
Implementing arbitrary databases in db= option. Adding db=all option.
Index: src/utils/raSqlQuery/raSqlQuery.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/utils/raSqlQuery/raSqlQuery.c,v
retrieving revision 1.15
retrieving revision 1.16
diff -b -B -U 4 -r1.15 -r1.16
--- src/utils/raSqlQuery/raSqlQuery.c 22 Nov 2009 00:25:26 -0000 1.15
+++ src/utils/raSqlQuery/raSqlQuery.c 22 Nov 2009 01:37:41 -0000 1.16
@@ -22,8 +22,9 @@
static char *clNoInheritField = "noInherit";
static boolean clMerge = FALSE;
static boolean clParent = FALSE;
static boolean clAddFile = FALSE;
+static boolean clAddDb = FALSE;
static char *clRestrict = NULL;
static char *clDb = NULL;
static boolean clOverrideNeeded = FALSE;
@@ -57,11 +58,13 @@
" -noInheritField=field - If field is present don't inherit fields from parent\n"
" -merge - If there are multiple raFiles, records with the same keyField will be\n"
" merged together with fields in later files overriding fields in earlier files\n"
" -addFile - Add 'file' field to say where record is defined\n"
+ " -addDb - Add 'db' field to say where record is defined\n"
" -restrict=keyListFile - restrict output to only ones with keys in file, which\n"
" -db=hg19 - Acts on trackDb files for the given database. Sets up list of files\n"
" appropriately and sets parent, merge, and override all.\n"
+ " Use db=all for all databases\n"
"The output will be to stdout, in the form of a .ra file if the select command is used\n"
"and just a simple number if the count command is used\n"
, clKey, clParentField
);
@@ -76,35 +79,66 @@
{"parent", OPTION_BOOLEAN},
{"parentField", OPTION_STRING},
{"noInheritField", OPTION_STRING},
{"addFile", OPTION_BOOLEAN},
+ {"addDb", OPTION_BOOLEAN},
{"restrict", OPTION_STRING},
{"db", OPTION_STRING},
{"overrideNeeded", OPTION_BOOLEAN},
{NULL, 0},
};
struct dbPath
+/* A database directory and path. */
{
+ struct dbPath *next;
char *db;
char *dir;
};
-struct dbPath dbPath[] = {
- {"hg19", "human/hg19"},
- {"hg18", "human/hg18"},
- {"hg17", "human/hg17"},
- {"mm9", "mouse/mm9"},
- {"mm8", "mouse/mm8"},
-};
+static struct dbPath *getDbPathList(char *rootDir)
+/* Get list of all "database" directories with any trackDb.ra files two under us. */
+{
+char *root = simplifyPathToDir(rootDir);
+struct dbPath *pathList = NULL, *path;
+struct fileInfo *org, *orgList = listDirX(root, "*", TRUE);
+for (org = orgList; org != NULL; org = org->next)
+ {
+ if (org->isDir)
+ {
+ struct fileInfo *db, *dbList = listDirX(org->name, "*", TRUE);
+ for (db = dbList; db != NULL; db = db->next)
+ {
+ if (db->isDir)
+ {
+ char trackDbPath[PATH_LEN];
+ safef(trackDbPath, sizeof(trackDbPath), "%s/trackDb.ra", db->name);
+ if (fileExists(trackDbPath))
+ {
+ AllocVar(path);
+ path->dir = cloneString(db->name);
+ char *s = strrchr(db->name, '/');
+ assert(s != NULL);
+ path->db = cloneString(s+1);
+ slAddHead(&pathList, path);
+ }
+ }
+ }
+ slFreeList(&dbList);
+ }
+ }
+slFreeList(&orgList);
+slReverse(&pathList);
+freez(&root);
+return pathList;
+}
static struct slName *dbPathToFiles(struct dbPath *p)
/* Convert dbPath to a list of files. */
{
struct slName *pathList = NULL;
-char dbDir[PATH_LEN];
-safef(dbDir, sizeof(dbDir), "%s/%s", clTrackDbRootDir, p->dir);
+char *dbDir = p->dir;
char *buf = cloneString(clTrackDbRelPath);
char *line = buf, *word;
while ((word = nextWord(&line)) != NULL)
{
@@ -125,24 +159,8 @@
slReverse(&pathList);
return pathList;
}
-static struct slName *dbToTrackDbFiles(char *db)
-/* Given a database, figure out list of trackDb files. */
-{
-int i;
-for (i=0; i<ArraySize(dbPath); ++i)
- {
- struct dbPath *p = &dbPath[i];
- if (sameString(p->db, db))
- {
- return dbPathToFiles(p);
- }
- }
-errAbort("Couldn't find db %s", db);
-return NULL;
-}
-
static void mergeRecords(struct raRecord *old, struct raRecord *record, char *key, struct lm *lm)
/* Merge record into old, updating any old fields with new record values. */
@@ -186,9 +204,10 @@
}
}
static struct raRecord *readRaRecords(int inCount, char *inNames[], char *keyField,
- boolean doMerge, boolean addFile, boolean overrideNeeded, struct lm *lm)
+ boolean doMerge, boolean addFile, char *db, boolean addDb,
+ boolean overrideNeeded, struct lm *lm)
/* Scan through files, merging records on key if doMerge. */
{
if (inCount <= 0)
return NULL;
@@ -204,8 +223,10 @@
while ((record = raRecordReadOne(lf, keyField, lm)) != NULL)
{
if (addFile)
record->posList = raFilePosNew(lm, fileName, lf->lineIx);
+ if (addDb)
+ record->db = db;
char *key = record->key;
if (key != NULL)
{
struct raRecord *oldRecord = hashFindVal(recordHash, key);
@@ -267,12 +288,14 @@
}
}
void rqlStatementOutput(struct rqlStatement *rql, struct raRecord *ra,
- char *addFileField, FILE *out)
+ char *addFileField, boolean addDb, FILE *out)
/* Output fields from ra to file. If addFileField is non-null add a new
* field with this name at end of output. */
{
+if (addDb)
+ fprintf(out, "db %s\n", ra->db);
struct slName *fieldList = rql->fieldList, *field;
for (field = fieldList; field != NULL; field = field->next)
{
struct raField *r;
@@ -364,17 +387,22 @@
}
}
}
-void raSqlQuery(int inCount, char *inNames[], struct lineFile *query, boolean doMerge,
- char *parentField, char *noInheritField, struct lm *lm, FILE *out)
+void raSqlQuery(int inCount, char *inNames[],
+ char *db, char *parentField, struct lm *lm, FILE *out)
/* raSqlQuery - Do a SQL-like query on a RA file.. */
{
+struct lineFile *query;
+if (clQuery)
+ query = lineFileOnString("query", TRUE, cloneString(clQuery));
+else
+ query = lineFileOpen(clQueryFile, TRUE);
struct raRecord *raList = readRaRecords(inCount, inNames, clKey,
- doMerge, clAddFile, clOverrideNeeded, lm);
+ clMerge, clAddFile, db, clAddDb, clOverrideNeeded, lm);
if (parentField != NULL)
{
- inheritFromParents(raList, parentField, noInheritField, lm);
+ inheritFromParents(raList, parentField, clNoInheritField, lm);
}
if (clRestrict)
{
struct hash *restrictHash = hashAllWordsInFile(clRestrict);
@@ -405,9 +433,9 @@
{
matchCount += 1;
if (doSelect)
{
- rqlStatementOutput(rql, ra, (clAddFile ? "file" : NULL), out);
+ rqlStatementOutput(rql, ra, (clAddFile ? "file" : NULL), clAddDb, out);
}
}
}
if (!doSelect)
@@ -425,8 +453,9 @@
clQueryFile = optionVal("queryFile", NULL);
clQuery = optionVal("query", NULL);
clNoInheritField = optionVal("noInheritField", clNoInheritField);
clAddFile = optionExists("addFile");
+clAddDb = optionExists("addDb");
clRestrict = optionVal("restrict", NULL);
clOverrideNeeded = optionExists("overrideNeeded");
clDb = optionVal("db", NULL);
if (argc < 2 && !clDb)
@@ -434,29 +463,33 @@
if (clQueryFile == NULL && clQuery == NULL)
errAbort("Please specify either the query or queryFile option.");
if (clQueryFile != NULL && clQuery != NULL)
errAbort("Please specify just one of the query or queryFile options.");
-struct lineFile *query = NULL;
-if (clQuery)
- query = lineFileOnString("query", TRUE, cloneString(clQuery));
-else
- query = lineFileOpen(clQueryFile, TRUE);
struct lm *lm = lmInit(0);
if (clDb != NULL)
{
clMerge = TRUE;
clParent = TRUE;
clOverrideNeeded = TRUE;
clKey = "track";
+ if (sameString(clDb, "all"))
+ clAddDb = TRUE;
}
+char *parentField = (clParent ? clParentField : NULL);
char **fileNames;
int fileCount;
if (clDb)
{
if (argc != 1)
usage();
- struct slName *path, *pathList = dbToTrackDbFiles(clDb);
+ struct dbPath *db, *dbList = getDbPathList(clTrackDbRootDir);
+ boolean gotAny = FALSE;
+ for (db = dbList; db != NULL; db = db->next)
+ {
+ if (sameString(clDb, "all") || sameString(clDb, db->db))
+ {
+ struct slName *path, *pathList = dbPathToFiles(db);
fileCount = slCount(pathList);
if (fileCount == 0)
errAbort("No paths returned by dbToTrackDbFiles(%s)", clDb);
AllocArray(fileNames, fileCount);
@@ -464,14 +497,19 @@
for (i=0, path = pathList; path != NULL; path = path->next, ++i)
{
fileNames[i] = path->name;
}
+ raSqlQuery(fileCount, fileNames, db->db, parentField, lm, stdout);
+ gotAny = TRUE;
+ }
+ }
+ if (!gotAny)
+ errAbort("No database named %s found off %s\n", clDb, clTrackDbRootDir);
}
else
{
fileNames = argv+1;
fileCount = argc-1;
+ raSqlQuery(fileCount, fileNames, "n/a", parentField, lm, stdout);
}
-char *parentField = (clParent ? clParentField : NULL);
-raSqlQuery(fileCount, fileNames, query, clMerge, parentField, clNoInheritField, lm, stdout);
return 0;
}