src/hg/utils/tdbQuery/tdbQuery.c 1.14

1.14 2009/12/04 22:15:16 kent
Making -check do something, specifically look for tagTypes.tab and check tags against it.
Index: src/hg/utils/tdbQuery/tdbQuery.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/utils/tdbQuery/tdbQuery.c,v
retrieving revision 1.13
retrieving revision 1.14
diff -b -B -U 4 -r1.13 -r1.14
--- src/hg/utils/tdbQuery/tdbQuery.c	3 Dec 2009 20:28:58 -0000	1.13
+++ src/hg/utils/tdbQuery/tdbQuery.c	4 Dec 2009 22:15:16 -0000	1.14
@@ -92,20 +92,86 @@
 recordLocationReport(rec, stderr);
 noWarnAbort();
 }
 
+struct hash *readTagTypeHash(char *fileName)
+/* Read fileName into a new hash: key is the first word of each line, value is an slName list of the remaining words. */
+{
+struct hash *hash = hashNew(0);
+struct lineFile *lf = lineFileOpen(fileName, TRUE);
+char *line;
+while (lineFileNextReal(lf, &line))
+    {
+    struct slName *typeList = NULL;
+    char *tag = nextWord(&line);  /* First word on the line is the tag name. */
+    char *word;
+    while ((word = nextWord(&line)) != NULL)  /* Every further word goes on this tag's list. */
+	slNameAddHead(&typeList, word);
+    hashAdd(hash, tag, typeList);
+    }
+lineFileClose(&lf);
+return hash;
+}
+
+static boolean matchAnyWild(struct slName *wildList, char *s)
+/* Return TRUE if s matches (per wildMatch) any pattern in wildList; FALSE if none match or the list is empty. */
+{
+struct slName *wild;
+for (wild = wildList; wild != NULL; wild = wild->next)
+    {
+    if (wildMatch(wild->name, s))
+        return TRUE;  /* Stop at the first pattern that matches. */
+    }
+return FALSE;
+}
+
+static void doChecks(struct tdbRecord *recordList, struct lm *lm)
+/* Check each record's tags against tagTypes.tab in clRoot: every tag must be listed there and
+ * one of its listed type patterns must match the record's type.  Problems go to recordAbort. */
+{
+/* Do checks that tags are all legitimate and with correct types. */
+char tagTypeFile[PATH_LEN];
+safef(tagTypeFile, sizeof(tagTypeFile), "%s/%s", clRoot, "tagTypes.tab");
+struct hash *tagTypeHash = readTagTypeHash(tagTypeFile);
+struct tdbRecord *record;
+for (record = recordList; record != NULL; record = record->next)
+    {
+    struct tdbField *typeField = tdbRecordField(record, "type");
+    char *fullType = (typeField != NULL ? typeField->val : record->key);  /* No type field: use the record key. */
+    char *type = lmCloneFirstWord(lm, fullType);  /* Only the first word of the type is matched. */
+    struct tdbField *field;
+    for (field = record->fieldList; field != NULL; field = field->next)
+        {
+	struct slName *typeList = hashFindVal(tagTypeHash, field->name);
+	if (typeList == NULL)
+	    {
+	    recordAbort(record, 
+	    	"Tag '%s' not found in %s.\nIf it's not a typo please add %s to that file.  "
+		"The tag is", 
+	    	field->name, tagTypeFile, field->name);
+	    }
+	if (!matchAnyWild(typeList, type))
+	    {
+	    recordAbort(record, 
+	    	"Tag '%s' not allowed for tracks of type '%s'.  Please add it to supported types\n"
+		"in %s if this is not a mistake.  The tag is", 
+	    	field->name, type, tagTypeFile);
+	    }
+	}
+    }
+}
+
 struct dbPath
 /* A database directory and path. */
     {
     struct dbPath *next;
     char *db;
     char *dir;
     };
 
-static struct dbPath *getDbPathList(char *rootDir)
+static struct dbPath *getDbPathList(char *root)
 /* Get list of all "database" directories with any trackDb.ra files two under us. */
 {
-char *root = simplifyPathToDir(rootDir);
 struct dbPath *pathList = NULL, *path;
 struct fileInfo *org, *orgList = listDirX(root, "*", TRUE);
 for (org = orgList; org != NULL; org = org->next)
     {
@@ -133,9 +199,8 @@
 	}
     }
 slFreeList(&orgList);
 slReverse(&pathList);
-freez(&root);
 return pathList;
 }
 
 
@@ -812,8 +877,11 @@
     checkDupeKeys(recordList, FALSE);
 
     overridePrioritiesAndVisibilities(recordList, p, lm);
 
+    if (clCheck)
+        doChecks(recordList, lm);
+
     struct tdbRecord *record;
     boolean doSelect = sameString(rql->command, "select");
     for (record = recordList; record != NULL; record = record->next)
 	{
@@ -866,9 +934,9 @@
 {
 optionInit(&argc, argv, options);
 if (argc != 2)
     usage();
-clRoot = optionVal("root", clRoot);
+clRoot = simplifyPathToDir(optionVal("root", clRoot));
 clCheck = optionExists("check");
 clStrict = optionExists("strict");
 clAlpha = optionExists("alpha");
 tdbQuery(argv[1]);