src/hg/utils/tdbQuery/tdbQuery.c 1.19
1.19 2009/12/05 22:47:39 kent
Adding additional checks on child/parent relationships. Forcing children to be close to parent in file.
Index: src/hg/utils/tdbQuery/tdbQuery.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/utils/tdbQuery/tdbQuery.c,v
retrieving revision 1.18
retrieving revision 1.19
diff -b -B -U 4 -r1.18 -r1.19
--- src/hg/utils/tdbQuery/tdbQuery.c 5 Dec 2009 19:30:24 -0000 1.18
+++ src/hg/utils/tdbQuery/tdbQuery.c 5 Dec 2009 22:47:39 -0000 1.19
@@ -151,39 +151,8 @@
var->name, var->name, glTagTypeFile);
}
}
-static void doRecordChecks(struct tdbRecord *recordList, struct lm *lm)
-/* Do additional checks on records. */
-{
-struct tdbRecord *record;
-for (record = recordList; record != NULL; record = record->next)
- {
- struct tdbField *typeField = tdbRecordField(record, "type");
- char *fullType = (typeField != NULL ? typeField->val : record->key);
- char *type = lmCloneFirstWord(lm, fullType);
- struct tdbField *field;
- for (field = record->fieldList; field != NULL; field = field->next)
- {
- struct slName *typeList = hashFindVal(glTagTypes, field->name);
- if (typeList == NULL)
- {
- recordAbort(record,
- "Tag '%s' not found in %s.\nIf it's not a typo please add %s to that file. "
- "The tag is",
- field->name, glTagTypeFile, field->name);
- }
- if (!matchAnyWild(typeList, type))
- {
- recordAbort(record,
- "Tag '%s' not allowed for tracks of type '%s'. Please add it to supported types\n"
- "in %s if this is not a mistake. The tag is",
- field->name, type, glTagTypeFile);
- }
- }
- }
-}
-
struct dbPath
/* A database directory and path. */
{
struct dbPath *next;
@@ -577,9 +546,9 @@
static int parentChildFileDistance(struct tdbRecord *parent, struct tdbRecord *child)
/* Return distance of two records. If they're in different files the
* distance gets pretty big. Would be flaky on records split across
* different files, hence the ad-hoc in the name. Not worth implementing
- * somthing that handles this though with the hope that the parent/child
+ * something that handles this though with the hope that the parent/child
* relationship will become indentation rather than ID based. */
{
struct tdbFilePos *parentFp = parent->posList, *childFp = child->posList;
if (!sameString(parentFp->fileName, childFp->fileName))
@@ -598,12 +567,8 @@
{
struct tdbField *parentField = tdbRecordField(rec, parentFieldName);
if (parentField == NULL)
return NULL;
-#ifdef OLD
-if (!recordMatchesRelease(rec, alpha))
- return NULL;
-#endif /* OLD */
char *parentLine = parentField->val;
int len = strlen(parentLine);
char buf[len+1];
strcpy(buf, parentLine);
@@ -615,20 +580,15 @@
for (hel = hashLookup(hash, parentName); hel != NULL; hel = hashLookupNext(hel))
{
gotParentSomeRelease = TRUE;
struct tdbRecord *parent = hel->val;
-#ifdef OLD
- if (recordMatchesRelease(parent, alpha))
-#endif /* OLD */
- {
int distance = parentChildFileDistance(parent, rec);
if (distance < closestDistance)
{
closestParent = parent;
closestDistance = distance;
}
}
- }
if (closestParent != NULL)
return closestParent;
/* If we haven't matched so far, it could be that the release tag is set in the parent
@@ -679,14 +639,14 @@
hashFree(&hash);
}
-struct tdbRecord *tdbsForDbPath(struct dbPath *p, struct lm *lm, struct hash *recordHash,
+struct tdbRecord *tdbsForDbPath(struct dbPath *p, struct lm *lm,
char *parentField, boolean alpha)
/* Assemble recordList for given database. This looks at the root/organism/assembly
- * levels. It returns a list, and fills in a hash (which should be passed in empty)
- * of the records keyed by record->key. */
+ * levels. It returns a list of records. */
{
+struct hash *recordHash = hashNew(0);
struct slName *fileLevelList = dbPathToFiles(p), *fileLevel;
struct tdbRecord *recordList = NULL;
for (fileLevel = fileLevelList; fileLevel != NULL; fileLevel = fileLevel->next)
{
@@ -721,10 +681,10 @@
slAddHead(&recordList, record);
}
}
}
+hashFree(&recordHash);
slReverse(&recordList);
-
return recordList;
}
static void mergeParentRecord(struct tdbRecord *record, struct tdbRecord *parent,
@@ -896,8 +856,116 @@
}
return FALSE;
}
+static struct tdbRecord *closestParentInFile(struct slRef *allParentRefs,
+ struct tdbFilePos *childPos)
+/* Return the record in allParentRefs that has a position in childPos's file on the nearest line strictly before childPos, or NULL if none precedes it there. */
+{
+struct slRef *parentRef;
+struct tdbRecord *closestParent = NULL;
+int closestDistance = BIGNUM;
+for (parentRef = allParentRefs; parentRef != NULL; parentRef = parentRef->next)
+ {
+ struct tdbRecord *parent = parentRef->val;
+ struct tdbFilePos *pos;
+ for (pos = parent->posList; pos != NULL; pos = pos->next)
+ {
+ if (sameString(pos->fileName, childPos->fileName))
+ {
+ int distance = childPos->lineIx - pos->lineIx;
+ if (distance > 0) /* Positions at or after the child's line are ignored. */
+ {
+ if (distance < closestDistance) /* Strict compare: on a tie the earlier list entry wins. */
+ {
+ closestDistance = distance;
+ closestParent = parent;
+ }
+ }
+ }
+ }
+ }
+return closestParent;
+}
+
+static void checkChildUnderNearestParent(struct slRef *allParentRefs,
+ struct tdbRecord *parent, struct tdbRecord *child)
+/* For every file that holds both child and parent, errAbort if parent is on a later line
+ * than child, or if the nearest preceding record from allParentRefs is not parent. */
+{
+/* Repeat the check for each file position of the child. */
+struct tdbFilePos *childFp, *parentFp;
+for (childFp = child->posList; childFp != NULL; childFp = childFp->next)
+ {
+ /* Look for positions of the parent in the same file; files without the parent are skipped. */
+ for (parentFp = parent->posList; parentFp != NULL; parentFp = parentFp->next)
+ {
+ if (sameString(parentFp->fileName, childFp->fileName))
+ {
+ if (parentFp->lineIx > childFp->lineIx)
+ errAbort("Child before parent in %s\n"
+ "Child (%s) at line %d, parent (%s) at line %d",
+ childFp->fileName, child->key, childFp->lineIx,
+ parent->key, parentFp->lineIx);
+ struct tdbRecord *closestParent = closestParentInFile(allParentRefs, childFp);
+ assert(closestParent != NULL); /* NOTE(review): relies on parent being in allParentRefs on an earlier line than child - confirm. */
+ if (closestParent != parent)
+ errAbort("%s comes between parent (%s) and child (%s) in %s\n"
+ "Parent at line %d, child at line %d.",
+ closestParent->key, parent->key, child->key, childFp->fileName,
+ parentFp->lineIx, childFp->lineIx);
+ }
+ }
+ }
+}
+
+static void doRecordChecks(struct tdbRecord *recordList, struct lm *lm)
+/* Check every record's tags against glTagTypes, then check that each child record sits under its nearest parent. Problems are reported via recordAbort/errAbort. */
+{
+/* Check fields against the tag types; the type comes from the "type" field value, falling back to record->key, passed through lmCloneFirstWord. */
+struct tdbRecord *record;
+for (record = recordList; record != NULL; record = record->next)
+ {
+ struct tdbField *typeField = tdbRecordField(record, "type");
+ char *fullType = (typeField != NULL ? typeField->val : record->key);
+ char *type = lmCloneFirstWord(lm, fullType);
+ struct tdbField *field;
+ for (field = record->fieldList; field != NULL; field = field->next)
+ {
+ struct slName *typeList = hashFindVal(glTagTypes, field->name);
+ if (typeList == NULL)
+ {
+ recordAbort(record,
+ "Tag '%s' not found in %s.\nIf it's not a typo please add %s to that file. "
+ "The tag is",
+ field->name, glTagTypeFile, field->name);
+ }
+ if (!matchAnyWild(typeList, type))
+ {
+ recordAbort(record,
+ "Tag '%s' not allowed for tracks of type '%s'. Please add it to supported types\n"
+ "in %s if this is not a mistake. The tag is",
+ field->name, type, glTagTypeFile);
+ }
+ }
+ }
+
+/* Collect every record that has children; used by the child/parent checks below. NOTE(review): nothing in this function frees parentRefList - confirm whether that is intended. */
+struct slRef *parentRefList = NULL;
+for (record = recordList; record != NULL; record = record->next)
+ {
+ if (record->children != NULL)
+ refAdd(&parentRefList, record);
+ }
+
+/* Check every record that has a parent against the collected parent list. */
+for (record = recordList; record != NULL; record = record->next)
+ {
+ if (record->parent != NULL)
+ checkChildUnderNearestParent(parentRefList, record->parent, record);
+ }
+}
+
void tdbQuery(char *sql)
/* tdbQuery - Query the trackDb system using SQL syntax.. */
{
/* Load in hash of legitimate tags. */
@@ -931,10 +999,9 @@
{
struct lm *lm = lmInit(0);
struct dbPath *p = dbOrder->val;
char *db = p->db;
- struct hash *recordHash = hashNew(0);
- struct tdbRecord *recordList = tdbsForDbPath(p, lm, recordHash, "subTrack", clAlpha);
+ struct tdbRecord *recordList = tdbsForDbPath(p, lm, "subTrack", clAlpha);
verbose(2, "Composed %d records from %s\n", slCount(recordList), db);
inheritFromParents(recordList, "subTrack", "noInherit", clAlpha, lm);
@@ -985,9 +1052,8 @@
}
}
}
lmCleanup(&lm);
- hashFree(&recordHash);
}
dyStringFree(&fileString);
if (sameString(rql->command, "count"))