src/hg/utils/tdbQuery/tdbQuery.c 1.30
1.30 2010/03/07 01:32:42 kent
Improving handling of release tags. The case where composite tracks have both alpha and beta release tags now works better.
Index: src/hg/utils/tdbQuery/tdbQuery.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/utils/tdbQuery/tdbQuery.c,v
retrieving revision 1.29
retrieving revision 1.30
diff -b -B -U 4 -r1.29 -r1.30
--- src/hg/utils/tdbQuery/tdbQuery.c 6 Feb 2010 21:43:03 -0000 1.29
+++ src/hg/utils/tdbQuery/tdbQuery.c 7 Mar 2010 01:32:42 -0000 1.30
@@ -23,10 +23,8 @@
static boolean clNoBlank = FALSE; /* If set suppress blank lines in output. */
static char *clRewrite = NULL; /* Rewrite to given directory. */
static boolean clNoCompSub = FALSE; /* If set don't do subtrack inheritence of fields. */
-boolean uglyOne;
-
void usage()
/* Explain usage and exit. */
{
errAbort(
@@ -280,8 +278,21 @@
return FALSE;
}
}
+boolean compatibleReleases(char *a, char *b)
+/* Return TRUE if release values a and b are compatible: either one is NULL, or both are the same string. */
+{
+return a == NULL || b == NULL || sameString(a, b);
+}
+
+boolean sameKeyCompatibleRelease(struct tdbRecord *a, struct tdbRecord *b)
+/* Return TRUE if a and b have the same key and the "release" values from tdbRecordFieldVal on each are compatible. */
+{
+return sameString(a->key, b->key) &&
+ compatibleReleases(tdbRecordFieldVal(a, "release"), tdbRecordFieldVal(b, "release"));
+}
+
struct tdbRecord *filterOnRelease(struct tdbRecord *list, boolean alpha)
/* Return release-filtered version of list. */
{
struct tdbRecord *newList = NULL;
@@ -355,24 +366,14 @@
{
doAbort = FALSE;
char *oldRelease = findFieldValInSelfOrParents(oldRecord, "release");
char *newRelease = findFieldValInSelfOrParents(record, "release");
- if (oldRelease == NULL || newRelease == NULL)
- doAbort = TRUE;
- else
- {
- if (sameString(oldRelease, newRelease))
- doAbort = TRUE;
- }
+ doAbort = compatibleReleases(oldRelease, newRelease);
}
if (doAbort)
{
- char *oldRelease = NULL;
- struct tdbField *oldField = tdbRecordField(oldRecord, "release");
- if (oldField) oldRelease = oldField->val;
- char *newRelease = NULL;
- struct tdbField *newField = tdbRecordField(record, "release");
- if (newField) newRelease = newField->val;
+ char *oldRelease = tdbRecordFieldVal(oldRecord, "release");
+ char *newRelease = tdbRecordFieldVal(record, "release");
if (newRelease == NULL && oldRelease != NULL)
{
errAbort("Have release tag for track %s at line %d of %s, but not "
"at line %d of %s",
@@ -484,72 +485,8 @@
}
old->posList = slCat(old->posList, record->posList);
}
-static void overrideFieldFromFile(struct tdbRecord *recordList,
- char *raFile, char *fieldName, struct lm *lm)
-/* Look for raFile in assembly and organism directories in that order. Use the first
- * file that you find to override the given field inside of recordList. */
-{
-/* Build up hash of recordList. */
-struct hash *hash = hashNew(0);
-struct tdbRecord *record;
-for (record = recordList; record != NULL; record = record->next)
- hashAdd(hash, record->key, record);
-
-struct lineFile *lf = lineFileOpen(raFile, TRUE);
-while ((record = tdbRecordReadOne(lf, glKeyField, lm)) != NULL)
- {
- struct tdbRecord *oldRecord = hashFindVal(hash, record->key);
- if (oldRecord == NULL)
- {
- continue;
- }
- if (slCount(record->fieldList) != 2)
- {
- errAbort("Expecting just two fields, track and %s, got %d in record starting line %d of %s",
- fieldName, slCount(record->fieldList), tdbRecordLineIx(record), lf->fileName);
- }
- struct tdbField *field = tdbRecordField(record, fieldName);
- if (field == NULL)
- {
- errAbort("Missing %s tag in record starting line %d of %s", fieldName,
- tdbRecordLineIx(record), lf->fileName);
- }
- mergeRecords(oldRecord, record, glKeyField, lm);
- }
-lineFileClose(&lf);
-hashFree(&hash);
-}
-
-static void overrideFieldFromFileOnPath(struct tdbRecord *recordList, struct dbPath *p,
- char *raFile, char *field, struct lm *lm)
-/* Look for raFile in assembly and organism directories in that order. Use the first
- * file that you find to override the given field inside of recordList. */
-{
-/* Find raFile. */
-char path[PATH_LEN];
-safef(path, sizeof(path), "%s/%s", p->dir, raFile);
-if (!fileExists(path))
- {
- char orgDir[PATH_LEN];
- splitPath(p->dir, orgDir, NULL, NULL);
- safef(path, sizeof(path), "%s%s", orgDir, raFile);
- if (!fileExists(path))
- return; /* Nothing to do. */
- }
-
-overrideFieldFromFile(recordList, path, field, lm);
-}
-
-static void overridePrioritiesAndVisibilities(struct tdbRecord *recordList, struct dbPath *p,
- struct lm *lm)
-/* Look for visibility.ra and priority.ra files and layer them onto recordList. */
-{
-overrideFieldFromFileOnPath(recordList, p, "visibility.ra", "visibility", lm);
-overrideFieldFromFileOnPath(recordList, p, "priority.ra", "priority", lm);
-}
-
static int parentChildFileDistance(struct tdbRecord *parent, struct tdbRecord *child)
/* Return distance of two records. If they're in different files the
* distance gets pretty big. Would be flaky on records split across
* different files, hence the ad-hoc in the name. Not worth implementing
@@ -565,13 +502,14 @@
return distance;
}
static struct tdbRecord *findParent(struct tdbRecord *rec,
- char *parentFieldName, struct hash *hash, boolean alpha)
+ char *parentFieldName, struct hash *hash)
/* Find parent record if possible. This is a bit complicated by wanting to
* match parents and children from the same release if possible. Our
- * strategy is to just ignore records from the wrond release. */
+ * strategy is to just ignore records from the wrong release. */
{
+char *release = tdbRecordFieldVal(rec, "release");
if (clNoCompSub)
return NULL;
struct tdbField *parentField = tdbRecordField(rec, parentFieldName);
if (parentField == NULL)
@@ -588,30 +526,22 @@
for (hel = hashLookup(hash, parentName); hel != NULL; hel = hashLookupNext(hel))
{
gotParentSomeRelease = TRUE;
struct tdbRecord *parent = hel->val;
+ if (compatibleReleases(release, tdbRecordFieldVal(parent, "release")))
+ {
int distance = parentChildFileDistance(parent, rec);
if (distance < closestDistance)
{
closestParent = parent;
closestDistance = distance;
}
}
+ }
if (closestParent != NULL)
return closestParent;
-/* If we haven't matched so far, it could be that the release tag is set in the parent
- * but not in us, and the parent is not our release parent. In this case we go ahead
- * and return the out-of-release parent, so we can inherit the out-of-release release
- * tag, so we get filtered out! */
-struct tdbField *releaseField = tdbRecordField(rec, "release");
-if (gotParentSomeRelease && releaseField == NULL)
- {
- struct tdbRecord *parent = hashFindVal(hash, parentName);
- assert(parent != NULL);
- return parent;
- }
-recordWarn(rec, "parent %s of %s doesn't exist", parentName, rec->key);
+recordWarn(rec, "parent %s of %s release %s doesn't exist", parentName, rec->key, naForNull(release));
return NULL;
}
static void linkUpParents(struct tdbRecord *list, char *parentField, boolean alpha)
@@ -633,9 +563,9 @@
/* Scan through linking up parents. */
for (rec = list; rec != NULL; rec = rec->next)
{
- struct tdbRecord *parent = findParent(rec, parentField, hash, alpha);
+ struct tdbRecord *parent = findParent(rec, parentField, hash);
if (parent != NULL)
{
rec->parent = parent;
rec->olderSibling = parent->children;
@@ -667,9 +597,9 @@
{
nextRecord = record->next;
char *key = record->key;
struct tdbRecord *oldRecord = hashFindVal(recordHash, key);
- if (oldRecord != NULL)
+ if (oldRecord != NULL && sameKeyCompatibleRelease(record, oldRecord))
{
if (!record->override)
{
oldRecord->fieldList = record->fieldList;
@@ -971,10 +901,8 @@
verbose(2, "After filterOnRelease %d records\n", slCount(recordList));
linkUpParents(recordList, "parent", clAlpha);
checkDupeKeys(recordList, FALSE);
- overridePrioritiesAndVisibilities(recordList, p, lm);
-
if (clCheck)
doRecordChecks(recordList, lm);
struct tdbRecord *record;