src/hg/utils/tdbQuery/tdbQuery.c 1.30

1.30 2010/03/07 01:32:42 kent
Improving handling of release tags. The case where composite tracks have both alpha and beta releases now works better.
Index: src/hg/utils/tdbQuery/tdbQuery.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/utils/tdbQuery/tdbQuery.c,v
retrieving revision 1.29
retrieving revision 1.30
diff -b -B -U 4 -r1.29 -r1.30
--- src/hg/utils/tdbQuery/tdbQuery.c	6 Feb 2010 21:43:03 -0000	1.29
+++ src/hg/utils/tdbQuery/tdbQuery.c	7 Mar 2010 01:32:42 -0000	1.30
@@ -23,10 +23,8 @@
 static boolean clNoBlank = FALSE;	/* If set suppress blank lines in output. */
 static char *clRewrite = NULL;		/* Rewrite to given directory. */
 static boolean clNoCompSub = FALSE;	/* If set don't do subtrack inheritence of fields. */
 
-boolean uglyOne;
-
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
@@ -280,8 +278,21 @@
     return FALSE;
     }
 }
 
+boolean compatibleReleases(char *a, char *b)
+/* Return TRUE if either a or b is null, or if a and b are the same. */
+{
+return a == NULL || b == NULL || sameString(a, b);
+}
+
+boolean sameKeyCompatibleRelease(struct tdbRecord *a, struct tdbRecord *b)
+/* Return TRUE if a and b have the same key and compatible releases. */
+{
+return sameString(a->key, b->key) && 
+	compatibleReleases(tdbRecordFieldVal(a, "release"), tdbRecordFieldVal(b, "release"));
+}
+
 struct tdbRecord *filterOnRelease(struct tdbRecord *list, boolean alpha)
 /* Return release-filtered version of list. */
 {
 struct tdbRecord *newList = NULL;
@@ -355,24 +366,14 @@
 		{
 		doAbort = FALSE;
 		char *oldRelease = findFieldValInSelfOrParents(oldRecord, "release");
 		char *newRelease = findFieldValInSelfOrParents(record, "release");
-		if (oldRelease == NULL || newRelease == NULL)
-		    doAbort = TRUE;
-		else
-		    {
-		    if (sameString(oldRelease, newRelease))
-		       doAbort = TRUE;
-		    }
+		doAbort = compatibleReleases(oldRelease, newRelease);
 		}
 	    if (doAbort)
 		{
-		char *oldRelease = NULL;
-		struct tdbField *oldField = tdbRecordField(oldRecord, "release");
-		if (oldField) oldRelease = oldField->val;
-		char *newRelease = NULL;
-		struct tdbField *newField = tdbRecordField(record, "release");
-		if (newField) newRelease = newField->val;
+		char *oldRelease = tdbRecordFieldVal(oldRecord, "release");
+		char *newRelease = tdbRecordFieldVal(record, "release");
 		if (newRelease == NULL && oldRelease != NULL)
 		    {
 		    errAbort("Have release tag for track %s at line %d of %s, but not "
 		    	     "at line %d of %s", 
@@ -484,72 +485,8 @@
     }
 old->posList = slCat(old->posList, record->posList);
 }
 
-static void overrideFieldFromFile(struct tdbRecord *recordList, 
-	char *raFile, char *fieldName, struct lm *lm)
-/* Look for raFile in assembly and organism directories in that order.  Use the first
- * file that you find to override the given field inside of recordList. */
-{
-/* Build up hash of recordList. */
-struct hash *hash = hashNew(0);
-struct tdbRecord *record;
-for (record = recordList; record != NULL; record = record->next)
-    hashAdd(hash, record->key, record);
-
-struct lineFile *lf = lineFileOpen(raFile, TRUE);
-while ((record = tdbRecordReadOne(lf, glKeyField, lm)) != NULL)
-    {
-    struct tdbRecord *oldRecord = hashFindVal(hash, record->key);
-    if (oldRecord == NULL)
-        {
-	continue;
-	}
-    if (slCount(record->fieldList) != 2)
-        {
-	errAbort("Expecting just two fields, track and %s, got %d in record starting line %d of %s",
-		fieldName, slCount(record->fieldList), tdbRecordLineIx(record), lf->fileName);
-	}
-    struct tdbField *field = tdbRecordField(record, fieldName);
-    if (field == NULL)
-        {
-	errAbort("Missing %s tag in record starting line %d of %s", fieldName,
-		tdbRecordLineIx(record), lf->fileName);
-	}
-    mergeRecords(oldRecord, record, glKeyField, lm);
-    }
-lineFileClose(&lf);
-hashFree(&hash);
-}
-
-static void overrideFieldFromFileOnPath(struct tdbRecord *recordList, struct dbPath *p,
-	char *raFile, char *field, struct lm *lm)
-/* Look for raFile in assembly and organism directories in that order.  Use the first
- * file that you find to override the given field inside of recordList. */
-{
-/* Find raFile. */
-char path[PATH_LEN];
-safef(path, sizeof(path), "%s/%s", p->dir, raFile);
-if (!fileExists(path))
-    {
-    char orgDir[PATH_LEN];
-    splitPath(p->dir, orgDir, NULL, NULL);
-    safef(path, sizeof(path), "%s%s", orgDir, raFile);
-    if (!fileExists(path))
-        return;		/* Nothing to do. */
-    }
-
-overrideFieldFromFile(recordList, path, field, lm);
-}
-
-static void overridePrioritiesAndVisibilities(struct tdbRecord *recordList, struct dbPath *p,
-	struct lm *lm)
-/* Look for visibility.ra and priority.ra files and layer them onto recordList. */
-{
-overrideFieldFromFileOnPath(recordList, p, "visibility.ra", "visibility", lm);
-overrideFieldFromFileOnPath(recordList, p, "priority.ra", "priority", lm);
-}
-
 static int parentChildFileDistance(struct tdbRecord *parent, struct tdbRecord *child)
 /* Return distance of two records.  If they're in different files the
  * distance gets pretty big.  Would be flaky on records split across
  * different files, hence the ad-hoc in the name.  Not worth implementing
@@ -565,13 +502,14 @@
 return distance;
 }
 
 static struct tdbRecord *findParent(struct tdbRecord *rec, 
-	char *parentFieldName, struct hash *hash, boolean alpha)
+	char *parentFieldName, struct hash *hash)
 /* Find parent record if possible.  This is a bit complicated by wanting to
  * match parents and children from the same release if possible.  Our
- * strategy is to just ignore records from the wrond release. */
+ * strategy is to just ignore records from the wrong release. */
 {
+char *release = tdbRecordFieldVal(rec, "release");
 if (clNoCompSub)
     return NULL;
 struct tdbField *parentField = tdbRecordField(rec, parentFieldName);
 if (parentField == NULL)
@@ -588,30 +526,22 @@
 for (hel = hashLookup(hash, parentName); hel != NULL; hel = hashLookupNext(hel))
     {
     gotParentSomeRelease = TRUE;
     struct tdbRecord *parent = hel->val;
+    if (compatibleReleases(release, tdbRecordFieldVal(parent, "release")))
+	{
     int distance = parentChildFileDistance(parent, rec);
     if (distance < closestDistance)
 	{
 	closestParent = parent;
 	closestDistance = distance;
 	}
     }
+    }
 if (closestParent != NULL)
     return closestParent;
 
-/* If we haven't matched so far, it could be that the release tag is set in the parent
- * but not in us, and the parent is not our release parent.  In this case we go ahead
- * and return the out-of-release parent, so we can inherit the out-of-release release
- * tag, so we get filtered out! */
-struct tdbField *releaseField = tdbRecordField(rec, "release");
-if (gotParentSomeRelease && releaseField == NULL)
-     {
-     struct tdbRecord *parent = hashFindVal(hash, parentName);
-     assert(parent != NULL);
-     return parent;
-     }
-recordWarn(rec, "parent %s of %s doesn't exist", parentName, rec->key);
+recordWarn(rec, "parent %s of %s release %s doesn't exist", parentName, rec->key, naForNull(release));
 return NULL;
 }
 
 static void linkUpParents(struct tdbRecord *list, char *parentField, boolean alpha)
@@ -633,9 +563,9 @@
 
 /* Scan through linking up parents. */
 for (rec = list; rec != NULL; rec = rec->next)
     {
-    struct tdbRecord *parent = findParent(rec, parentField, hash, alpha);
+    struct tdbRecord *parent = findParent(rec, parentField, hash);
     if (parent != NULL)
 	{
 	rec->parent = parent;
 	rec->olderSibling = parent->children;
@@ -667,9 +597,9 @@
 	{
 	nextRecord = record->next;
 	char *key = record->key;
 	struct tdbRecord *oldRecord = hashFindVal(recordHash, key);
-	if (oldRecord != NULL)
+	if (oldRecord != NULL && sameKeyCompatibleRelease(record, oldRecord))
 	    {
 	    if (!record->override)
 		{
 		oldRecord->fieldList = record->fieldList;
@@ -971,10 +901,8 @@
     verbose(2, "After filterOnRelease %d records\n", slCount(recordList));
     linkUpParents(recordList, "parent", clAlpha);
     checkDupeKeys(recordList, FALSE);
 
-    overridePrioritiesAndVisibilities(recordList, p, lm);
-
     if (clCheck)
         doRecordChecks(recordList, lm);
 
     struct tdbRecord *record;