cf6c86d4ab35eecaebbaca76640393fd060af845 angie Fri May 15 14:35:22 2020 -0700 Changed compatibleReleases to use recordMatchesRelease instead of sameString so that whitespace differences won't throw it off (and one record listing more releases than the other won't be a problem). Also changed buildReleaseBits to copy and clobber instead of clobbering and fixing (which could fail if there is a space before a comma). Also removed some logic for inheriting release from a parent because hgTrackDb doesn't do that. diff --git src/hg/utils/tdbQuery/tdbQuery.c src/hg/utils/tdbQuery/tdbQuery.c index f8b4b0c..c420bc1 100644 --- src/hg/utils/tdbQuery/tdbQuery.c +++ src/hg/utils/tdbQuery/tdbQuery.c @@ -255,92 +255,90 @@ freeMem(buf); slReverse(&pathList); return pathList; } struct dbPath *dbPathFind(struct dbPath *list, char *db) /* Return element on list corresponding to db, or NULL if it doesn't exist. */ { struct dbPath *p; for (p=list; p != NULL; p = p->next) if (sameString(p->db, db)) break; return p; } -unsigned buildReleaseBits(struct tdbRecord *record, char *rel) +unsigned buildReleaseBits(struct tdbRecord *record) /* unpack the comma separated list of possible release tags */ { +char *rel = tdbRecordFieldVal(record, "release"); if (rel == NULL) return RELEASE_ALPHA | RELEASE_BETA | RELEASE_PUBLIC; +char relCpy[strlen(rel) + 1]; +safecpy(relCpy, sizeof relCpy, rel); +rel = relCpy; unsigned bits = 0; while(rel) { char *end = strchr(rel, ','); - if (end) - *end = 0; + *end++ = 0; rel = trimSpaces(rel); if (sameString(rel, "alpha")) bits |= RELEASE_ALPHA; else if (sameString(rel, "beta")) bits |= RELEASE_BETA; else if (sameString(rel, "public")) bits |= RELEASE_PUBLIC; else - errAbort("Tracks must have a release combination of alpha, beta, and public on line %d of %s", - tdbRecordLineIx(record), tdbRecordFileName(record)); - - if (end) - *end++ = ','; + errAbort("Tracks must have a release combination of alpha, beta, and public on line %d of %s" + "(not '%s')", + tdbRecordLineIx(record), tdbRecordFileName(record), rel); rel = end; } return bits; } boolean recordMatchesRelease( struct tdbRecord *record, unsigned currentReleaseBit) /* Return TRUE if record is compatible with release. */ { -unsigned bits; -char *release = NULL; -struct tdbField *releaseField = tdbRecordField(record, "release"); - -if (releaseField != NULL) - release = releaseField->val; - -bits = buildReleaseBits(record, release); +unsigned bits = buildReleaseBits(record); if (bits & currentReleaseBit) return TRUE; return FALSE; } -boolean compatibleReleases(char *a, char *b) -/* Return TRUE if either a or b is null, or if a and b are the same. */ +boolean compatibleReleases(struct tdbRecord *a, struct tdbRecord *b, unsigned currentReleaseBit) +/* Return TRUE if either a or b release is null, or if a and b are the same regarding the + * current release. */ { -return a == NULL || b == NULL || sameString(a, b); +return (tdbRecordFieldVal(a, "release") == NULL || + tdbRecordFieldVal(b, "release") == NULL || + recordMatchesRelease(a, currentReleaseBit) == recordMatchesRelease(b, currentReleaseBit)); } -boolean sameKeyCompatibleRelease(struct tdbRecord *a, struct tdbRecord *b) +boolean sameKeyCompatibleRelease(struct tdbRecord *a, struct tdbRecord *b, + unsigned currentReleaseBit) /* Return TRUE if a and b have the same key and compatible releases. */ { return sameString(a->key, b->key) && - compatibleReleases(tdbRecordFieldVal(a, "release"), tdbRecordFieldVal(b, "release")); + compatibleReleases(a, b, currentReleaseBit); } struct tdbRecord *filterOnRelease( struct tdbRecord *list, unsigned currentReleaseBit) /* Return release-filtered version of list. */ { struct tdbRecord *newList = NULL; struct tdbRecord *record, *next; for (record = list; record != NULL; record = next) { next = record->next; if (recordMatchesRelease(record, currentReleaseBit)) { slAddHead(&newList, record); } } @@ -373,75 +371,50 @@ static void checkDupeFields(struct tdbRecord *record, struct lineFile *lf) /* Make sure that each field in record is unique. */ { struct hash *uniqHash = hashNew(0); struct tdbField *field; for (field = record->fieldList; field != NULL; field = field->next) { if (hashLookup(uniqHash, field->name)) errAbort("Duplicate tag %s in record starting line %d of %s", field->name, tdbRecordLineIx(record), lf->fileName); hashAdd(uniqHash, field->name, NULL); } hashFree(&uniqHash); } -static struct tdbField *findFieldInSelfOrParents(struct tdbRecord *record, char *fieldName) -/* Find field if it exists in self or ancestors. */ -{ -struct tdbRecord *p; -for (p = record; p != NULL; p = p->parent) - { - struct tdbField *field = tdbRecordField(p, fieldName); - if (field != NULL) - return field; - } -return NULL; -} - -static char *findFieldValInSelfOrParents(struct tdbRecord *record, char *fieldName) -/* Find value of given field if it exists in self or ancestors. Return NULL if - * field does not exist. */ -{ -struct tdbField *field = findFieldInSelfOrParents(record, fieldName); -return (field != NULL ? field->val : NULL); -} - -static void checkDupeKeys(struct tdbRecord *recordList, boolean checkRelease) +static void checkDupeKeys(struct tdbRecord *recordList, boolean checkRelease, + unsigned currentReleaseBit) /* Make sure that there are no duplicate records (with keys) */ { struct tdbRecord *record; struct hash *uniqHash = hashNew(0); for (record = recordList; record != NULL; record = record->next) { char *key = record->key; if (key != NULL) { struct hashEl *hel; for (hel = hashLookup(uniqHash, key); hel != NULL; hel = hashLookupNext(hel)) { struct tdbRecord *oldRecord = hel->val; struct tdbFilePos *oldPos = oldRecord->posList; struct tdbFilePos *newPos = record->posList; boolean doAbort = TRUE; if (checkRelease) - { - doAbort = FALSE; - char *oldRelease = findFieldValInSelfOrParents(oldRecord, "release"); - char *newRelease = findFieldValInSelfOrParents(record, "release"); - doAbort = compatibleReleases(oldRelease, newRelease); - } + doAbort = compatibleReleases(oldRecord, record, currentReleaseBit); if (doAbort) { char *oldRelease = tdbRecordFieldVal(oldRecord, "release"); char *newRelease = tdbRecordFieldVal(record, "release"); if (newRelease == NULL && oldRelease != NULL) { errAbort("Have release tag for track %s at line %d of %s, but not " "at line %d of %s", key, oldPos->startLineIx, oldPos->fileName, newPos->startLineIx, newPos->fileName); } else if (oldRelease == NULL && newRelease != NULL) { errAbort("Have release tag for track %s at line %d of %s, but not " "at line %d of %s", @@ -576,126 +549,127 @@ * distance gets pretty big. Would be flaky on records split across * different files, hence the ad-hoc in the name. Not worth implementing * something that handles this though with the hope that the parent/child * relationship will become indentation rather than ID based. */ { struct tdbFilePos *parentFp = parent->posList, *childFp = child->posList; if (!sameString(parentFp->fileName, childFp->fileName)) return BIGNUM/2; int distance = childFp->startLineIx - parentFp->startLineIx; if (distance < 0) return BIGNUM/4 - distance; return distance; } static struct tdbRecord *findParent(struct tdbRecord *rec, - char *parentFieldName, struct hash *hash) + char *parentFieldName, struct hash *hash, + unsigned currentReleaseBit) /* Find parent record if possible. This is a bit complicated by wanting to * match parents and children from the same release if possible. Our * strategy is to just ignore records from the wrong release. */ { -char *release = tdbRecordFieldVal(rec, "release"); if (clNoCompSub) return NULL; struct tdbField *parentField = tdbRecordField(rec, parentFieldName); if (parentField == NULL) return NULL; char *parentLine = parentField->val; int len = strlen(parentLine); char buf[len+1]; strcpy(buf, parentLine); char *parentName = firstWordInLine(buf); struct hashEl *hel; struct tdbRecord *closestParent = NULL; int closestDistance = BIGNUM; for (hel = hashLookup(hash, parentName); hel != NULL; hel = hashLookupNext(hel)) { struct tdbRecord *parent = hel->val; - if (compatibleReleases(release, tdbRecordFieldVal(parent, "release"))) + if (compatibleReleases(rec, parent, currentReleaseBit)) { int distance = parentChildFileDistance(parent, rec); if (distance < closestDistance) { closestParent = parent; closestDistance = distance; } } } if (closestParent != NULL) return closestParent; -recordWarn(rec, "parent %s of %s release %s doesn't exist", parentName, rec->key, naForNull(release)); +recordWarn(rec, "parent %s of %s release %s doesn't exist", parentName, rec->key, + naForNull(tdbRecordFieldVal(rec, "release"))); return NULL; } static void linkUpParents(struct tdbRecord *list, char *parentField, unsigned currentReleaseBit) /* Link up records according to parent/child relationships. */ { /* Zero out children, parent, and older sibling fields, since going to recalculate * them and need lists to start out empty. */ struct tdbRecord *rec; for (rec = list; rec != NULL; rec = rec->next) rec->parent = rec->olderSibling = rec->children = NULL; /* Build up hash of records indexed by key field. */ struct hash *hash = hashNew(0); for (rec = list; rec != NULL; rec = rec->next) { if (rec->key != NULL) hashAdd(hash, rec->key, rec); } /* Scan through linking up parents. */ for (rec = list; rec != NULL; rec = rec->next) { - struct tdbRecord *parent = findParent(rec, parentField, hash); + struct tdbRecord *parent = findParent(rec, parentField, hash, currentReleaseBit); if (parent != NULL) { rec->parent = parent; rec->olderSibling = parent->children; parent->children = rec; } } hashFree(&hash); } struct tdbRecord *tdbsForDbPath(struct dbPath *p, struct lm *lm, char *parentField, unsigned currentReleaseBit) /* Assemble recordList for given database. This looks at the root/organism/assembly * levels. It returns a list of records. */ { struct hash *recordHash = hashNew(0); struct slName *fileLevelList = dbPathToFiles(p), *fileLevel; struct tdbRecord *recordList = NULL; for (fileLevel = fileLevelList; fileLevel != NULL; fileLevel = fileLevel->next) { char *fileName = fileLevel->name; struct tdbRecord *fileRecords = readStartingFromFile(fileName, lm); verbose(2, "Read %d records starting from %s\n", slCount(fileRecords), fileName); fileRecords = filterOnRelease(fileRecords, currentReleaseBit); verbose(2, "After filterOnRelease %d records\n", slCount(fileRecords)); linkUpParents(fileRecords, parentField, currentReleaseBit); - checkDupeKeys(fileRecords, TRUE); + checkDupeKeys(fileRecords, TRUE, currentReleaseBit); struct tdbRecord *record, *nextRecord; for (record = fileRecords; record != NULL; record = nextRecord) { nextRecord = record->next; char *key = record->key; struct tdbRecord *oldRecord = hashFindVal(recordHash, key); - if (oldRecord != NULL && sameKeyCompatibleRelease(record, oldRecord)) + if (oldRecord != NULL && sameKeyCompatibleRelease(record, oldRecord, currentReleaseBit)) { if (!record->override) { oldRecord->fieldList = record->fieldList; oldRecord->posList = record->posList; } else mergeRecords(oldRecord, record, glKeyField, lm); } else { hashAdd(recordHash, record->key, record); slAddHead(&recordList, record); } } @@ -1024,31 +998,31 @@ /* Loop through each database. */ int matchCount = 0; struct dyString *fileString = dyStringNew(0); /* Buffer for file field. */ for (dbOrder = dbOrderList; dbOrder != NULL; dbOrder = dbOrder->next) { struct lm *lm = lmInit(0); struct dbPath *p = dbOrder->val; char *db = p->db; struct tdbRecord *recordList = tdbsForDbPath(p, lm, "parent", releaseBit); verbose(2, "Composed %d records from %s\n", slCount(recordList), db); inheritFromParents(recordList, "parent", "noInherit", releaseBit, lm); linkUpParents(recordList, "parent", releaseBit); - checkDupeKeys(recordList, FALSE); + checkDupeKeys(recordList, FALSE, releaseBit); if (clCheck) doRecordChecks(recordList, lm); struct tdbRecord *record; boolean doSelect = sameString(rql->command, "select"); for (record = recordList; record != NULL; record = record->next) { /* Add "db" field, making sure it doesn't already exist. */ struct tdbField *dbField = tdbRecordField(record, "db"); if (dbField != NULL) recordAbort(record, "using reserved field 'db'"); dbField = tdbFieldNew("db", db, lm); slAddHead(&record->fieldList, dbField);