1c7eb8c5ac6e94dbfd2a0aca80c5c1e585602e60 wong Mon Oct 31 14:22:12 2011 -0700 using correct path from dropped files diff --git python/lib/ucscgenomics/mkChangeNotes.py python/lib/ucscgenomics/mkChangeNotes.py index 21db35d..0d01c9b 100644 --- python/lib/ucscgenomics/mkChangeNotes.py +++ python/lib/ucscgenomics/mkChangeNotes.py @@ -1,17 +1,17 @@ #!/hive/groups/encode/dcc/bin/python -import sys, os, re, argparse, subprocess, math, datetime +import sys, os, re, argparse, subprocess, math, datetime, time from ucscgenomics import ra, track, qa, ucscUtils class makeNotes(object): def checkMetaDbForFiles(self, status, state): if state == 'new': (mdb, files, loose) = (self.newMdb, self.newReleaseFiles, self.loose) elif state == 'old': (mdb, files, loose) = (self.oldMdb, self.oldReleaseFiles, self.loose) errors = [] revokedset = set() revokedfiles = set() atticset = set() supplementalset = set() filtermdb = ra.RaFile() @@ -33,30 +33,32 @@ if 'attic' in j: atticset.add(j.name) else: #pass if loose and re.match('.*bai', i): pass else: errors.append("metaDb: %s is not mentioned in %s" % (i, status)) return (filtermdb, revokedset, revokedfiles, atticset, supplementalset, errors) def __checkAlphaForDropped(self, status, type): errors=[] diff = set(self.oldMdb) - set(self.newMdb) for i in diff: + if re.match('.*MAGIC.*', i): + continue errors.append("%s: %s missing from %s" % (type, i, status)) return errors def __checkFilesForDropped(self): diff = set(self.oldReleaseFiles) - set(self.newReleaseFiles) return diff def checkTableStatus(self, status, state): errors=[] revokedset = set() (database, composite, loose) = (self.database, self.composite, self.loose) if state == 'new': (mdb, files, revokedset) = (self.newMdb, self.newReleaseFiles, self.revokedSet) elif state == 'old': (mdb, files) = (self.oldMdb, self.oldReleaseFiles) @@ -367,48 +369,47 @@ removedSupp = oldSupplementalSet - newSupplementalSet untouchedSupp = oldSupplementalSet & newSupplementalSet allOther = additionalList | oldAdditionalList removedOther = oldAdditionalList - additionalList output.extend(self.__qaHeader(output, newTableSet, filesNoRevoke, newGbdbSet, newSupp, additionalList, revokedTableSet, revokedFiles, revokedGbdbs, pushFiles, pushGbdbs, args, c)) output.extend(self.__printSection(pushTables, untouchedTables, revokedTableSet, allTables, "tables", 0, args['summary'])) output.extend(self.__printSection(pushFiles, untouchedFiles, revokedFiles, allFiles, "download", self.releasePath, args['summary'])) output.extend(self.__printSection(pushGbdbs, untouchedGbdbs, revokedGbdbs, allGbdbs, "gbdbs", self.gbdbPath, args['summary'])) output.extend(self.__printSection(newSupp, untouchedSupp, removedSupp, allSupp, "supplemental", self.releasePath, args['summary'])) #These attributes are the critical ones that are used by qaInit, others could potentially use these also. - otherprint = len(allOther) if otherprint: output.append("\n") output.append("OTHER FILES:") output.append("New: %s" % len(additionalList)) output.append("Revoked/Replace: %s" % len(removedOther)) output.append("Total: %s" % len(allOther)) if otherprint and not args['summary']: output.append("") output.append("New Other Files (%s):" % len(additionalList)) output.extend(sorted(list(self.newOthers))) output.append("") output.append("Revoked Other Files (%s):" % len(removedOther)) output.extend(ucscUtils.printIter((removedOther), self.releasePath)) output.append("\n") - output.extend(self.__addMissingToReport(missingFiles, "Files", self.releasePath)) + output.extend(self.__addMissingToReport(missingFiles, "Files", self.releasePathOld)) output.append("\n") output.extend(self.__addMissingToReport(self.droppedTables, "Tables")) if not args['ignore']: output.append("No Errors") else: output.append("The counts here were generated by ignoring errors, they may not be correct") return output def __printSectionOne(self, output, set, title): output = [] if set: output.append("%s (%s):" % (title, len(set))) output.extend(sorted(list(set))) return output @@ -448,31 +449,31 @@ errorsDict = {} output = [] for i in errors: line = i.split(":", 1) try: errorsDict[line[0]].append(line[1]) except: errorsDict[line[0]] = [] errorsDict[line[0]].append(line[1]) output.append("Errors (%s):" % len(errors)) for i in sorted(errorsDict.keys()): output.append("%s:" % i) for j in sorted(errorsDict[i]): output.append("%s" % j) output.append("\n") - output.extend(self.__addMissingToReport(missingFiles, "Files", self.releasePath)) + output.extend(self.__addMissingToReport(missingFiles, "Files", self.releasePathOld)) output.append("\n") output.extend(self.__addMissingToReport(self.droppedTables, "Tables")) return output def __init__(self, args): self.releaseNew = args['releaseNew'] self.releaseOld = args['releaseOld'] self.database = args['database'] self.composite = args['composite'] self.loose = args['loose'] self.ignore = args['ignore'] self.summary = args['summary'] self.specialMdb = args['specialMdb'] self.args = args @@ -486,30 +487,31 @@ self.releaseOlf = 'solo' elif self.releaseOld > self.releaseNew: self.releaseOld = 'solo' self.releasePath = c.httpDownloadsPath + 'release' + args['releaseNew'] self.gbdbPath = "/gbdb/%s/bbi" % args['database'] self.trackDbFile = c.currentTrackDb if not self.trackDbFile: errors.append("track: There is no entry in trackDb.wgEncode.ra for %s with the alpha tag" % self.composite) else: self.trackDb = ra.RaFile(self.trackDbFile) if int(self.releaseNew) > 1 and str(self.releaseOld) != 'solo': self.newReleaseFiles = c.releases[int(self.releaseNew)-1] self.oldReleaseFiles = c.releases[int(self.releaseOld)-1] + self.releasePathOld = c.httpDownloadsPath + 'release' + args['releaseOld'] self.newMdb = c.alphaMetaDb self.oldMdb = c.publicMetaDb #make a list of missing files self.missingFiles = self.__checkFilesForDropped() #filter them out of old release files #check if all files listed in release directories have associated metaDb entries (self.newMdb, self.revokedSet, self.revokedFiles, self.atticSet, self.newSupplementalSet, newFileErrors) = self.checkMetaDbForFiles("alpha metaDb", "new") (self.oldMdb, spam, eggs, ham, self.oldSupplementalSet, oldFileErrors) = self.checkMetaDbForFiles("public metaDb", "old")