b4be49d733d0d35ed6e71ddbeede17ff008004a7 wong Wed Nov 23 15:54:53 2011 -0800 changes for TrackReport diff --git python/lib/ucscgenomics/mkChangeNotes.py python/lib/ucscgenomics/mkChangeNotes.py index a0b2b23..3a4b52e 100644 --- python/lib/ucscgenomics/mkChangeNotes.py +++ python/lib/ucscgenomics/mkChangeNotes.py @@ -242,37 +242,38 @@ if bytes >= (1024**2): terabytes = bytes / (1024**2) size = '%.2f TB' % terabytes elif bytes >= (1024): gigabytes = bytes / (1024) size = '%.0f GB' % gigabytes else: return 0 return size def __printSize(self, size, output, totalsize, type): nicesize = self.__determineNiceSize(size) if nicesize: - output.append("%s: %d MB (%s)" % (type, size, nicesize)) + strout = "%s: %d MB (%s)" % (type, size, nicesize) + output.append(strout) else: - output.append("%s: %d MB" % (type, size)) - + strout = "%s: %d MB" % (type, size) + output.append(strout) totalsize = totalsize + size - return (output, totalsize) + return (output, totalsize, strout) def __printSection(self, new, untouched, revoked, all, title, path, summary): output = [] removeline = "Revoked/Replaced/Renamed" totaline = "Total (New + Untouched + Revoked/Replaced/Renamed)" caps = title.upper() if title == "supplemental": removeline = "Removed" totaline = "Total" title = title + " files" caps = title.upper() elif title == 'gbdbs': caps = "GBDBS" title = "gbdb files" elif title == "download": @@ -325,36 +326,42 @@ output.append("Tables: %d" % int(len(newTableSet))) output.append("Files: %d" % int(len(filesNoRevoke))) output.append("Gbdbs: %d" % int(len(newGbdbSet))) output.append("Supplemental: %d" % int(len(newSupp))) output.append("Other: %d" % int(len(additionalList))) output.append("") output.append("REVOKED:") output.append("Tables: %s" % len(revokedTables)) output.append("Files: %s" % len(revokedFiles)) output.append("Gbdbs: %s" % len(revokedGbdbs)) output.append("") output.append("Sizes of New:") totalsize = 0 - (output, totalsize) = self.__printSize(tableSize, output, totalsize, "Table") - (output, totalsize) = self.__printSize(int(ucscUtils.makeFileSizes(pushFiles, self.releasePath)), output, totalsize, "Files") - (output, totalsize) = self.__printSize(int(ucscUtils.makeFileSizes(pushGbdbs, self.releasePath)), output, totalsize, "Gbdbs") - (output, totalsize) = self.__printSize(int(ucscUtils.makeFileSizes(newSupp, self.releasePath)), output, totalsize, "Supplemental") - (output, totalsize) = self.__printSize(int(ucscUtils.makeFileSizes(additionalList, self.releasePath)), output, totalsize, "Other") - (output, totalsize) = self.__printSize(totalsize, output, 0, "Total") + (output, totalsize, strout) = self.__printSize(tableSize, output, totalsize, "Table") + self.totalTableSize = strout + (output, totalsize, strout) = self.__printSize(int(ucscUtils.makeFileSizes(pushFiles, self.releasePath)), output, totalsize, "Files") + self.totalFilesSize = strout + (output, totalsize, strout) = self.__printSize(int(ucscUtils.makeFileSizes(pushGbdbs, self.releasePath)), output, totalsize, "Gbdbs") + self.totalGbdbsSize = strout + (output, totalsize, strout) = self.__printSize(int(ucscUtils.makeFileSizes(newSupp, self.releasePath)), output, totalsize, "Supplemental") + self.totalSupplementalSize = strout + (output, totalsize, strout) = self.__printSize(int(ucscUtils.makeFileSizes(additionalList, self.releasePath)), output, totalsize, "Other") + self.totalAdditionalSize = strout + (output, totalsize, strout) = self.__printSize(totalsize, output, 0, "Total") + self.totalEverythingSize = strout output.append("") return output def __addMissingToReport(self, missing, type, path=None): output = [] if missing: output.append("%s that dropped between releases (%s):" % (type, len(missing))) output.extend(ucscUtils.printIter(missing, path)) output.append("\n") return output def __checkAtticNotInTrackDb(self): errors = [] atticTables = self.newMdb.filter(lambda s: s['objType'] == 'table' and 'attic' in s, lambda s: s['tableName']) @@ -503,54 +510,61 @@ errors = [] c = track.CompositeTrack(self.database, self.composite, None, self.specialMdb) #sanitize arguments if not self.releaseOld.isdigit(): self.releaseOld = 'solo' elif int(self.releaseOld) <= 0: self.releaseOlf = 'solo' elif self.releaseOld > self.releaseNew: self.releaseOld = 'solo' self.releasePath = c.httpDownloadsPath + 'release' + args['releaseNew'] self.gbdbPath = "/gbdb/%s/bbi" % args['database'] self.trackDbFile = c.currentTrackDb if not self.trackDbFile: + self.trackDb = None errors.append("track: There is no entry in trackDb.wgEncode.ra for %s with the alpha tag" % self.composite) else: self.trackDb = ra.RaFile(self.trackDbFile) + if int(self.releaseNew) > 1 and str(self.releaseOld) != 'solo': self.newReleaseFiles = c.releases[int(self.releaseNew)-1] self.oldReleaseFiles = c.releases[int(self.releaseOld)-1] self.releasePathOld = c.httpDownloadsPath + 'release' + args['releaseOld'] self.newMdb = c.alphaMetaDb self.oldMdb = c.publicMetaDb + + #make a list of missing files self.missingFiles = self.__checkFilesForDropped() #filter them out of old release files #check if all files listed in release directories have associated metaDb entries (self.newMdb, self.revokedSet, self.revokedFiles, self.atticSet, self.newSupplementalSet, newFileErrors) = self.checkMetaDbForFiles("alpha metaDb", "new") (self.oldMdb, spam, eggs, ham, self.oldSupplementalSet, oldFileErrors) = self.checkMetaDbForFiles("public metaDb", "old") + self.expIds = set(self.newMdb.filter(lambda s: 'expId' in s, lambda s: s['expId'])) + #check that attic fiels aren't in trackDb + if self.trackDb: errors.extend(self.__checkAtticNotInTrackDb()) #checks to see that nothing has disappeared between public and alpha errors.extend(self.__checkAlphaForDropped("alpha metaDb", "stanza")) errors.extend(self.__checkMd5sums()) #checks and gets tables that are present, also returns a revoked set of tables for new (self.newTableSet, self.revokedTableSet, self.newMissingTables, newTableError) = self.checkTableStatus("alpha metaDb", "new") (self.oldTableSet, spam, self.droppedTables, oldTableError) = self.checkTableStatus("public metaDb", "old") #same as above except for gbdbs (self.newGbdbSet, self.revokedGbdbs, newGbdbError) = self.getGbdbFiles("new") (self.oldGbdbSet, eggs, oldGbdbError) = self.getGbdbFiles("old") @@ -591,37 +605,40 @@ self.newSupplemental = set(ucscUtils.printIter(self.newSupp, self.releasePath)) self.newOthers = set(ucscUtils.printIter(self.additionalList, self.releasePath)) self.errors = errors #don't output.append(report unless ignore option is on or no errors #module mode doesn't generate output by default if (not errors) or self.ignore: self.output = self.printReport(args, c) else: self.output = self.printErrors(errors, self.missingFiles) elif self.releaseOld == 'solo': self.newReleaseFiles = c.releases[int(self.releaseNew)-1] + self.oldReleaseFiles = set() self.newMdb = c.alphaMetaDb #check that attic fiels aren't in trackDb + if self.trackDb: errors.extend(self.__checkAtticNotInTrackDb()) (self.newMdb, self.revokedSet, self.revokedFiles, self.atticSet, self.newSupplementalSet, newFileErrors) = self.checkMetaDbForFiles("alpha metaDb", "new") + self.expIds = set(self.newMdb.filter(lambda s: 'expId' in s, lambda s: s['expId'])) (self.newTableSet, self.revokedTableSet, spam, newTableError) = self.checkTableStatus("alpha metaDb", "new") self.tableSize = self.__getTableSize() (self.newGbdbSet, self.revokedGbdbs, newGbdbError) = self.getGbdbFiles("new") #collect errors errors.extend(newFileErrors) errors.extend(newTableError) errors.extend(newGbdbError) #set for easy operations totalFiles = set(self.newReleaseFiles)