d8070c1d0f2f1edfcd758e110add7f74b6b9af43 wong Mon Oct 31 15:37:00 2011 -0700 added some more human readable stuff, and put in a precheck for a condition that won't exist in release 1 mode. diff --git python/lib/ucscgenomics/mkChangeNotes.py python/lib/ucscgenomics/mkChangeNotes.py index 0d01c9b..ab582db 100644 --- python/lib/ucscgenomics/mkChangeNotes.py +++ python/lib/ucscgenomics/mkChangeNotes.py @@ -113,30 +113,32 @@ for i in missingTableNames: errors.append("table: %s is type obj, but missing tableName field called by %s" % (i, status)) missingFromDb = mdbtableset - sqltableset if missingFromDb: for i in missingFromDb: errors.append("table: %s table not found in Db called by %s" % (i, status)) return (mdbtableset, revokedtableset, missingFromDb, errors) def __checkGbdbFileStatus(self, i, set, errors, state): filelist = i['fileName'].split(',') #preprocess filelist, delete after bai in mdb issue is revoled #print filelist[0] + + if self.loose: if re.match('\S+.bam', filelist[0]) and filelist[0] in self.oldReleaseFiles and (filelist[0] + '.bai') not in filelist: filelist.append(filelist[0] + '.bai') for j in filelist: if ucscUtils.isGbdbFile(j, i['tableName'], self.database): set.add(j) else: errors.append("gbdb: %s%s does not exist in %s" % (state, j, self.gbdbPath)) return set, errors def getGbdbFiles(self, state): revokedset = set() if state == 'new': (tableset, revokedset, mdb) = (self.newTableSet, self.revokedSet, self.newMdb) elif state == 'old': (tableset, mdb) = (self.oldTableSet, self.oldMdb) @@ -223,83 +225,99 @@ for i in oldReleaseFiles: if not re.match('wgEncode.*', i): if i in totalFiles: pass elif i in newSupplementalSet: continue else: oldAdditionalList.add(i) else: newOld.add(i) oldReleaseFiles = newOld return(newOld, additionalList, oldAdditionalList, newTotal) + def __determineNiceSize(self, bytes): + + bytes = float(bytes) + if bytes >= (1024**2): + terabytes = bytes / (1024**2) + size = '%.2f TB' % terabytes + + elif bytes >= (1024): + gigabytes = bytes / (1024) + size = '%.0f GB' % gigabytes + else: + return 0 + return size + def __printSize(self, size, output, totalsize, type): - sizeGb = int(size/1024) - if sizeGb > 1: - output.append("%s: %d MB (%d GB)" % (type, size, sizeGb)) + nicesize = self.__determineNiceSize(size) + if nicesize: + output.append("%s: %d MB (%s)" % (type, size, nicesize)) else: output.append("%s: %d MB" % (type, size)) totalsize = totalsize + size return (output, totalsize) def __printSection(self, new, untouched, revoked, all, title, path, summary): output = [] removeline = "Revoked/Replaced/Renamed" totaline = "Total (New + Untouched + Revoked/Replaced/Renamed)" caps = title.upper() if title == "supplemental": removeline = "Removed" totaline = "Total" title = title + " files" caps = title.upper() elif title == 'gbdbs': caps = "GBDBS" title = "gbdb files" elif title == "download": title = title + " files" caps = title.upper() if all: - output.append("\n") + output.append("") output.append("%s:" % caps) output.append("New: %s" % len(new)) output.append("Untouched: %s" % len(untouched)) output.append("%s: %s" % (removeline, len(revoked))) output.append("New + Untouched: %s" % len(new | untouched)) output.append("%s: %s" % (totaline, len(all))) intersect = new & revoked if intersect: output.append("") output.append("These %s objects exist in both new and revoked %s:" % (len(intersect), title)) for i in intersect: output.append("%s" % i) if all and not summary: output.append("") output.append("New %s (%s):" % (title.title(), len(new))) output.extend(ucscUtils.printIter(new, path)) output.append("") output.append("Untouched %s (%s):" % (title.title(), len(untouched))) output.extend(ucscUtils.printIter(untouched, path)) output.append("") output.append("%s %s (%s):" % (removeline, title.title(), len(revoked))) output.extend(ucscUtils.printIter(revoked, path)) + if all: + output.append("") return output def __qaHeader(self, output, newTableSet, filesNoRevoke, newGbdbSet, newSupp, additionalList, revokedTables, revokedFiles, revokedGbdbs, pushFiles, pushGbdbs, args, c): output = [] tableSize = self.__getTableSize() output.append("mkChangeNotes v2") title = "%s %s Release %s" % (args['database'], args['composite'], args['releaseNew']) if args['releaseOld'] != "solo": title = title + " vs Release %s" % args['releaseOld'] if args['summary']: title = "Summary for " + title output.append(title) d = datetime.date.today() output.append("%s" % str(d)) @@ -314,30 +332,31 @@ output.append("REVOKED:") output.append("Tables: %s" % len(revokedTables)) output.append("Files: %s" % len(revokedFiles)) output.append("Gbdbs: %s" % len(revokedGbdbs)) output.append("") output.append("Sizes of New:") totalsize = 0 (output, totalsize) = self.__printSize(tableSize, output, totalsize, "Table") (output, totalsize) = self.__printSize(int(ucscUtils.makeFileSizes(pushFiles, self.releasePath)), output, totalsize, "Files") (output, totalsize) = self.__printSize(int(ucscUtils.makeFileSizes(pushGbdbs, self.releasePath)), output, totalsize, "Gbdbs") (output, totalsize) = self.__printSize(int(ucscUtils.makeFileSizes(newSupp, self.releasePath)), output, totalsize, "Supplemental") (output, totalsize) = self.__printSize(int(ucscUtils.makeFileSizes(additionalList, self.releasePath)), output, totalsize, "Other") (output, totalsize) = self.__printSize(totalsize, output, 0, "Total") + output.append("") return output def __addMissingToReport(self, missing, type, path=None): output = [] if missing: output.append("%s that dropped between releases (%s):" % (type, len(missing))) output.extend(ucscUtils.printIter(missing, path)) output.append("\n") return output def __checkAtticNotInTrackDb(self): errors = [] atticTables = self.newMdb.filter(lambda s: s['objType'] == 'table' and 'attic' in s, lambda s: s['tableName']) for i in atticTables: @@ -400,30 +419,33 @@ output.extend(self.__addMissingToReport(missingFiles, "Files", self.releasePathOld)) output.append("\n") output.extend(self.__addMissingToReport(self.droppedTables, "Tables")) if not args['ignore']: output.append("No Errors") else: output.append("The counts here were generated by ignoring errors, they may not be correct") return output def __printSectionOne(self, output, set, title): output = [] if set: output.append("%s (%s):" % (title, len(set))) output.extend(sorted(list(set))) + else: + return output + output.append("\n") return output def printReportOne(self, args, c): (totalFiles, revokedFiles, newGbdbSet, revokedGbdbs, newTableSet, revokedTables, additionalList, atticSet, newSupplementalSet, tableSize) = (self.totalFiles, self.revokedFiles, self.newGbdbSet, self.revokedGbdbs, self.newTableSet, self.revokedTableSet, self.additionalList, self.atticSet, self.newSupplementalSet, self.tableSize) output = [] newTables = newTableSet - revokedTables newFiles = totalFiles - revokedFiles newGbdbs = newGbdbSet - revokedGbdbs output.extend(self.__qaHeader(output, newTables, newFiles, newGbdbSet, newSupplementalSet, additionalList, revokedTables, revokedFiles, revokedGbdbs, totalFiles, newGbdbSet, args, c)) self.newTables = set(newTables) self.newFiles = set(ucscUtils.printIter(newFiles, self.releasePath)) self.newGbdbs = set(ucscUtils.printIter(newGbdbs, self.releasePath)) self.newSupplemental = set(ucscUtils.printIter(newSupplementalSet, self.releasePath)) self.newOthers = set(ucscUtils.printIter(additionalList, self.releasePath))