c67fe99ceeee00960c2a79bb5deb4b8b9fd26fed wong Fri Oct 21 10:35:22 2011 -0700 refactored to allow for using unittest to unit test diff --git python/programs/mkChangeNotes/mkChangeNotes python/programs/mkChangeNotes/mkChangeNotes index f76cecd..dfb7323 100755 --- python/programs/mkChangeNotes/mkChangeNotes +++ python/programs/mkChangeNotes/mkChangeNotes @@ -1,633 +1,658 @@ #!/hive/groups/encode/dcc/bin/python import sys, os, re, argparse, subprocess, math from ucscgenomics import ra, track -def checkMetaDbForFiles(mdb, files, status, loose): +class mkChangeNotes(object): + def checkMetaDbForFiles(self, mdb, files, status, loose): errors = [] revokedset = set() revokedfiles = set() atticset = set() supplementalset = set() filtermdb = ra.RaFile() for i in files: if re.match('supplemental', i): supplementalset.add(i) if not re.match('wgEncode.*', i): continue filestanza = mdb.filter(lambda s: re.match(".*%s.*" % i,s['fileName']), lambda s: s) #should only return 1, just in case if filestanza: for j in filestanza: filtermdb[j.name] = j if 'objStatus' in j and re.search('revoked|replaced|renamed', j['objStatus']): revokedfiles.add(i) revokedset.add(j.name) if 'attic' in j: atticset.add(j.name) else: #pass if loose and re.match('.*bai', i): pass else: errors.append("metaDb: %s is not mentioned in %s" % (i, status)) return (filtermdb, revokedset, revokedfiles, atticset, supplementalset, errors) -def checkAlphaForDropped(new, old, status, type): + def checkAlphaForDropped(self, new, old, status, type): errors=[] diff = set(old) -set(new) for i in diff: errors.append("%s: %s missing from %s" % (type, i, status)) return errors -def checkFilesForDropped(new, old): + def checkFilesForDropped(self, new, old): diff = set(old) - set(new) return diff -def checkTableStatus(mdb, files, database, composite, status, loose, revokedset): + def checkTableStatus(self, mdb, files, database, composite, status, loose, revokedset): errors=[] #home = os.environ['HOME'] #dbhost = '' #dbuser = '' #dbpassword = '' #p = re.compile('db.(\S+)=(\S+)') #with open("%s/.hg.conf" % home) as f: # for line in f: # line.rstrip("\n\r") # if p.match(line): # m = p.match(line) # if m.groups(1)[0] == 'host': # dbhost = m.groups(1)[1] # if m.groups(1)[0] == 'user': # dbuser = m.groups(1)[1] # if m.groups(1)[0] == 'password': # dbpassword = m.groups(1)[1] - #print dbhost - #print dbuser - #print dbpassword #db = MySQLdb.connect (host = dbhost, # user = dbuser, # passwd = dbpassword, # db = database) #cursor = db.cursor () #cursor.execute ("show tables like '%s%s'" % (composite, "%")) #tableset = set(cursor.fetchall()) mdbtableset = set(mdb.filter(lambda s: s['objType'] == 'table' and 'tableName' in s and 'attic' not in s, lambda s: s['metaObject'])) mdbtableset = mdbtableset - revokedset mdbtableset = set(mdb.filter(lambda s: s['metaObject'] in mdbtableset, lambda s: s['tableName'])) revokedtableset = set(mdb.filter(lambda s: s['metaObject'] in revokedset, lambda s: s['tableName'])) sep = "','" tablestr = sep.join(mdbtableset) tablestr = "'" + tablestr + "'" #this should really be using python's database module, but I'd need admin access to install it #at this point, I am just parsing the output form hgsql cmd = "hgsql %s -e \"select table_name from information_schema.TABLES where table_name in (%s)\"" % (database, tablestr) p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) output = p.stdout.read() sqltableset = set(output.split("\n")[1:]) missingTableNames = set(mdb.filter(lambda s: s['objType'] == 'table' and 'tableName' not in s and 'attic' not in s, lambda s: s['metaObject'])) missingFromDb = mdbtableset - sqltableset if missingTableNames: for i in missingTableNames: errors.append("table: %s is type obj, but missing tableName field called by %s" % (i, status)) if missingFromDb: for i in missingFromDb: errors.append("table: %s table not found in Db called by %s" % (i, status)) return (mdbtableset, revokedtableset, errors) -def getGbdbFiles(database, tableset, revokedset, mdb): + def getGbdbFiles(self, database, tableset, revokedset, mdb): errors = [] sep = "','" tablestr = sep.join(tableset) tablestr = "'" + tablestr + "'" revokestr = sep.join(revokedset) revokestr = "'" + revokestr + "'" cmd = "hgsql %s -e \"select table_name from information_schema.columns where table_name in (%s) and column_name = 'fileName'\"" % (database, tablestr) p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) output = p.stdout.read() gbdbtableset = set(output.split("\n")[1:]) cmd = "hgsql %s -e \"select table_name from information_schema.columns where table_name in (%s) and column_name = 'fileName'\"" % (database, revokestr) p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) output = p.stdout.read() revokedtableset = set(output.split("\n")[1:]) file1stanzalist = mdb.filter(lambda s: s['tableName'] in gbdbtableset, lambda s: s) revokedstanzalist = mdb.filter(lambda s: s['tableName'] in revokedtableset, lambda s: s) gbdbfileset = set() revokedfileset = set() for i in file1stanzalist: filelist = i['fileName'].split(',') for j in filelist: if os.path.isfile("/gbdb/%s/bbi/%s" % (database, j)): gbdbfileset.add(j) else: errors.append("gbdb: %s does not exist in /gbdb/%s/bbi" % (j, database)) for i in revokedstanzalist: filelist = i['fileName'].split(',') for j in filelist: if os.path.isfile("/gbdb/%s/bbi/%s" % (database, j)): revokedfileset.add(j) else: errors.append("gbdb: revoked gbdb %s does not exist in /gbdb/%s/bbi" % (j, database)) return (gbdbfileset, revokedfileset, errors) -def getTableSize(mdbtableset, database): + def getTableSize(self, mdbtableset, database): tablesize = float(0) tablelist = list() for i in mdbtableset: tablelist.append("table_name = '%s'" % i) orsep = " OR " orstr = orsep.join(tablelist) cmd = "hgsql %s -e \"SELECT ROUND(data_length/1024/1024,2) total_size_mb, ROUND(index_length/1024/1024,2) total_index_size_mb FROM information_schema.TABLES WHERE table_name = %s\"" % (database, orstr) p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) output = p.stdout.read() for i in output.split("\n")[1:]: fields = i.split() for j in fields: tablesize = tablesize + float(j) return math.ceil(tablesize) -def checkMd5sums(newfiles, oldfiles, loose): + def checkMd5sums(self, newfiles, oldfiles, loose): errors = [] for i in oldfiles: if i not in newfiles: pass elif re.match('wgEncode.*', i): if oldfiles[i].md5sum != newfiles[i].md5sum: errors.append("file: %s have changed md5sums between releases. %s vs %s" % (i, oldfiles[i].md5sum, newfiles[i].md5sum)) if loose: return list() else: return errors -def makeFileSizes(c, args, inlist): + def makeFileSizes(self, c, args, inlist): checklist = list() for i in inlist: - checklist.append("%s/%s" % (c.downloadsDirectory + 'release' + args.releaseNew, i)) + checklist.append("%s/%s" % (c.downloadsDirectory + 'release' + args['releaseNew'], i)) filesizes = 0 for i in checklist: realpath = os.path.realpath(i) filesizes = filesizes + int(os.path.getsize(realpath)) filesizes = math.ceil(float(filesizes) / (1024**2)) return int(filesizes) -def cleanSpecialFiles(inlist): + def cleanSpecialFiles(self, inlist): specialRemoveList = ['md5sum.history'] for i in specialRemoveList: if i in inlist: inlist.remove(i) return(inlist) -def separateOutAdditional(oldReleaseFiles, totalFiles, newSupplementalSet, oldSupplementalSet): + def separateOutAdditional(self, oldReleaseFiles, totalFiles, newSupplementalSet, oldSupplementalSet): additionalList = set() oldAdditionalList = set() newTotal = set() newOld = set() for i in totalFiles: if i in newSupplementalSet: continue elif not re.match('wgEncode.*', i): additionalList.add(i) else: newTotal.add(i) for i in oldReleaseFiles: if not re.match('wgEncode.*', i): if i in totalFiles: pass elif i in newSupplementalSet: continue else: oldAdditionalList.add(i) else: newOld.add(i) oldReleaseFiles = newOld return(newOld, additionalList, oldAdditionalList, newTotal) -def printWithPath(set, c, release): + def printWithPath(self, set, c, release): + output = [] for i in sorted(set): - print "%s/%s" % (c.httpDownloadsPath + 'release' + release, i) - -def printGbdbPath(set, database): + output.append("%s/%s" % (c.httpDownloadsPath + 'release' + release, i)) + return output + def printGbdbPath(self, set, database): + output = [] for i in sorted(set): - print "/gbdb/%s/bbi/%s" % (database, i) + output.append("/gbdb/%s/bbi/%s" % (database, i)) + return output -def printIter(inlist): + def printIter(self, inlist): + output = [] for i in sorted(inlist): - print i + output.append(i) + return output -def printReport(args, totalFiles, newGbdbSet, newTableSet, additionalList, oldAdditionalList, pushTables, pushFiles, pushGbdbs, c, oldTableSet, oldReleaseFiles, oldGbdbSet, atticSet, revokedFiles, mdb, revokedTableSet, revokedGbdbs, missingFiles, newSupplementalSet, oldSupplementalSet, tableSize): + def printReport(self, args, totalFiles, newGbdbSet, newTableSet, additionalList, oldAdditionalList, pushTables, pushFiles, pushGbdbs, c, oldTableSet, oldReleaseFiles, oldGbdbSet, atticSet, revokedFiles, mdb, revokedTableSet, revokedGbdbs, missingFiles, newSupplementalSet, oldSupplementalSet, tableSize): #the groups here need to be predefined, I just copied and pasted after working out what they were sep = "\n" - print "mkChangeNotes v2" - print "%s %s Release %s vs Release %s" % (args.database, args.composite, args.releaseNew, args.releaseOld) - print "" - print "QA Count Summaries for Release %s:" % args.releaseNew - print "Tables: %d" % int(len(newTableSet)) - print "Files: %d" % int(len(totalFiles - revokedFiles)) - print "Gbdbs: %d" % int(len(newGbdbSet)) - print "Supplemental: %d" % int(len(newSupplementalSet - oldSupplementalSet)) - print "Other: %d" % int(len(additionalList)) - print "\n" + output = [] + output.append("mkChangeNotes v2") + output.append("%s %s Release %s vs Release %s" % (args['database'], args['composite'], args['releaseNew'], args['releaseOld'])) + output.append("") + output.append("QA Count Summaries for Release %s:" % args['releaseNew']) + output.append("Tables: %d" % int(len(newTableSet))) + output.append("Files: %d" % int(len(totalFiles - revokedFiles))) + output.append("Gbdbs: %d" % int(len(newGbdbSet))) + output.append("Supplemental: %d" % int(len(newSupplementalSet - oldSupplementalSet))) + output.append("Other: %d" % int(len(additionalList))) + output.append("\n") totalsize = 0 size = 0 - print "Sizes of New:" + output.append("Sizes of New:") tableGb = int(tableSize/1024) if tableGb > 1: - print "Tables: %d MB (%d GB)" % (tableSize, tableGb) + output.append("Tables: %d MB (%d GB)" % (tableSize, tableGb)) elif tableSize: - print "Tables: %d MB" % tableSize + output.append("Tables: %d MB" % tableSize) totalsize = totalsize + tableSize - size = int(makeFileSizes(c, args, pushFiles)) + size = int(self.makeFileSizes(c, args, pushFiles)) totalsize = totalsize + size if int(size/1024) > 1: - print "Files: %d MB (%d GB)" % (size, int(size/1024)) + output.append("Files: %d MB (%d GB)" % (size, int(size/1024))) else: - print "Files: %d MB" % size - size = int(makeFileSizes(c, args, pushGbdbs)) + output.append("Files: %d MB" % size) + size = int(self.makeFileSizes(c, args, pushGbdbs)) totalsize = totalsize + size if int(size/1024) > 1: - print "Gbdbs: %d MB (%d GB)" % (size, int(size/1024)) + output.append("Gbdbs: %d MB (%d GB)" % (size, int(size/1024))) else: - print "Gbdbs: %d MB" % size - size = int(makeFileSizes(c, args, (newSupplementalSet - oldSupplementalSet))) + output.append("Gbdbs: %d MB" % size) + size = int(self.makeFileSizes(c, args, (newSupplementalSet - oldSupplementalSet))) totalsize = totalsize + size if int(size/1024) > 1: - print "Supplemental: %d MB (%d GB)" % (size, int(size/1024)) + output.append("Supplemental: %d MB (%d GB)" % (size, int(size/1024))) else: - print "Supplemental: %d MB" % size - size = int(makeFileSizes(c, args, (additionalList))) + output.append("Supplemental: %d MB" % size) + size = int(self.makeFileSizes(c, args, (additionalList))) totalsize = totalsize + size if int(size/1024) > 1: - print "Other: %d MB (%d GB)" % (size, int(size/1024)) + output.append("Other: %d MB (%d GB)" % (size, int(size/1024))) else: - print "Other: %d MB" % size + output.append("Other: %d MB" % size) if int(totalsize/1024) > 1: - print "Total: %d MB (%d GB)" % (totalsize, int(totalsize/1024)) + output.append("Total: %d MB (%d GB)" % (totalsize, int(totalsize/1024))) else: - print "Total: %d MB" % totalsize + output.append("Total: %d MB" % totalsize) tableprint = len(newTableSet | oldTableSet | revokedTableSet) if tableprint: - print "\n" - print "TABLES:" - print "New: %s" % len(pushTables) - print "Untouched: %s" % len(oldTableSet & newTableSet) - print "Revoked/Replaced/Renamed: %s" % len(revokedTableSet) - print "New + Untouched: %s" % len(newTableSet) - print "Total (New + Untouched + Revoked/Replaced/Renamed): %s" % len(newTableSet | oldTableSet | revokedTableSet) - if tableprint and not args.summary: - print "" - print "New Tables (%s):" % len(pushTables) - printIter(pushTables) - print "" - print "Untouched (%s):" % len(oldTableSet & newTableSet) - printIter(oldTableSet & newTableSet) - print "" - print "Revoked/Replaced/Renamed Tables (%s):" % len(revokedTableSet) - printIter(revokedTableSet) + output.append("\n") + output.append("TABLES:") + output.append("New: %s" % len(pushTables)) + output.append("Untouched: %s" % len(oldTableSet & newTableSet)) + output.append("Revoked/Replaced/Renamed: %s" % len(revokedTableSet)) + output.append("New + Untouched: %s" % len(newTableSet)) + output.append("Total (New + Untouched + Revoked/Replaced/Renamed): %s" % len(newTableSet | oldTableSet | revokedTableSet)) + if tableprint and not args['summary']: + output.append("") + output.append("New Tables (%s):" % len(pushTables)) + output.extend(self.printIter(pushTables)) + output.append("") + output.append("Untouched (%s):" % len(oldTableSet & newTableSet)) + output.extend(self.printIter(oldTableSet & newTableSet)) + output.append("") + output.append("Revoked/Replaced/Renamed Tables (%s):" % len(revokedTableSet)) + output.extend(self.printIter(revokedTableSet)) dlprint = len(totalFiles | oldReleaseFiles | revokedFiles) if dlprint: - print "\n" + output.append("\n") #downlaodables = total - revoked - print "DOWNLOAD FILES:" - print "New: %s" % len(pushFiles - revokedFiles) - print "Untouched: %s" % len((totalFiles & oldReleaseFiles) - revokedFiles) - print "Revoked/Replaced/Renamed: %s" % len(revokedFiles) - print "New + Untouched: %s" % len((pushFiles - revokedFiles) | ((totalFiles & oldReleaseFiles) - revokedFiles)) - print "Total (New + Untouched + Revoked/Replaced/Renamed): %s" % len(totalFiles | oldReleaseFiles | revokedFiles) - if dlprint and not args.summary: - print "" - print "New Download Files (%s):" % len(pushFiles - revokedFiles) - printWithPath((pushFiles - revokedFiles), c, args.releaseNew) - print "" - print "Untouched Download Files (%s):" % len((totalFiles & oldReleaseFiles) - revokedFiles) - printWithPath(((totalFiles & oldReleaseFiles) - revokedFiles), c, args.releaseNew) - print "" - print "Revoked/Replaced/Renamed Download Files (%s):" % len(revokedFiles) - printWithPath(revokedFiles, c, args.releaseNew) + output.append("DOWNLOAD FILES:") + output.append("New: %s" % len(pushFiles - revokedFiles)) + output.append("Untouched: %s" % len((totalFiles & oldReleaseFiles) - revokedFiles)) + output.append("Revoked/Replaced/Renamed: %s" % len(revokedFiles)) + output.append("New + Untouched: %s" % len((pushFiles - revokedFiles) | ((totalFiles & oldReleaseFiles) - revokedFiles))) + output.append("Total (New + Untouched + Revoked/Replaced/Renamed): %s" % len(totalFiles | oldReleaseFiles | revokedFiles)) + if dlprint and not args['summary']: + output.append("") + output.append("New Download Files (%s):" % len(pushFiles - revokedFiles)) + output.extend(self.printWithPath((pushFiles - revokedFiles), c, args['releaseNew'])) + output.append("") + output.append("Untouched Download Files (%s):" % len((totalFiles & oldReleaseFiles) - revokedFiles)) + output.extend(self.printWithPath(((totalFiles & oldReleaseFiles) - revokedFiles), c, args['releaseNew'])) + output.append("") + output.append("Revoked/Replaced/Renamed Download Files (%s):" % len(revokedFiles)) + output.extend(self.printWithPath(revokedFiles, c, args['releaseNew'])) gbdbprint = len(newGbdbSet | oldGbdbSet | revokedGbdbs) if gbdbprint: - print "\n" - print "GBDBS:" - print "New: %s" % len(pushGbdbs) - print "Untouched: %s" % len((newGbdbSet & oldGbdbSet) - revokedGbdbs) - print "Revoked/Replaced/Renamed: %s" % len(revokedGbdbs) - print "New + Untouched: %s" % len(pushGbdbs | ((newGbdbSet & oldGbdbSet) - revokedGbdbs)) - print "Total (New + Untouched + Revoked/Replaced/Renamed): %s" % len(newGbdbSet | oldGbdbSet | revokedGbdbs) - if gbdbprint and not args.summary: - print "" - print "New Gbdb Files (%s):" % len(pushGbdbs) - printGbdbPath(pushGbdbs, args.database) - print "" - print "Untouched Gbdb Files (%s):" % len((newGbdbSet & oldGbdbSet) - revokedGbdbs) - printGbdbPath((newGbdbSet & oldGbdbSet) - revokedGbdbs, args.database) - print "" - print "Revoked/Replaced/Renamed Gbdb Files (%s):" % len(revokedGbdbs) - printGbdbPath(revokedGbdbs, args.database) + output.append("\n") + output.append("GBDBS:") + output.append("New: %s" % len(pushGbdbs)) + output.append("Untouched: %s" % len((newGbdbSet & oldGbdbSet) - revokedGbdbs)) + output.append("Revoked/Replaced/Renamed: %s" % len(revokedGbdbs)) + output.append("New + Untouched: %s" % len(pushGbdbs | ((newGbdbSet & oldGbdbSet) - revokedGbdbs))) + output.append("Total (New + Untouched + Revoked/Replaced/Renamed): %s" % len(newGbdbSet | oldGbdbSet | revokedGbdbs)) + if gbdbprint and not args['summary']: + output.append("") + output.append("New Gbdb Files (%s):" % len(pushGbdbs)) + output.extend(self.printGbdbPath(pushGbdbs, args['database'])) + output.append("") + output.append("Untouched Gbdb Files (%s):" % len((newGbdbSet & oldGbdbSet) - revokedGbdbs)) + output.extend(self.printGbdbPath((newGbdbSet & oldGbdbSet) - revokedGbdbs, args['database'])) + output.append("") + output.append("Revoked/Replaced/Renamed Gbdb Files (%s):" % len(revokedGbdbs)) + output.extend(self.printGbdbPath(revokedGbdbs, args['database'])) supplementalprint = len(newSupplementalSet | oldSupplementalSet) if supplementalprint: - print "\n" - print "SUPPLEMENTAL FILES:" - print "New: %s" % len(newSupplementalSet - oldSupplementalSet) - print "Untouched: %s" % len(oldSupplementalSet & newSupplementalSet) - print "Removed: %s" % len(oldSupplementalSet - newSupplementalSet) - print "New + Untouched: %s" % len((newSupplementalSet - oldSupplementalSet) | (oldSupplementalSet & newSupplementalSet)) - print "Total: %s" % len(newSupplementalSet | oldSupplementalSet) - if supplementalprint and not args.summary: - print "" - print "New Supplemental Files (%s):" % len(newSupplementalSet - oldSupplementalSet) - printWithPath(newSupplementalSet - oldSupplementalSet, c, args.releaseNew) - print "" - print "Untouched Supplemental Files (%s):" % len(oldSupplementalSet & newSupplementalSet) - printWithPath(oldSupplementalSet & newSupplementalSet, c, args.releaseNew) - print "" - print "Removed Supplemental Files (%s):" % len(oldSupplementalSet - newSupplementalSet) - printWithPath(oldSupplementalSet - newSupplementalSet, c, args.releaseNew) + output.append("\n") + output.append("SUPPLEMENTAL FILES:") + output.append("New: %s" % len(newSupplementalSet - oldSupplementalSet)) + output.append("Untouched: %s" % len(oldSupplementalSet & newSupplementalSet)) + output.append("Removed: %s" % len(oldSupplementalSet - newSupplementalSet)) + output.append("New + Untouched: %s" % len((newSupplementalSet - oldSupplementalSet) | (oldSupplementalSet & newSupplementalSet))) + output.append("Total: %s" % len(newSupplementalSet | oldSupplementalSet)) + if supplementalprint and not args['summary']: + output.append("") + output.append("New Supplemental Files (%s):" % len(newSupplementalSet - oldSupplementalSet)) + output.extend(self.printWithPath(newSupplementalSet - oldSupplementalSet, c, args['releaseNew'])) + output.append("") + output.append("Untouched Supplemental Files (%s):" % len(oldSupplementalSet & newSupplementalSet)) + output.extend(self.printWithPath(oldSupplementalSet & newSupplementalSet, c, args['releaseNew'])) + output.append("") + output.append("Removed Supplemental Files (%s):" % len(oldSupplementalSet - newSupplementalSet)) + output.extend(self.printWithPath(oldSupplementalSet - newSupplementalSet, c, args['releaseNew'])) otherprint = len(additionalList | oldAdditionalList) if otherprint: - print "\n" - print "OTHER FILES:" - print "New: %s" % len(additionalList | (additionalList & oldAdditionalList)) - print "Revoked/Replace: %s" % len(oldAdditionalList - additionalList) - print "Total: %s" % len(additionalList | oldAdditionalList) - if otherprint and not args.summary: - print "" - print "New Other Files (%s):" % len(additionalList | (additionalList & oldAdditionalList)) - printWithPath(additionalList, c, args.releaseNew) - print "" - print "Revoked Other Files (%s):" % len(oldAdditionalList - additionalList) - printWithPath(oldAdditionalList, c, args.releaseNew) - print "\n" + output.append("\n") + output.append("OTHER FILES:") + output.append("New: %s" % len(additionalList | (additionalList & oldAdditionalList))) + output.append("Revoked/Replace: %s" % len(oldAdditionalList - additionalList)) + output.append("Total: %s" % len(additionalList | oldAdditionalList)) + if otherprint and not args['summary']: + output.append("") + output.append("New Other Files (%s):" % len(additionalList | (additionalList & oldAdditionalList))) + output.extend(self.printWithPath(additionalList, c, args['releaseNew'])) + output.append("") + output.append("Revoked Other Files (%s):" % len(oldAdditionalList - additionalList)) + output.extend(self.printWithPath(oldAdditionalList, c, args['releaseNew'])) + output.append("\n") if len(missingFiles): - print "Files that dropped between releases (%s):" % len(missingFiles) - printWithPath(missingFiles, c, args.releaseOld) - print "\n" - if not args.ignore: - print "No Errors" - -def printReportOne(args, totalFiles, revokedFiles, newGbdbSet, revokedGbdbs, newTableSet, revokedTables, additionalList, c, atticSet, newSupplementalSet, tableSize): - print "mkChangeNotes v2" - print "%s %s Release %s" % (args.database, args.composite, args.releaseNew) - print "" - print "QA Count Summaries for Release %s:" % args.releaseNew - print "Tables: %d" % int(len(newTableSet - revokedTables)) - print "Files: %d" % int(len(totalFiles - revokedFiles)) - print "Gbdbs: %d" % int(len(newGbdbSet - revokedGbdbs)) - print "Supplemental: %d" % int(len(newSupplementalSet)) - print "Other: %d" % int(len(additionalList)) - print "" - print "REVOKED:" - print "Tables: %s" % len(revokedTables) - print "Files: %s" % len(revokedFiles) - print "Gbdbs: %s" % len(revokedGbdbs) - print "\n" + output.append("Files that dropped between releases (%s):" % len(missingFiles)) + output.extend(self.printWithPath(missingFiles, c, args['releaseOld'])) + output.append("\n") + if not args['ignore']: + output.append("No Errors") + return output + + def printReportOne(self, args, totalFiles, revokedFiles, newGbdbSet, revokedGbdbs, newTableSet, revokedTables, additionalList, c, atticSet, newSupplementalSet, tableSize): + output = [] + output.append("mkChangeNotes v2") + output.append("%s %s Release %s" % (args['database'], args['composite'], args['releaseNew'])) + output.append("") + output.append("QA Count Summaries for Release %s:" % args['releaseNew']) + output.append("Tables: %d" % int(len(newTableSet - revokedTables))) + output.append("Files: %d" % int(len(totalFiles - revokedFiles))) + output.append("Gbdbs: %d" % int(len(newGbdbSet - revokedGbdbs))) + output.append("Supplemental: %d" % int(len(newSupplementalSet))) + output.append("Other: %d" % int(len(additionalList))) + output.append("") + output.append("REVOKED:") + output.append("Tables: %s" % len(revokedTables)) + output.append("Files: %s" % len(revokedFiles)) + output.append("Gbdbs: %s" % len(revokedGbdbs)) + output.append("\n") totalsize = 0; - print "Sizes of New:" + output.append("Sizes of New:") tableGb = int(tableSize / 1024) if tableGb > 1: - print "Tables: %d MB (%d GB)" % (tableSize, tableGb) + output.append("Tables: %d MB (%d GB)" % (tableSize, tableGb)) else: - print "Tables: %d MB" % tableSize + output.append("Tables: %d MB" % tableSize) totalsize = totalsize + tableSize - size = int(makeFileSizes(c, args, totalFiles - revokedFiles)) + size = int(self.makeFileSizes(c, args, totalFiles - revokedFiles)) totalsize = totalsize + size if int(size/1024) > 1: - print "Files: %d MB (%d GB)" % (size, int(size/1024)) + output.append("Files: %d MB (%d GB)" % (size, int(size/1024))) else: - print "Files: %d MB" % size - size = int(makeFileSizes(c, args, newGbdbSet - revokedGbdbs)) + output.append("Files: %d MB" % size) + size = int(self.makeFileSizes(c, args, newGbdbSet - revokedGbdbs)) totalsize = totalsize + size if int(size/1024) > 1: - print "Gbdbs: %d MB (%d GB)" % (size, int(size/1024)) + output.append("Gbdbs: %d MB (%d GB)" % (size, int(size/1024))) else: - print "Gbdbs: %d MB" % size - size = int(makeFileSizes(c, args, newSupplementalSet)) + output.append("Gbdbs: %d MB" % size) + size = int(self.makeFileSizes(c, args, newSupplementalSet)) totalsize = totalsize + size if int(size/1024) > 1: - print "Supplemental: %d MB (%d GB)" % (size, int(size/1024)) + output.append("Supplemental: %d MB (%d GB)" % (size, int(size/1024))) else: - print "Supplemental: %d MB" % size - size = int(makeFileSizes(c, args, (additionalList))) + output.append("Supplemental: %d MB" % size) + size = int(self.makeFileSizes(c, args, (additionalList))) totalsize = totalsize + size if int(size/1024) > 1: - print "Other: %d MB" % size + output.append("Other: %d MB" % size) else: - print "Other: %d MB" % size + output.append("Other: %d MB" % size) if int(totalsize/1024) > 1: - print "Total: %d MB (%d GB)" % (totalsize, int(totalsize/1024)) + output.append("Total: %d MB (%d GB)" % (totalsize, int(totalsize/1024))) else: - print "Total: %d MB" % totalsize - print "\n" - if not args.summary: - print "" + output.append("Total: %d MB" % totalsize) + output.append("\n") + if not args['summary']: + output.append("") if len(newTableSet - revokedTables): - print "New Tables (%s):" % len(newTableSet - revokedTables) - printIter(newTableSet - revokedTables) - print "\n" + output.append("New Tables (%s):" % len(newTableSet - revokedTables)) + output.extend(self.printIter(newTableSet - revokedTables)) + output.append("\n") if len(totalFiles - revokedFiles): - print "New Download Files (%s):" % len(totalFiles - revokedFiles) - printWithPath(totalFiles - revokedFiles, c, args.releaseNew) - print "\n" + output.append("New Download Files (%s):" % len(totalFiles - revokedFiles)) + output.extend(self.printWithPath(totalFiles - revokedFiles, c, args['releaseNew'])) + output.append("\n") if len(newGbdbSet - revokedGbdbs): - print "New Gbdb Files (%s):" % len(newGbdbSet - revokedGbdbs) - printGbdbPath(newGbdbSet - revokedGbdbs, args.database) - print "\n" + output.append("New Gbdb Files (%s):" % len(newGbdbSet - revokedGbdbs)) + output.extend(self.printGbdbPath(newGbdbSet - revokedGbdbs, args['database'])) + output.append("\n") if len(newSupplementalSet): - print "New Supplemental Files (%s):" % len(newSupplementalSet) - printWithPath(newSupplementalSet, c, args.releaseNew) - print "\n" + output.append("New Supplemental Files (%s):" % len(newSupplementalSet)) + output.extend(self.printWithPath(newSupplementalSet, c, args['releaseNew'])) + output.append("\n") if len(additionalList): - print "New Other Files (%s):" % len(additionalList) - printWithPath(additionalList, c, args.releaseNew) - print "\n" + output.append("New Other Files (%s):" % len(additionalList)) + output.extend(self.printWithPath(additionalList, c, args['releaseNew'])) + output.append("\n") if len(revokedTables): - print "Revoked Tables (%s):" % len(revokedTables) - printIter(revokedTables) - print "\n" + output.append("Revoked Tables (%s):" % len(revokedTables)) + output.extend(self.printIter(revokedTables)) + output.append("\n") if len(revokedFiles): - print "Revoked Files (%s):" % len(revokedFiles) - printWithPath(revokedFiles, c, args.releaseNew) - print "\n" + output.append("Revoked Files (%s):" % len(revokedFiles)) + output.extend(self.printWithPath(revokedFiles, c, args['releaseNew'])) + output.append("\n") if len(revokedGbdbs): - print "Revoked Gbdbs (%s):" % len(revokedGbdbs) - printGbdbPath(revokedGbdbs, args.database) - print "\n" - if not args.ignore: - print "No Errors" + output.append("Revoked Gbdbs (%s):" % len(revokedGbdbs)) + output.extend(self.printGbdbPath(revokedGbdbs, args['database'])) + output.append("\n") + if not args['ignore']: + output.append("No Errors") + return output -def printErrors(errors): + def printErrors(self, errors): errorsDict = {} + output = [] for i in errors: line = i.split(":", 1) try: errorsDict[line[0]].append(line[1]) except: errorsDict[line[0]] = [] errorsDict[line[0]].append(line[1]) - print "Errors (%s):" % len(errors) + output.append("Errors (%s):" % len(errors)) for i in sorted(errorsDict.keys()): - print "%s:" % i + output.append("%s:" % i) for j in sorted(errorsDict[i]): - print "%s" % j - -def main(): - - parser = argparse.ArgumentParser( - prog='mkChangeNotes', - formatter_class=argparse.RawDescriptionHelpFormatter, - description='Writes out notes file for packing to QA', - epilog= -"""Examples: - -mkChangeNotes hg19 wgEncodeUwDnase 3 2 --loose -mkChangeNotes hg19 wgEncodeSydhTfbs 1 - --full -mkChangeNotes hg19 wgEncodeCshlLongRnaSeq 1 - - -""" - ) - parser.add_argument('-t', '--trackPath', help='Overrides the default track path ~/kent/src/hg/makeDb/trackDb/') - parser.add_argument('-l', '--loose', action="store_true", default=0, help='Loose checking for legacy elements. Will be retired once all tracks go through a release cycle') - parser.add_argument('-i', '--ignore', action="store_true", default=0, help='Ignore errors, print out report.') - parser.add_argument('-s', '--summary', action="store_true", default=0, help='Print summary stats only.') - parser.add_argument('database', help='The database, typically hg19 or mm9') - parser.add_argument('composite', help='The composite name, wgEncodeCshlLongRnaSeq for instance') - parser.add_argument('releaseNew', help='The new release to be released') - parser.add_argument('releaseOld', nargs='?', default='-', help='The old release that is already released, if on release 1, or solo release mode, put anything here') - - if len(sys.argv) == 1: - parser.print_help() - return - args = parser.parse_args(sys.argv[1:]) - if not args.releaseNew.isdigit(): - parser.print_help() - return - - c = track.CompositeTrack(args.database, args.composite) - - loose = args.loose - + output.append("%s" % j) + return output + + def __init__(self, args): + releaseNew = args['releaseNew'] + releaseOld = args['releaseOld'] + database = args['database'] + composite = args['composite'] + loose = args['loose'] + ignore = args['ignore'] + summary = args['summary'] errors = [] + c = track.CompositeTrack(database, composite) + if int(releaseNew) > 1 and str(releaseOld) != 'solo': - if not args.releaseOld.isdigit(): - args.releaseOld = 'solo' - elif int(args.releaseOld) > int(args.releaseNew): - errors.append("Old Release is higher than New Release") - args.releaseOld = args.releaseNew - printErrors(errors) - return - - - if int(args.releaseNew) > 1 and str(args.releaseOld) != 'solo': - - newReleaseFiles = c.releases[int(args.releaseNew)-1] - oldReleaseFiles = c.releases[int(args.releaseOld)-1] + newReleaseFiles = c.releases[int(releaseNew)-1] + oldReleaseFiles = c.releases[int(releaseOld)-1] newMdb = c.alphaMetaDb oldMdb = c.publicMetaDb #check if all files listed in release directories have associated metaDb entries - (newMdb, revokedSet, revokedFiles, atticSet, newSupplementalSet, newFileErrors) = checkMetaDbForFiles(newMdb, newReleaseFiles, "alpha metaDb", loose) - (oldMdb, spam, eggs, ham, oldSupplementalSet, oldFileErrors) = checkMetaDbForFiles(oldMdb, oldReleaseFiles, "public metaDb", loose) + (newMdb, revokedSet, revokedFiles, atticSet, newSupplementalSet, newFileErrors) = self.checkMetaDbForFiles(newMdb, newReleaseFiles, "alpha metaDb", loose) + (oldMdb, spam, eggs, ham, oldSupplementalSet, oldFileErrors) = self.checkMetaDbForFiles(oldMdb, oldReleaseFiles, "public metaDb", loose) errors.extend(newFileErrors) errors.extend(oldFileErrors) #checks to see that nothing has disappeared between public and alpha - errors.extend(checkAlphaForDropped(newMdb, oldMdb, "alpha metaDb", "stanza")) - missingFiles = checkFilesForDropped(newReleaseFiles, oldReleaseFiles) - errors.extend(checkMd5sums(newReleaseFiles, oldReleaseFiles, loose)) + errors.extend(self.checkAlphaForDropped(newMdb, oldMdb, "alpha metaDb", "stanza")) + missingFiles = self.checkFilesForDropped(newReleaseFiles, oldReleaseFiles) + errors.extend(self.checkMd5sums(newReleaseFiles, oldReleaseFiles, loose)) #checks and gets tables that are present, also returns a revoked set of tables for new - (newTableSet, revokedTableSet, newTableError) = checkTableStatus(newMdb, newReleaseFiles, args.database, args.composite, "alpha metaDb", loose, revokedSet) - (oldTableSet, spam, oldTableError) = checkTableStatus(oldMdb, oldReleaseFiles, args.database, args.composite, "public metaDb", loose, revokedSet) + (newTableSet, revokedTableSet, newTableError) = self.checkTableStatus(newMdb, newReleaseFiles, database, composite, "alpha metaDb", loose, revokedSet) + (oldTableSet, spam, oldTableError) = self.checkTableStatus(oldMdb, oldReleaseFiles, database, composite, "public metaDb", loose, revokedSet) errors.extend(newTableError) errors.extend(oldTableError) #same as above except for gbdbs - (newGbdbSet, revokedGbdbs, newGbdbError) = getGbdbFiles(args.database, newTableSet, revokedTableSet, newMdb) - (oldGbdbSet, eggs, oldGbdbError) = getGbdbFiles(args.database, oldTableSet, set(), oldMdb) + (newGbdbSet, revokedGbdbs, newGbdbError) = self.getGbdbFiles(database, newTableSet, revokedTableSet, newMdb) + (oldGbdbSet, eggs, oldGbdbError) = self.getGbdbFiles(database, oldTableSet, set(), oldMdb) errors.extend(newGbdbError) errors.extend(oldGbdbError) #for ease of typing totalFiles = set(newReleaseFiles) #these could honestly be moved earlier, get a file list processing section or something #they clean out special fiels out and separated the master fiels list into the 3 required #ones: wgEncode, supplemental and additional. - totalFiles = cleanSpecialFiles(totalFiles) - oldReleaseFiles = cleanSpecialFiles(set(oldReleaseFiles)) - (oldReleaseFiles, additionalList, oldAdditionalList, totalFiles) = separateOutAdditional(oldReleaseFiles, totalFiles, newSupplementalSet, oldSupplementalSet) + totalFiles = self.cleanSpecialFiles(totalFiles) + oldReleaseFiles = self.cleanSpecialFiles(set(oldReleaseFiles)) + (oldReleaseFiles, additionalList, oldAdditionalList, totalFiles) = self.separateOutAdditional(oldReleaseFiles, totalFiles, newSupplementalSet, oldSupplementalSet) #get the stuff you need to push, also table sizes pushTables = set(sorted((newTableSet - oldTableSet))) - tableSize = getTableSize(pushTables, args.database) + tableSize = self.getTableSize(pushTables, database) pushFiles = set(sorted((totalFiles - oldReleaseFiles))) pushGbdbs = set(sorted((newGbdbSet - oldGbdbSet))) - #don't print report unless ignore option is on or no errors - if (not errors) or args.ignore: - printReport(args, totalFiles, newGbdbSet, newTableSet, additionalList, oldAdditionalList, pushTables, pushFiles, pushGbdbs, c, oldTableSet, oldReleaseFiles, oldGbdbSet, atticSet, revokedFiles, newMdb, revokedTableSet, revokedGbdbs, missingFiles, newSupplementalSet, oldSupplementalSet, tableSize) + #don't output.append(report unless ignore option is on or no errors + if (not errors) or ignore: + self.output = self.printReport(args, totalFiles, newGbdbSet, newTableSet, additionalList, oldAdditionalList, pushTables, pushFiles, pushGbdbs, c, oldTableSet, oldReleaseFiles, oldGbdbSet, atticSet, revokedFiles, newMdb, revokedTableSet, revokedGbdbs, missingFiles, newSupplementalSet, oldSupplementalSet, tableSize) else: - printErrors(errors) + self.output = self.printErrors(errors) - elif args.releaseOld == 'solo': + elif releaseOld == 'solo': - newReleaseFiles = c.releases[int(args.releaseNew)-1] + newReleaseFiles = c.releases[int(releaseNew)-1] newMdb = c.alphaMetaDb - (newMdb, revokedSet, revokedFiles, atticSet, newSupplementalSet, newFileErrors) = checkMetaDbForFiles(newMdb, newReleaseFiles, "alpha metaDb", loose) + (newMdb, revokedSet, revokedFiles, atticSet, newSupplementalSet, newFileErrors) = self.checkMetaDbForFiles(newMdb, newReleaseFiles, "alpha metaDb", loose) errors.extend(newFileErrors) - (newTableSet, revokedTableSet, newTableError) = checkTableStatus(newMdb, newReleaseFiles, args.database, args.composite, "alpha metaDb", loose, revokedSet) + (newTableSet, revokedTableSet, newTableError) = self.checkTableStatus(newMdb, newReleaseFiles, database, composite, "alpha metaDb", loose, revokedSet) errors.extend(newTableError) - tableSize = getTableSize(newTableSet, args.database) + tableSize = self.getTableSize(newTableSet, database) - (newGbdbSet, revokedGbdbs, newGbdbError) = getGbdbFiles(args.database, newTableSet, revokedTableSet, newMdb) + (newGbdbSet, revokedGbdbs, newGbdbError) = self.getGbdbFiles(database, newTableSet, revokedTableSet, newMdb) errors.extend(newGbdbError) #set for easy operations totalFiles = set(newReleaseFiles) #clean out special fiels we don't push i.e. md5sum.history - totalFiles = cleanSpecialFiles(totalFiles) + totalFiles = self.cleanSpecialFiles(totalFiles) #makes list for additional files - (oldReleaseFiles, additionalList, oldAdditionalList, totalFiles) = separateOutAdditional(set(), totalFiles, newSupplementalSet, set()) - if (not errors) or args.ignore: - printReportOne(args, totalFiles, revokedFiles, newGbdbSet, revokedGbdbs, newTableSet, revokedTableSet, additionalList, c, atticSet, newSupplementalSet, tableSize) + (oldReleaseFiles, additionalList, oldAdditionalList, totalFiles) = self.separateOutAdditional(set(), totalFiles, newSupplementalSet, set()) + if (not errors) or ignore: + self.output = self.printReportOne(args, totalFiles, revokedFiles, newGbdbSet, revokedGbdbs, newTableSet, revokedTableSet, additionalList, c, atticSet, newSupplementalSet, tableSize) else: + self.output = self.printErrors(errors) + + +def main(): + + + parser = argparse.ArgumentParser( + prog='mkChangeNotes', + formatter_class=argparse.RawDescriptionHelpFormatter, + description='Writes out notes file for packing to QA', + epilog= + """Examples: + + mkChangeNotes hg19 wgEncodeUwDnase 3 2 --loose + mkChangeNotes hg19 wgEncodeSydhTfbs 1 - --full + mkChangeNotes hg19 wgEncodeCshlLongRnaSeq 1 - + + """ + ) + parser.add_argument('-l', '--loose', action="store_true", default=0, help='Loose checking for legacy elements. Will be retired once all tracks go through a release cycle') + parser.add_argument('-i', '--ignore', action="store_true", default=0, help='Ignore errors, output.append(out report.') + parser.add_argument('-s', '--summary', action="store_true", default=0, help='output.append(summary stats only.') + parser.add_argument('database', help='The database, typically hg19 or mm9') + parser.add_argument('composite', help='The composite name, wgEncodeCshlLongRnaSeq for instance') + parser.add_argument('releaseNew', help='The new release to be released') + parser.add_argument('releaseOld', nargs='?', default='-', help='The old release that is already released, if on release 1, or solo release mode, put anything here') + + if len(sys.argv) == 1: + parser.print_help() + return + args = parser.parse_args(sys.argv[1:]) + if not args.releaseNew.isdigit(): + parser.print_help() + return + + + + if not args.releaseOld.isdigit(): + args.releaseOld = 'solo' + elif int(args.releaseOld) > int(args.releaseNew): + errors.append("Old Release is higher than New Release") + args.releaseOld = args.releaseNew printErrors(errors) + return + + + c = track.CompositeTrack(args.database, args.composite) + + argsdict = {'database': args.database, 'composite': args.composite, 'releaseNew': args.releaseNew, 'releaseOld': args.releaseOld, 'loose': args.loose, 'ignore': args.ignore, 'summary': args.summary} + + notes = mkChangeNotes(argsdict) + + for line in notes.output: + print line if __name__ == '__main__': main()