8f77dded373115e5bee48bd8cdc4fbd538ac73e6 wong Mon Oct 17 10:44:53 2011 -0700 draft 2, reformatted output, changed how certain functions work, still could shed a lot of code, but conceptually it's there diff --git python/programs/mkChangeNotes/mkChangeNotes python/programs/mkChangeNotes/mkChangeNotes index 9f7c464..dd0c59f 100755 --- python/programs/mkChangeNotes/mkChangeNotes +++ python/programs/mkChangeNotes/mkChangeNotes @@ -1,413 +1,546 @@ #!/hive/groups/encode/dcc/bin/python -import sys, os, re, argparse, subprocess +import sys, os, re, argparse, subprocess, math from ucscgenomics import ra, track def checkMetaDbForFiles(mdb, files, status, loose): errors = [] + revokedset = set() + atticset = set() + supplementalset = set() + filtermdb = ra.RaFile() for i in files: + if re.match('.\/', i): + supplementalset.add(i) if not re.match('wgEncode.*', i): continue + filestanza = mdb.filter(lambda s: re.match(".*%s.*" % i,s['fileName']), lambda s: s) + + #should only return 1, just in case if filestanza: - pass + for i in filestanza: + filtermdb[i.name] = i + if 'objStatus' in i and re.search('revoked|replaced|renamed', i['objStatus']): + revokedset.add(i.name) + if 'attic' in i: + atticset.add(i.name) else: #pass if loose and re.match('.*bai', i): pass else: errors.append("metaDb: %s has is not mentioned in %s" % (i, status)) - return errors + return (filtermdb, revokedset, atticset, supplementalset, errors) def checkAlphaForDropped(new, old, status, type): errors=[] - for i in old: - if re.search('MAGIC', i): - pass - if i in old: - errors.append("MAGIC number same in alpha and public metaDb") - else: - continue - if not re.match('wgEncode.*', i): - continue - if i in new: - pass - else: + diff = set(old) -set(new) + for i in diff: errors.append("%s: %s missing from %s" % (type, i, status)) return errors -def checkTableStatus(mdb, files, database, composite, status, loose): +def checkFilesForDropped(new, old): + diff = set(old) - set(new) + return diff + + +def checkTableStatus(mdb, files, database, composite, status, loose, revokedset): errors=[] #home = os.environ['HOME'] #dbhost = '' #dbuser = '' #dbpassword = '' #p = re.compile('db.(\S+)=(\S+)') #with open("%s/.hg.conf" % home) as f: # for line in f: # line.rstrip("\n\r") # if p.match(line): # m = p.match(line) # if m.groups(1)[0] == 'host': # dbhost = m.groups(1)[1] # if m.groups(1)[0] == 'user': # dbuser = m.groups(1)[1] # if m.groups(1)[0] == 'password': # dbpassword = m.groups(1)[1] #print dbhost #print dbuser #print dbpassword #db = MySQLdb.connect (host = dbhost, # user = dbuser, # passwd = dbpassword, # db = database) #cursor = db.cursor () #cursor.execute ("show tables like '%s%s'" % (composite, "%")) #tableset = set(cursor.fetchall()) - mdbtableset = set(mdb.filter(lambda s: s['objType'] == 'table' and 'tableName' in s and 'attic' not in s and s['fileName'].split(",",1)[0] in files, lambda s: s['metaObject'])) - atticset = set(mdb.filter(lambda s: 'attic' in s, lambda s: s['metaObject'])) - revokedset = set(mdb.filter(lambda s: re.search('revoked|replaced|renamed', s['objStatus']), lambda s: s['metaObject'])) + mdbtableset = set(mdb.filter(lambda s: s['objType'] == 'table' and 'tableName' in s and 'attic' not in s, lambda s: s['metaObject'])) mdbtableset = mdbtableset - revokedset - + mdbtableset = set(mdb.filter(lambda s: s['metaObject'] in mdbtableset, lambda s: s['tableName'])) + revokedtableset = set(mdb.filter(lambda s: s['metaObject'] in revokedset, lambda s: s['tableName'])) sep = "','" tablestr = sep.join(mdbtableset) tablestr = "'" + tablestr + "'" #this should really be using python's database module, but I'd need admin access to install it #at this point, I am just parsing the output form hgsql cmd = "hgsql %s -e \"select table_name from information_schema.TABLES where table_name in (%s)\"" % (database, tablestr) p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) output = p.stdout.read() sqltableset = set(output.split("\n")[1:]) missingTableNames = set(mdb.filter(lambda s: s['objType'] == 'table' and 'tableName' not in s and 'attic' not in s, lambda s: s['metaObject'])) missingFromDb = mdbtableset - sqltableset if missingTableNames: for i in missingTableNames: errors.append("table: %s is type obj, but missing tableName field called by %s" % (i, status)) if missingFromDb: for i in missingFromDb: errors.append("table: %s table not found in Db called by %s" % (i, status)) - return (mdbtableset, revokedset, atticset, errors) -def getGbdbFiles(database, tableset, mdb): + + return (mdbtableset, revokedtableset, errors) + +def getGbdbFiles(database, tableset, revokedset, mdb): errors = [] sep = "','" tablestr = sep.join(tableset) tablestr = "'" + tablestr + "'" + revokestr = sep.join(tableset) + revokestr = "'" + tablestr + "'" cmd = "hgsql %s -e \"select table_name from information_schema.columns where table_name in (%s) and column_name = 'fileName'\"" % (database, tablestr) p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) output = p.stdout.read() gbdbtableset = set(output.split("\n")[1:]) - file1stanzalist = mdb.filter(lambda s: s['tableName'] in set(output.split("\n")[1:]), lambda s: s) + cmd = "hgsql %s -e \"select table_name from information_schema.columns where table_name in (%s) and column_name = 'fileName'\"" % (database, revokestr) + p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) + output = p.stdout.read() + + revokedtableset = set(output.split("\n")[1:]) + file1stanzalist = mdb.filter(lambda s: s['tableName'] in gbdbtableset, lambda s: s) + revokedstanzalist = mdb.filter(lambda s: s['tableName'] in revokedtableset, lambda s: s) gbdbfileset = set() + revokedfileset = set() + for i in file1stanzalist: filelist = i['fileName'].split(',') for j in filelist: if os.path.isfile("/gbdb/%s/bbi/%s" % (database, j)): gbdbfileset.add(j) else: errors.append("gbdb: %s does not exist in /gbdb/%s/bbi" % (j, databsase)) - return (gbdbfileset, errors) + for i in revokedstanzalist: + filelist = i['fileName'].split(',') + for j in filelist: + if os.path.isfile("/gbdb/%s/bbi/%s" % (database, j)): + revokedfileset.add(j) + else: + errors.append("gbdb: revoked gbdb %s does not exist in /gbdb/%s/bbi" % (j, databsase)) + + return (gbdbfileset, revokedfileset, errors) + +def getTableSize(mdbtableset, database): + tablesize = float(0) + tablelist = list() + for i in mdbtableset: + tablelist.append("table_name = '%s'" % i) + orsep = " OR " + orstr = orsep.join(tablelist) + #print orstr + cmd = "hgsql %s -e \"SELECT ROUND(data_length/1024/1024,2) total_size_mb, ROUND(index_length/1024/1024,2) total_index_size_mb FROM information_schema.TABLES WHERE table_name = %s\"" % (database, orstr) + p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) + output = p.stdout.read() + for i in output.split("\n")[1:]: + fields = i.split() + for j in fields: + tablesize = tablesize + float(j) + return math.ceil(tablesize) def checkMd5sums(newfiles, oldfiles): errors = [] for i in oldfiles: if i not in newfiles: pass - else: + elif re.match('wgEncode.*', i): if oldfiles[i].md5sum != newfiles[i].md5sum: errors.append("file: %s have changed md5sums between releases. %s vs %s" % (i, oldfiles[i].md5sum, newfiles[i].md5sum)) return errors -def makeFileSizes(c, args, pushFiles, pushGbdbs, additionalList): - pushFileSize = list() - for i in pushGbdbs: - pushFileSize.append("%s/%s" % (c.downloadsDirectory + 'release' + args.releaseNew, i)) - for i in pushFiles: - pushFileSize.append("%s/%s" % (c.downloadsDirectory + 'release' + args.releaseNew, i)) - for i in additionalList: - pushFileSize.append("%s/%s" % (c.downloadsDirectory + 'release' + args.releaseNew, i)) +def makeFileSizes(c, args, inlist): + checklist = list() + for i in inlist: + checklist.append("%s/%s" % (c.downloadsDirectory + 'release' + args.releaseNew, i)) filesizes = 0 - for i in pushFileSize: + for i in checklist: filesizes = filesizes + int(os.path.getsize(i)) filesizes = filesizes / (1024**2) return filesizes -def cleanSpecialFiles(pushFiles, totalFiles): +def cleanSpecialFiles(inlist): specialRemoveList = ['md5sum.history'] for i in specialRemoveList: - if i in pushFiles: - pushFiles.remove(i) - if i in totalFiles: - totalFiles.remove(i) + if i in inlist: + inlist.remove(i) - return(pushFiles, totalFiles) + return(inlist) -def separateOutAdditional(oldReleaseFiles, totalFiles, pushFiles): +def separateOutAdditional(oldReleaseFiles, totalFiles, newSupplementalSet, oldSupplementalSet): additionalList = set() + oldAdditionalList = set() + newTotal = set() + newOld = set() for i in totalFiles: if not re.match('wgEncode.*', i): additionalList.add(i) - for i in additionalList: - if i in pushFiles: - pushFiles.remove(i) - oldReleaseFiles = oldReleaseFiles - (oldReleaseFiles - totalFiles) + elif i in newSupplementalSet: + continue + else: + newTotal.add(i) + for i in oldReleaseFiles: + if not re.match('wgEncode.*', i): + if i in totalFiles: + pass + elif i in newSupplementalSet: + continue + else: + oldAdditionalList.add(i) + else: + newOld.add(i) - return(oldReleaseFiles, pushFiles, additionalList) + oldReleaseFiles = newOld -def printWithPath(set, c, args): + return(newOld, additionalList, oldAdditionalList, newTotal) + +def printWithPath(set, c, release): for i in sorted(set): - print "%s/%s" % (c.downloadsDirectory + 'release' + args.releaseNew, i) + print "%s/%s" % (c.downloadsDirectory + 'release' + release, i) + +def makeRevokedFiles(revokedSet, mdb): + revokedFiles = set() + for i in revokedSet: + file = mdb.filter(lambda s: re.match(".*%s.*" % i,s['fileName']), lambda s: s['fileName']) + for j in file: + splitfile = j.split(",") + for k in splitfile: + revokedFiles.add(k) + return revokedFiles + +def printIter(inlist): + for i in sorted(inlist): + print i -def printReport(args, totalFiles, newGbdbSet, newTableSet, additionalList, pushTables, pushFiles, pushGbdbs, c, oldTableSet, oldReleaseFiles, oldGbdbSet, filesizes, newAtticSet, oldAtticSet, newRevokedSet, oldRevokedSet): - revokedSet = ((newRevokedSet | oldRevokedSet) - (oldRevokedSet - newRevokedSet)) - unrevokedSet = (oldRevokedSet - newRevokedSet) - atticSet = ((newAtticSet | oldAtticSet) - (oldAtticSet - newAtticSet)) - unatticSet = (oldAtticSet - newAtticSet) +def printReport(args, totalFiles, newGbdbSet, newTableSet, additionalList, oldAdditionalList, pushTables, pushFiles, pushGbdbs, c, oldTableSet, oldReleaseFiles, oldGbdbSet, atticSet, revokedFiles, mdb, revokedTableSet, revokedGbdbs, missingFiles, newSupplementalSet, oldSupplementalSet, tableSize): + #the groups here need to be predefined, I just copied and pasted after working out what they were sep = "\n" print "mkChangeNotes v2" print "%s %s Release %s" % (args.database, args.composite, args.releaseNew) print "" - print "Totals:" - print "New Files and Gbdbs: %d" % (int(len(additionalList) + int(len(pushFiles)) + int(len(pushGbdbs)))) - print "Total size of files to be pushed: %d MB" % filesizes - print "" - print "Total Files: %d" % int(len(totalFiles | oldReleaseFiles)) - print "Total Gbdbs: %d" % int(len(newGbdbSet | oldGbdbSet)) - print "Total Tables: %d" % int(len(newTableSet | oldTableSet)) - print "Other Files: %d" % int(len(additionalList)) - print "Total Revoked: %d" % int(len(revokedSet)) - print "Total Unrevoked: %d" % int(len(unrevokedSet)) - print "Total Attic: %d" % int(len(atticSet)) - print "Total Un-attic: %d" % int(len(unatticSet)) - print "\n" - print "New Tables (%s):" % len(pushTables) - print sep.join(sorted(pushTables)) - print "\n" - print "New Files (%s):" % len(pushFiles) - printWithPath(pushFiles, c, args) - print "\n" - print "New Gbdbs (%s):" % len(pushGbdbs) - printWithPath(pushGbdbs, c, args) - print "\n" - print "Additional Files (%s):" % len(additionalList) - printWithPath(additionalList, c, args) - print "\n" - print "Active Untouched Tables (%s):" % len(oldTableSet) - print sep.join(sorted(oldTableSet)) + print "QA Count Summaries for Release %s:" % args.releaseNew + print "Tables: %d" % int(len(newTableSet)) + print "Files: %d" % int(len(totalFiles - revokedFiles)) + print "Gbdbs: %d" % int(len(newGbdbSet)) + print "Supplemental: %d" % int(len(newSupplementalSet - oldSupplementalSet)) + print "Other: %d" % int(len(additionalList)) print "\n" - print "Active Untouched Files (%s):" % len(set(oldReleaseFiles)) - printWithPath(oldReleaseFiles, c, args) + totalsize = 0; + print "Sizes of New:" + print "Tables: %d MB" % tableSize + totalsize = totalsize + tableSize + size = int(makeFileSizes(c, args, pushFiles)) + totalsize = totalsize + size + print "Files: %d MB" % size + size = int(makeFileSizes(c, args, pushGbdbs)) + totalsize = totalsize + size + print "Gbdbs: %d MB" % size + size = int(makeFileSizes(c, args, (newSupplementalSet - oldSupplementalSet))) + totalsize = totalsize + size + print "Supplemental: %d MB" % size + size = int(makeFileSizes(c, args, (additionalList))) + totalsize = totalsize + size + print "Other: %d MB" % size + print "Total: %d MB" % totalsize print "\n" - print "Active Untouched Gbdbs (%s):" % len(oldGbdbSet) - printWithPath(oldGbdbSet, c, args) + print "TABLES:" + print "New: %s" % len(pushTables) + print "Persisting: %s" % len(oldTableSet & newTableSet) + print "Revoked/Replaced/Renamed: %s" % len(revokedTableSet) + print "New + Persisting: %s" % len(newTableSet) + print "Total (New + Persisting + Revoked/Replaced/Renamed): %s" % len(newTableSet | oldTableSet | revokedTableSet) + if args.full: + print "" + print "New Tables (%s):" % len(pushTables) + printIter(pushTables) + print "" + print "Persisting (%s):" % len(oldTableSet & newTableSet) + printIter(oldTableSet & newTableSet) + print "" + print "Revoked/Replaced/Renamed Tables (%s):" % len(revokedTableSet) + printIter(revokedTableSet) print "\n" - print "Revoked Objects (%s):" % len(revokedSet) - for i in sorted(revokedSet): - print i + #downlaodables = total - revoked + print "DOWNLOAD FILES:" + print "New: %s" % len(pushFiles - revokedFiles) + print "Persisting: %s" % len((totalFiles & oldReleaseFiles) - revokedFiles) + print "Revoked/Replaced/Renamed: %s" % len(revokedFiles) + print "New + Persisting: %s" % len((pushFiles - revokedFiles) | ((totalFiles & oldReleaseFiles) - revokedFiles)) + print "Total (New + Persisting + Revoked/Replaced/Renamed): %s" % len(totalFiles | oldReleaseFiles | revokedFiles) + if args.full: + print "" + print "New Download Files (%s):" % len(pushFiles - revokedFiles) + printWithPath((pushFiles - revokedFiles), c, args.releaseNew) + print "" + print "Persisting Download Files (%s):" % len((totalFiles & oldReleaseFiles) - revokedFiles) + printWithPath(((totalFiles & oldReleaseFiles) - revokedFiles), c, args.releaseNew) + print "" + print "Revoked/Replaced/Renamed Download Files (%s):" % len(revokedFiles) + printWithPath(revokedFiles, c, args.releaseNew) print "\n" - print "Attic Objects (%s):" % len(atticSet) - for i in sorted(atticSet): - print i + print "GBDBS:" + print "New: %s" % len(pushGbdbs) + print "Persisting: %s" % len((newGbdbSet & oldGbdbSet) - revokedGbdbs) + print "Revoked/Replaced/Renamed: %s" % len(revokedGbdbs) + print "New + Persisting: %s" % len(pushGbdbs | ((newGbdbSet & oldGbdbSet) - revokedGbdbs)) + print "Total (New + Persisting + Revoked/Replaced/Renamed): %s" % len(newGbdbSet | oldGbdbSet | revokedGbdbs) + if args.full: + print "" + print "New Gbdb Files (%s):" % len(pushGbdbs) + printWithPath(pushGbdbs, c, args.releaseNew) + print "" + print "Persisting Gbdb Files (%s):" % len((newGbdbSet & oldGbdbSet) - revokedGbdbs) + printWithPath(((newGbdbSet & oldGbdbSet) - revokedGbdbs), c, args.releaseNew) + print "" + print "Revoked/Replaced/Renamed Gbdb Files (%s):" % len(revokedGbdbs) + printWithPath(revokedGbdbs, c, args.releaseNew) print "\n" - print "Unrevoked Objects (%s):" % len(unrevokedSet) - for i in sorted(unrevokedSet): - print i + print "Supplemental Files:" + print "New: %s" % len(newSupplementalSet - oldSupplementalSet) + print "Persisting: %s" % len(oldSupplementalSet & newSupplementalSet) + print "Removed: %s" % len(oldSupplementalSet - newSupplementalSet) + print "New + Persisting: %s" % len((newSupplementalSet - oldSupplementalSet) | (oldSupplementalSet & newSupplementalSet)) + print "Total: %s" % len(newSupplementalSet | oldSupplementalSet) + if args.full: + print "" + print "New Supplemental Files:" + printWithPath(newSupplementalSet - oldSupplementalSet, c, args.releaseNew) + print "" + print "Persisting Supplemental Files:" + printWithPath(oldSupplementalSet & newSupplementalSet, c, args.releaseNew) + print "" + print "Removed Supplemental Files:" + printWithPath(oldSupplementaList - newSupplementalSet, c, args.releaseNew) print "\n" - print "Un-attic Objects (%s):" % len(unatticSet) - for i in sorted(unatticSet): - print i + print "OTHER FILES:" + print "New: %s" % len(additionalList) + print "Revoked/Replace: %s" % len(oldAdditionalList) + print "Total: %s" % len(additionalList | oldAdditionalList) print "\n" - print "No Errors" + if args.full: + print "" + print "New Other Files (%s):" % len(additionalList) + printWithPath(additionalList, c, args.releaseNew) + print "" + print "Revoked Other Files (%s):" % len(oldAdditionalList) + printWithPath(oldAdditionalList, c, args.releaseNew) + print "Files that dropped between releases (%s):" % len(missingFiles) + printWithPath(missingFiles, c, args.releaseOld) -def printReportOne(args, totalFiles, newGbdbSet, newTableSet, additionalList, pushTables, pushFiles, c, pushGbdbs, filesizes, atticSet, revokedSet): + +def printReportOne(args, totalFiles, newGbdbSet, newTableSet, additionalList, pushTables, c, pushGbdbs, atticSet, newSupplementalSet, tableSize): print "mkChangeNotes v2" - print "%s %s Release %s against Release %s" % (args.database, args.composite, args.releaseNew, args.releaseOld) - print "" - print "Totals:" - print "Files and Gbdbs: %d" % (int(len(totalFiles)) + int(len(newGbdbSet))) - print "Total size of files to be pushed: %d MB" % filesizes + print "%s %s Release %s" % (args.database, args.composite, args.releaseNew) print "" + print "QA Count Summaries for Release %s:" % args.releaseNew + print "Tables: %d" % int(len(newTableSet)) print "Files: %d" % int(len(totalFiles)) print "Gbdbs: %d" % int(len(newGbdbSet)) - print "Tables: %d" % int(len(newTableSet)) - print "Other Files: %d" % int(len(additionalList)) - print "Total Revoked: %d" % int(len(revokedSet)) - print "Total Attic: %d" % int(len(atticSet)) + print "Supplemental: %d" % int(len(newSupplementalSet)) + print "Other: %d" % int(len(additionalList)) print "\n" - sep = "\n" - print "New Tables (%s):" % len(pushTables) - print sep.join(sorted(pushTables)) - print "\n" - print "New Files (%s):" % len(pushFiles) - printWithPath(pushFiles, c, args) + totalsize = 0; + print "Sizes of New:" + print "Tables: %d MB" % tableSize + totalsize = totalsize + tableSize + size = int(makeFileSizes(c, args, totalFiles)) + totalsize = totalsize + size + print "Files: %d MB" % size + size = int(makeFileSizes(c, args, pushGbdbs)) + totalsize = totalsize + size + print "Gbdbs: %d MB" % size + size = int(makeFileSizes(c, args, newSupplementalSet)) + totalsize = totalsize + size + print "Supplemental: %d MB" % size + size = int(makeFileSizes(c, args, (additionalList))) + totalsize = totalsize + size + print "Other: %d MB" % size + print "Total: %d MB" % totalsize print "\n" - print "New Gbdbs (%s):" % len(pushGbdbs) - printWithPath(pushGbdbs, c, args) + if args.full: + print "" + print "New Tables (%s):" % len(pushTables) + printIter(pushTables) print "\n" - print "Additional Files (%s):" % len(additionalList) - printWithPath(additionalList, c, args) + print "New Download Files (%s):" % len(totalFiles) + printWithPath(totalFiles, c, args.releaseNew) print "\n" - print "Revoked Objects (%s):" % len(revokedSet) - for i in sorted(revokedSet): - print i + print "New Gbdb Files (%s):" % len(pushGbdbs) + printWithPath(pushGbdbs, c, args.releaseNew) print "\n" - print "Attic Objects (%s):" % len(atticSet) - for i in sorted(atticSet): - print i + print "New Supplemental Files:" + printWithPath(newSupplementalSet, c, args.releaseNew) print "\n" - print "No Errors" + print "New Other Files (%s):" % len(additionalList) + printWithPath(additionalList, c, args.releaseNew) + + def printErrors(errors): errorsDict = {} for i in errors: line = i.split(":", 1) try: errorsDict[line[0]].append(line[1]) except: errorsDict[line[0]] = [] errorsDict[line[0]].append(line[1]) print "Errors (%s):" % len(errors) for i in sorted(errorsDict.keys()): print "%s:" % i for j in sorted(errorsDict[i]): print "%s" % j def main(): parser = argparse.ArgumentParser( prog='mkChangeNotes', description='Writes out notes file for packing to QA', epilog='example: encodeMkChange hg19 wgEncodeUwDnase 3 2' ) parser.add_argument('-t', '--trackPath', help='Overrides the default track path ~/kent/src/hg/makeDb/trackDb/') parser.add_argument('-l', '--loose', action="store_true", default=0, help='Loose checking for legacy elements. Will be retired once all tracks go through a release cycle') parser.add_argument('-i', '--ignore', action="store_true", default=0, help='Ignore errors, print out report.') + parser.add_argument('-f', '--full', action="store_true", default=0, help='Print full stats.') parser.add_argument('database', help='The database, typically hg19 or mm9') parser.add_argument('composite', help='The composite name, wgEncodeCshlLongRnaSeq for instance') parser.add_argument('releaseNew', help='The new release to be released') parser.add_argument('releaseOld', help='The old release that is already released') if len(sys.argv) == 1: parser.print_help() return args = parser.parse_args(sys.argv[1:]) if not args.releaseNew.isdigit(): parser.print_help() return c = track.CompositeTrack(args.database,args.composite) loose = args.loose errors = [] if args.releaseOld == "-": args.releaseOld = 0 if int(args.releaseOld) > int(args.releaseNew): errors.append("Old Release is higher than New Release") args.releaseOld = args.releaseNew if int(args.releaseNew) > 1: newReleaseFiles = c.releases[int(args.releaseNew)-1] oldReleaseFiles = c.releases[int(args.releaseOld)-1] newMdb = c.alphaMetaDb oldMdb = c.publicMetaDb - errors.extend(checkMetaDbForFiles(newMdb, newReleaseFiles, "alpha metaDb", loose)) - errors.extend(checkMetaDbForFiles(oldMdb, oldReleaseFiles, "public metaDb", loose)) + (newMdb, revokedSet, atticSet, newSupplementalSet, newFileErrors) = checkMetaDbForFiles(newMdb, newReleaseFiles, "alpha metaDb", loose) + errors.extend(newFileErrors) + (oldMdb, spam, eggs, oldSupplementalSet, oldFileErrors) = checkMetaDbForFiles(oldMdb, oldReleaseFiles, "public metaDb", loose) + errors.extend(oldFileErrors) errors.extend(checkAlphaForDropped(newMdb, oldMdb, "alpha metaDb", "stanza")) - errors.extend(checkAlphaForDropped(newReleaseFiles, oldReleaseFiles, "new release download directory", "file")) + missingFiles = checkFilesForDropped(newReleaseFiles, oldReleaseFiles) errors.extend(checkMd5sums(newReleaseFiles, oldReleaseFiles)) - (newTableSet, newRevokedSet, newAtticSet, newTableError) = checkTableStatus(newMdb, newReleaseFiles, args.database, args.composite, "alpha metaDb", loose) + (newTableSet, revokedTableSet, newTableError) = checkTableStatus(newMdb, newReleaseFiles, args.database, args.composite, "alpha metaDb", loose, revokedSet) errors.extend(newTableError) - (oldTableSet, oldRevokedSet, oldAtticSet, oldTableError) = checkTableStatus(oldMdb, oldReleaseFiles, args.database, args.composite, "public metaDb", loose) + (oldTableSet, spam, oldTableError) = checkTableStatus(oldMdb, oldReleaseFiles, args.database, args.composite, "public metaDb", loose, revokedSet) errors.extend(oldTableError) - (newGbdbSet, newGbdbError) = getGbdbFiles(args.database, newTableSet, newMdb) + (newGbdbSet, revokedGbdbs, newGbdbError) = getGbdbFiles(args.database, newTableSet, revokedTableSet, newMdb) errors.extend(newGbdbError) - (oldGbdbSet, oldGbdbError) = getGbdbFiles(args.database, oldTableSet, oldMdb) + (oldGbdbSet, eggs, oldGbdbError) = getGbdbFiles(args.database, oldTableSet, set(), oldMdb) errors.extend(oldGbdbError) droppedTables = oldTableSet - newTableSet if droppedTables: for i in droppedTables: errors.append("table: %s was dropped between releases" % i) -######### - #some weird suggestion from online about python 3 not being able to compare strings to ints implicitly, - #here for future reference in case something breaks - #pushTables = sorted((newTableSet - oldTableSet), key=lambda item: (int(item.partition(' ')[0]) - # if item[0].isdigit() else float('inf'), item)) - #pushFiles = sorted((newReleaseFiles - oldReleaseFiles), key=lambda item: (int(item.partition(' ')[0]) - # if item[0].isdigit() else float('inf'), item)) - #pushGbdbs = sorted((newGbdbSet - oldGbdbSet), key=lambda item: (int(item.partition(' ')[0]) - # if item[0].isdigit() else float('inf'), item)) -######### - - pushTables = sorted((newTableSet - oldTableSet)) - pushFiles = sorted((set(newReleaseFiles) - set(oldReleaseFiles))) - pushGbdbs = sorted((newGbdbSet - oldGbdbSet)) totalFiles = set(newReleaseFiles) - (pushFiles, totalFiles) = cleanSpecialFiles(pushFiles, totalFiles) - (oldReleaseFiles, totalFiles) = cleanSpecialFiles(set(oldReleaseFiles), totalFiles) - (oldReleaseFiles, pushFiles, additionalList) = separateOutAdditional(oldReleaseFiles, totalFiles, pushFiles) + #these could honestly be moved earlier, get a file list processing section or something + totalFiles = cleanSpecialFiles(totalFiles) + oldReleaseFiles = cleanSpecialFiles(set(oldReleaseFiles)) + (oldReleaseFiles, additionalList, oldAdditionalList, totalFiles) = separateOutAdditional(oldReleaseFiles, totalFiles, newSupplementalSet, oldSupplementalSet) - filesizes = makeFileSizes(c, args, pushFiles, pushGbdbs, additionalList) + revokedFiles = makeRevokedFiles(revokedSet, newMdb) + pushTables = set(sorted((newTableSet - oldTableSet))) + tableSize = getTableSize(pushTables, args.database) + pushFiles = set(sorted((totalFiles - oldReleaseFiles))) + pushGbdbs = set(sorted((newGbdbSet - oldGbdbSet))) if (not errors) or args.ignore: - printReport(args, totalFiles, newGbdbSet, newTableSet, additionalList, pushTables, pushFiles, pushGbdbs, c, oldTableSet, oldReleaseFiles, oldGbdbSet, filesizes, newAtticSet, oldAtticSet, newRevokedSet, oldRevokedSet) - + printReport(args, totalFiles, newGbdbSet, newTableSet, additionalList, oldAdditionalList, pushTables, pushFiles, pushGbdbs, c, oldTableSet, oldReleaseFiles, oldGbdbSet, atticSet, revokedFiles, newMdb, revokedTableSet, revokedGbdbs, missingFiles, newSupplementalSet, oldSupplementalSet, tableSize) else: printErrors(errors) else: args.releaseOld = 0 newReleaseFiles = c.releases[int(args.releaseNew)-1] + newMdb = c.alphaMetaDb - errors.extend(checkMetaDbForFiles(newMdb, newReleaseFiles, "alpha metaDb", loose)) - (newTableSet, newRevokedSet, newAtticSet, newTableError) = checkTableStatus(newMdb, newReleaseFiles, args.database, args.composite, "alpha metaDb", loose) + + (newMdb, revokedSet, atticSet, newSupplementalSet, newFileErrors) = checkMetaDbForFiles(newMdb, newReleaseFiles, "alpha metaDb", loose) + errors.extend(newFileErrors) + + (newTableSet, revokedTableSet, newTableError) = checkTableStatus(newMdb, newReleaseFiles, args.database, args.composite, "alpha metaDb", loose, revokedSet) errors.extend(newTableError) - (newGbdbSet, newGbdbError) = getGbdbFiles(args.database, newTableSet, newMdb) + tableSize = getTableSize(newTableSet, args.database) + + (newGbdbSet, revokedGbdbs, newGbdbError) = getGbdbFiles(args.database, newTableSet, revokedTableSet, newMdb) errors.extend(newGbdbError) + #set for easy operations totalFiles = set(newReleaseFiles) + #clean out special fiels we don't push i.e. md5sum.history - (pushFiles, totalFiles) = cleanSpecialFiles(totalFiles, totalFiles) + totalFiles = cleanSpecialFiles(totalFiles) + #makes list for additional files - (spam, pushFiles, additionalList) = separateOutAdditional(set(), totalFiles, pushFiles) - #makes files sizes - filesizes = makeFileSizes(c, args, pushFiles, newGbdbSet, additionalList) + (oldReleaseFiles, additionalList, oldAdditionalList, totalFiles) = separateOutAdditional(set(), totalFiles, newSupplementalSet, set()) if (not errors) or args.ignore: - printReportOne(args, totalFiles, newGbdbSet, newTableSet, additionalList, newTableSet, pushFiles, c, newGbdbSet, filesizes, newAtticSet, newRevokedSet) + printReportOne(args, totalFiles, newGbdbSet, newTableSet, additionalList, newTableSet, c, newGbdbSet, atticSet, newSupplementalSet, tableSize) else: printErrors(errors) if __name__ == '__main__': main()