9a49290fedc8b4f9c7a631dedcfe8ac3aa2b4cee chinhli Thu Oct 20 14:10:43 2011 -0700 merge conflict resolved diff --git python/programs/mkChangeNotes/mkChangeNotes python/programs/mkChangeNotes/mkChangeNotes index 9f7c464..297a870 100755 --- python/programs/mkChangeNotes/mkChangeNotes +++ python/programs/mkChangeNotes/mkChangeNotes @@ -1,413 +1,609 @@ #!/hive/groups/encode/dcc/bin/python -import sys, os, re, argparse, subprocess +import sys, os, re, argparse, subprocess, math from ucscgenomics import ra, track def checkMetaDbForFiles(mdb, files, status, loose): errors = [] + revokedset = set() + revokedfiles = set() + atticset = set() + supplementalset = set() + filtermdb = ra.RaFile() + for i in files: + if re.match('supplemental', i): + supplementalset.add(i) if not re.match('wgEncode.*', i): continue + filestanza = mdb.filter(lambda s: re.match(".*%s.*" % i,s['fileName']), lambda s: s) + #should only return 1, just in case if filestanza: - pass + for j in filestanza: + filtermdb[j.name] = j + if 'objStatus' in j and re.search('revoked|replaced|renamed', j['objStatus']): + revokedfiles.add(i) + revokedset.add(j.name) + if 'attic' in j: + atticset.add(j.name) else: #pass if loose and re.match('.*bai', i): pass else: errors.append("metaDb: %s has is not mentioned in %s" % (i, status)) - return errors + return (filtermdb, revokedset, revokedfiles, atticset, supplementalset, errors) def checkAlphaForDropped(new, old, status, type): errors=[] - for i in old: - if re.search('MAGIC', i): - pass - if i in old: - errors.append("MAGIC number same in alpha and public metaDb") - else: - continue - if not re.match('wgEncode.*', i): - continue - if i in new: - pass - else: + diff = set(old) -set(new) + for i in diff: errors.append("%s: %s missing from %s" % (type, i, status)) - return errors -def checkTableStatus(mdb, files, database, composite, status, loose): +def checkFilesForDropped(new, old): + diff = set(old) - set(new) + return diff + +def checkTableStatus(mdb, files, database, composite, status, loose, revokedset): errors=[] + #home = os.environ['HOME'] #dbhost = '' #dbuser = '' #dbpassword = '' #p = re.compile('db.(\S+)=(\S+)') #with open("%s/.hg.conf" % home) as f: # for line in f: # line.rstrip("\n\r") # if p.match(line): # m = p.match(line) # if m.groups(1)[0] == 'host': # dbhost = m.groups(1)[1] # if m.groups(1)[0] == 'user': # dbuser = m.groups(1)[1] # if m.groups(1)[0] == 'password': # dbpassword = m.groups(1)[1] #print dbhost #print dbuser #print dbpassword #db = MySQLdb.connect (host = dbhost, # user = dbuser, # passwd = dbpassword, # db = database) #cursor = db.cursor () #cursor.execute ("show tables like '%s%s'" % (composite, "%")) #tableset = set(cursor.fetchall()) - mdbtableset = set(mdb.filter(lambda s: s['objType'] == 'table' and 'tableName' in s and 'attic' not in s and s['fileName'].split(",",1)[0] in files, lambda s: s['metaObject'])) - atticset = set(mdb.filter(lambda s: 'attic' in s, lambda s: s['metaObject'])) - revokedset = set(mdb.filter(lambda s: re.search('revoked|replaced|renamed', s['objStatus']), lambda s: s['metaObject'])) + mdbtableset = set(mdb.filter(lambda s: s['objType'] == 'table' and 'tableName' in s and 'attic' not in s, lambda s: s['metaObject'])) mdbtableset = mdbtableset - revokedset - + mdbtableset = set(mdb.filter(lambda s: s['metaObject'] in mdbtableset, lambda s: s['tableName'])) + revokedtableset = set(mdb.filter(lambda s: s['metaObject'] in revokedset, lambda s: s['tableName'])) sep = "','" tablestr = sep.join(mdbtableset) tablestr = "'" + tablestr + "'" #this should really be using python's database module, but I'd need admin access to install it #at this point, I am just parsing the output form hgsql cmd = "hgsql %s -e \"select table_name from information_schema.TABLES where table_name in (%s)\"" % (database, tablestr) p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) output = p.stdout.read() sqltableset = set(output.split("\n")[1:]) missingTableNames = set(mdb.filter(lambda s: s['objType'] == 'table' and 'tableName' not in s and 'attic' not in s, lambda s: s['metaObject'])) missingFromDb = mdbtableset - sqltableset if missingTableNames: for i in missingTableNames: errors.append("table: %s is type obj, but missing tableName field called by %s" % (i, status)) if missingFromDb: for i in missingFromDb: errors.append("table: %s table not found in Db called by %s" % (i, status)) - return (mdbtableset, revokedset, atticset, errors) + return (mdbtableset, revokedtableset, errors) -def getGbdbFiles(database, tableset, mdb): +def getGbdbFiles(database, tableset, revokedset, mdb): errors = [] sep = "','" tablestr = sep.join(tableset) tablestr = "'" + tablestr + "'" + revokestr = sep.join(revokedset) + revokestr = "'" + revokestr + "'" cmd = "hgsql %s -e \"select table_name from information_schema.columns where table_name in (%s) and column_name = 'fileName'\"" % (database, tablestr) p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) output = p.stdout.read() gbdbtableset = set(output.split("\n")[1:]) - file1stanzalist = mdb.filter(lambda s: s['tableName'] in set(output.split("\n")[1:]), lambda s: s) + cmd = "hgsql %s -e \"select table_name from information_schema.columns where table_name in (%s) and column_name = 'fileName'\"" % (database, revokestr) + p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) + output = p.stdout.read() + revokedtableset = set(output.split("\n")[1:]) + + file1stanzalist = mdb.filter(lambda s: s['tableName'] in gbdbtableset, lambda s: s) + revokedstanzalist = mdb.filter(lambda s: s['tableName'] in revokedtableset, lambda s: s) gbdbfileset = set() + revokedfileset = set() + for i in file1stanzalist: filelist = i['fileName'].split(',') for j in filelist: if os.path.isfile("/gbdb/%s/bbi/%s" % (database, j)): gbdbfileset.add(j) else: - errors.append("gbdb: %s does not exist in /gbdb/%s/bbi" % (j, databsase)) + errors.append("gbdb: %s does not exist in /gbdb/%s/bbi" % (j, database)) + + for i in revokedstanzalist: + filelist = i['fileName'].split(',') + for j in filelist: + if os.path.isfile("/gbdb/%s/bbi/%s" % (database, j)): + revokedfileset.add(j) + else: + errors.append("gbdb: revoked gbdb %s does not exist in /gbdb/%s/bbi" % (j, databsase)) - return (gbdbfileset, errors) + return (gbdbfileset, revokedfileset, errors) -def checkMd5sums(newfiles, oldfiles): +def getTableSize(mdbtableset, database): + tablesize = float(0) + tablelist = list() + for i in mdbtableset: + tablelist.append("table_name = '%s'" % i) + orsep = " OR " + orstr = orsep.join(tablelist) + + cmd = "hgsql %s -e \"SELECT ROUND(data_length/1024/1024,2) total_size_mb, ROUND(index_length/1024/1024,2) total_index_size_mb FROM information_schema.TABLES WHERE table_name = %s\"" % (database, orstr) + p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) + output = p.stdout.read() + for i in output.split("\n")[1:]: + fields = i.split() + for j in fields: + tablesize = tablesize + float(j) + return math.ceil(tablesize) + +def checkMd5sums(newfiles, oldfiles, loose): errors = [] for i in oldfiles: if i not in newfiles: pass - else: + elif re.match('wgEncode.*', i): if oldfiles[i].md5sum != newfiles[i].md5sum: errors.append("file: %s have changed md5sums between releases. %s vs %s" % (i, oldfiles[i].md5sum, newfiles[i].md5sum)) - + if loose: + return list() + else: return errors -def makeFileSizes(c, args, pushFiles, pushGbdbs, additionalList): - pushFileSize = list() - for i in pushGbdbs: - pushFileSize.append("%s/%s" % (c.downloadsDirectory + 'release' + args.releaseNew, i)) - for i in pushFiles: - pushFileSize.append("%s/%s" % (c.downloadsDirectory + 'release' + args.releaseNew, i)) - for i in additionalList: - pushFileSize.append("%s/%s" % (c.downloadsDirectory + 'release' + args.releaseNew, i)) +def makeFileSizes(c, args, inlist): + checklist = list() + for i in inlist: + checklist.append("%s/%s" % (c.downloadsDirectory + 'release' + args.releaseNew, i)) filesizes = 0 - for i in pushFileSize: + for i in checklist: filesizes = filesizes + int(os.path.getsize(i)) - filesizes = filesizes / (1024**2) + filesizes = math.ceil(float(filesizes) / (1024**2)) + return int(filesizes) - return filesizes - -def cleanSpecialFiles(pushFiles, totalFiles): +def cleanSpecialFiles(inlist): specialRemoveList = ['md5sum.history'] for i in specialRemoveList: - if i in pushFiles: - pushFiles.remove(i) - if i in totalFiles: - totalFiles.remove(i) + if i in inlist: + inlist.remove(i) - return(pushFiles, totalFiles) + return(inlist) -def separateOutAdditional(oldReleaseFiles, totalFiles, pushFiles): +def separateOutAdditional(oldReleaseFiles, totalFiles, newSupplementalSet, oldSupplementalSet): additionalList = set() + oldAdditionalList = set() + newTotal = set() + newOld = set() for i in totalFiles: - if not re.match('wgEncode.*', i): + if i in newSupplementalSet: + continue + elif not re.match('wgEncode.*', i): additionalList.add(i) - for i in additionalList: - if i in pushFiles: - pushFiles.remove(i) - oldReleaseFiles = oldReleaseFiles - (oldReleaseFiles - totalFiles) + else: + newTotal.add(i) + for i in oldReleaseFiles: + if not re.match('wgEncode.*', i): + if i in totalFiles: + pass + elif i in newSupplementalSet: + continue + else: + oldAdditionalList.add(i) + else: + newOld.add(i) + + oldReleaseFiles = newOld - return(oldReleaseFiles, pushFiles, additionalList) + return(newOld, additionalList, oldAdditionalList, newTotal) -def printWithPath(set, c, args): +def printWithPath(set, c, release): for i in sorted(set): - print "%s/%s" % (c.downloadsDirectory + 'release' + args.releaseNew, i) + print "%s/%s" % (c.httpDownloadsPath + 'release' + release, i) -def printReport(args, totalFiles, newGbdbSet, newTableSet, additionalList, pushTables, pushFiles, pushGbdbs, c, oldTableSet, oldReleaseFiles, oldGbdbSet, filesizes, newAtticSet, oldAtticSet, newRevokedSet, oldRevokedSet): - revokedSet = ((newRevokedSet | oldRevokedSet) - (oldRevokedSet - newRevokedSet)) - unrevokedSet = (oldRevokedSet - newRevokedSet) - atticSet = ((newAtticSet | oldAtticSet) - (oldAtticSet - newAtticSet)) - unatticSet = (oldAtticSet - newAtticSet) +def printGbdbPath(set, database): + for i in sorted(set): + print "/gbdb/%s/bbi/%s" % (database, i) + +def printIter(inlist): + for i in sorted(inlist): + print i + +def printReport(args, totalFiles, newGbdbSet, newTableSet, additionalList, oldAdditionalList, pushTables, pushFiles, pushGbdbs, c, oldTableSet, oldReleaseFiles, oldGbdbSet, atticSet, revokedFiles, mdb, revokedTableSet, revokedGbdbs, missingFiles, newSupplementalSet, oldSupplementalSet, tableSize): + #the groups here need to be predefined, I just copied and pasted after working out what they were sep = "\n" print "mkChangeNotes v2" - print "%s %s Release %s" % (args.database, args.composite, args.releaseNew) + print "%s %s Release %s vs Release %s" % (args.database, args.composite, args.releaseNew, args.releaseOld) print "" - print "Totals:" - print "New Files and Gbdbs: %d" % (int(len(additionalList) + int(len(pushFiles)) + int(len(pushGbdbs)))) - print "Total size of files to be pushed: %d MB" % filesizes - print "" - print "Total Files: %d" % int(len(totalFiles | oldReleaseFiles)) - print "Total Gbdbs: %d" % int(len(newGbdbSet | oldGbdbSet)) - print "Total Tables: %d" % int(len(newTableSet | oldTableSet)) - print "Other Files: %d" % int(len(additionalList)) - print "Total Revoked: %d" % int(len(revokedSet)) - print "Total Unrevoked: %d" % int(len(unrevokedSet)) - print "Total Attic: %d" % int(len(atticSet)) - print "Total Un-attic: %d" % int(len(unatticSet)) - print "\n" - print "New Tables (%s):" % len(pushTables) - print sep.join(sorted(pushTables)) - print "\n" - print "New Files (%s):" % len(pushFiles) - printWithPath(pushFiles, c, args) - print "\n" - print "New Gbdbs (%s):" % len(pushGbdbs) - printWithPath(pushGbdbs, c, args) - print "\n" - print "Additional Files (%s):" % len(additionalList) - printWithPath(additionalList, c, args) + print "QA Count Summaries for Release %s:" % args.releaseNew + print "Tables: %d" % int(len(newTableSet)) + print "Files: %d" % int(len(totalFiles - revokedFiles)) + print "Gbdbs: %d" % int(len(newGbdbSet)) + print "Supplemental: %d" % int(len(newSupplementalSet - oldSupplementalSet)) + print "Other: %d" % int(len(additionalList)) print "\n" - print "Active Untouched Tables (%s):" % len(oldTableSet) - print sep.join(sorted(oldTableSet)) + totalsize = 0 + size = 0 + print "Sizes of New:" + tableGb = int(tableSize/1024) + if tableGb > 1: + print "Tables: %d MB (%d GB)" % (tableSize, tableGb) + else: + print "Tables: %d MB" % tableSize + totalsize = totalsize + tableSize + size = int(makeFileSizes(c, args, pushFiles)) + totalsize = totalsize + size + if int(size/1024) > 1: + print "Files: %d MB (%d GB)" % (size, int(size/1024)) + else: + print "Files: %d MB" % size + size = int(makeFileSizes(c, args, pushGbdbs)) + totalsize = totalsize + size + if int(size/1024) > 1: + print "Gbdbs: %d MB (%d GB)" % (size, int(size/1024)) + else: + print "Gbdbs: %d MB" % size + size = int(makeFileSizes(c, args, (newSupplementalSet - oldSupplementalSet))) + totalsize = totalsize + size + if int(size/1024) > 1: + print "Supplemental: %d MB" % (size, int(size/1024)) + else: + print "Supplemental: %d MB" % size + size = int(makeFileSizes(c, args, (additionalList))) + totalsize = totalsize + size + if int(size/1024) > 1: + print "Other: %d MB" % size + else: + print "Other: %d MB" % size + if int(totalsize/1024) > 1: + print "Total: %d MB (%d GB)" % (totalsize, int(totalsize/1024)) + else: + print "Total: %d MB" % totalsize print "\n" - print "Active Untouched Files (%s):" % len(set(oldReleaseFiles)) - printWithPath(oldReleaseFiles, c, args) + print "TABLES:" + print "New: %s" % len(pushTables) + print "Persisting: %s" % len(oldTableSet & newTableSet) + print "Revoked/Replaced/Renamed: %s" % len(revokedTableSet) + print "New + Persisting: %s" % len(newTableSet) + print "Total (New + Persisting + Revoked/Replaced/Renamed): %s" % len(newTableSet | oldTableSet | revokedTableSet) + if args.full: + print "" + print "New Tables (%s):" % len(pushTables) + printIter(pushTables) + print "" + print "Persisting (%s):" % len(oldTableSet & newTableSet) + printIter(oldTableSet & newTableSet) + print "" + print "Revoked/Replaced/Renamed Tables (%s):" % len(revokedTableSet) + printIter(revokedTableSet) print "\n" - print "Active Untouched Gbdbs (%s):" % len(oldGbdbSet) - printWithPath(oldGbdbSet, c, args) + #downlaodables = total - revoked + print "DOWNLOAD FILES:" + print "New: %s" % len(pushFiles - revokedFiles) + print "Persisting: %s" % len((totalFiles & oldReleaseFiles) - revokedFiles) + print "Revoked/Replaced/Renamed: %s" % len(revokedFiles) + print "New + Persisting: %s" % len((pushFiles - revokedFiles) | ((totalFiles & oldReleaseFiles) - revokedFiles)) + print "Total (New + Persisting + Revoked/Replaced/Renamed): %s" % len(totalFiles | oldReleaseFiles | revokedFiles) + if args.full: + print "" + print "New Download Files (%s):" % len(pushFiles - revokedFiles) + printWithPath((pushFiles - revokedFiles), c, args.releaseNew) + print "" + print "Persisting Download Files (%s):" % len((totalFiles & oldReleaseFiles) - revokedFiles) + printWithPath(((totalFiles & oldReleaseFiles) - revokedFiles), c, args.releaseNew) + print "" + print "Revoked/Replaced/Renamed Download Files (%s):" % len(revokedFiles) + printWithPath(revokedFiles, c, args.releaseNew) print "\n" - print "Revoked Objects (%s):" % len(revokedSet) - for i in sorted(revokedSet): - print i + print "GBDBS:" + print "New: %s" % len(pushGbdbs) + print "Persisting: %s" % len((newGbdbSet & oldGbdbSet) - revokedGbdbs) + print "Revoked/Replaced/Renamed: %s" % len(revokedGbdbs) + print "New + Persisting: %s" % len(pushGbdbs | ((newGbdbSet & oldGbdbSet) - revokedGbdbs)) + print "Total (New + Persisting + Revoked/Replaced/Renamed): %s" % len(newGbdbSet | oldGbdbSet | revokedGbdbs) + if args.full: + print "" + print "New Gbdb Files (%s):" % len(pushGbdbs) + printGbdbPath(pushGbdbs, args.database) + print "" + print "Persisting Gbdb Files (%s):" % len((newGbdbSet & oldGbdbSet) - revokedGbdbs) + printGbdbPath((newGbdbSet & oldGbdbSet) - revokedGbdbs, args.database) + print "" + print "Revoked/Replaced/Renamed Gbdb Files (%s):" % len(revokedGbdbs) + printGbdbPath(revokedGbdbs, args.database) print "\n" - print "Attic Objects (%s):" % len(atticSet) - for i in sorted(atticSet): - print i + print "SUPPLEMENTAL FILES:" + print "New: %s" % len(newSupplementalSet - oldSupplementalSet) + print "Persisting: %s" % len(oldSupplementalSet & newSupplementalSet) + print "Removed: %s" % len(oldSupplementalSet - newSupplementalSet) + print "New + Persisting: %s" % len((newSupplementalSet - oldSupplementalSet) | (oldSupplementalSet & newSupplementalSet)) + print "Total: %s" % len(newSupplementalSet | oldSupplementalSet) + if args.full: + print "" + print "New Supplemental Files (%s):" % len(newSupplementalSet - oldSupplementalSet) + printWithPath(newSupplementalSet - oldSupplementalSet, c, args.releaseNew) + print "" + print "Persisting Supplemental Files (%s):" % len(oldSupplementalSet & newSupplementalSet) + printWithPath(oldSupplementalSet & newSupplementalSet, c, args.releaseNew) + print "" + print "Removed Supplemental Files (%s):" % len(oldSupplementalSet - newSupplementalSet) + printWithPath(oldSupplementalSet - newSupplementalSet, c, args.releaseNew) print "\n" - print "Unrevoked Objects (%s):" % len(unrevokedSet) - for i in sorted(unrevokedSet): - print i + print "OTHER FILES:" + print "New: %s" % len(additionalList) + print "Revoked/Replace: %s" % len(oldAdditionalList) + print "Total: %s" % len(additionalList | oldAdditionalList) + if args.full: + print "" + print "New Other Files (%s):" % len(additionalList) + printWithPath(additionalList, c, args.releaseNew) + print "" + print "Revoked Other Files (%s):" % len(oldAdditionalList) + printWithPath(oldAdditionalList, c, args.releaseNew) print "\n" - print "Un-attic Objects (%s):" % len(unatticSet) - for i in sorted(unatticSet): - print i + print "Files that dropped between releases (%s):" % len(missingFiles) + printWithPath(missingFiles, c, args.releaseOld) print "\n" + if not args.ignore: print "No Errors" -def printReportOne(args, totalFiles, newGbdbSet, newTableSet, additionalList, pushTables, pushFiles, c, pushGbdbs, filesizes, atticSet, revokedSet): +def printReportOne(args, totalFiles, revokedFiles, newGbdbSet, revokedGbdbs, newTableSet, revokedTables, additionalList, c, atticSet, newSupplementalSet, tableSize): print "mkChangeNotes v2" - print "%s %s Release %s against Release %s" % (args.database, args.composite, args.releaseNew, args.releaseOld) + print "%s %s Release %s" % (args.database, args.composite, args.releaseNew) print "" - print "Totals:" - print "Files and Gbdbs: %d" % (int(len(totalFiles)) + int(len(newGbdbSet))) - print "Total size of files to be pushed: %d MB" % filesizes + print "QA Count Summaries for Release %s:" % args.releaseNew + print "Tables: %d" % int(len(newTableSet - revokedTables)) + print "Files: %d" % int(len(totalFiles - revokedFiles)) + print "Gbdbs: %d" % int(len(newGbdbSet - revokedGbdbs)) + print "Supplemental: %d" % int(len(newSupplementalSet)) + print "Other: %d" % int(len(additionalList)) print "" - print "Files: %d" % int(len(totalFiles)) - print "Gbdbs: %d" % int(len(newGbdbSet)) - print "Tables: %d" % int(len(newTableSet)) - print "Other Files: %d" % int(len(additionalList)) - print "Total Revoked: %d" % int(len(revokedSet)) - print "Total Attic: %d" % int(len(atticSet)) + print "REVOKED:" + print "Tables: %s" % len(revokedTables) + print "Files: %s" % len(revokedFiles) + print "Gbdbs: %s" % len(revokedGbdbs) print "\n" - sep = "\n" - print "New Tables (%s):" % len(pushTables) - print sep.join(sorted(pushTables)) + totalsize = 0; + print "Sizes of New:" + tableGb = int(tableSize / 1024) + if tableGb > 1: + print "Tables: %d MB (%d GB)" % (tableSize, tableGb) + else: + print "Tables: %d MB" % tableSize + totalsize = totalsize + tableSize + size = int(makeFileSizes(c, args, totalFiles)) + totalsize = totalsize + size + if int(size/1024) > 1: + print "Files: %d MB (%d GB)" % (size, int(size/1024)) + else: + print "Files: %d MB" % size + size = int(makeFileSizes(c, args, newGbdbSet)) + totalsize = totalsize + size + if int(size/1024) > 1: + print "Gbdbs: %d MB (%d GB)" % (size, int(size/1024)) + else: + print "Gbdbs: %d MB" % size + size = int(makeFileSizes(c, args, newSupplementalSet)) + totalsize = totalsize + size + if int(size/1024) > 1: + print "Supplemental: %d MB (%d GB)" % (size, int(size/1024)) + else: + print "Supplemental: %d MB" % size + size = int(makeFileSizes(c, args, (additionalList))) + totalsize = totalsize + size + if int(size/1024) > 1: + print "Other: %d MB" % size + else: + print "Other: %d MB" % size + if int(totalsize/1024) > 1: + print "Total: %d MB (%d GB)" % (totalsize, int(totalsize/1024)) + else: + print "Total: %d MB" % totalsize print "\n" - print "New Files (%s):" % len(pushFiles) - printWithPath(pushFiles, c, args) + if args.full: + print "" + print "New Tables (%s):" % len(newTableSet - revokedTables) + printIter(newTableSet - revokedTables) print "\n" - print "New Gbdbs (%s):" % len(pushGbdbs) - printWithPath(pushGbdbs, c, args) + print "New Download Files (%s):" % len(totalFiles - revokedFiles) + printWithPath(totalFiles - revokedFiles, c, args.releaseNew) print "\n" - print "Additional Files (%s):" % len(additionalList) - printWithPath(additionalList, c, args) + print "New Gbdb Files (%s):" % len(newGbdbSet - revokedGbdbs) + printGbdbPath(newGbdbSet - revokedGbdbs, args.database) print "\n" - print "Revoked Objects (%s):" % len(revokedSet) - for i in sorted(revokedSet): - print i + print "New Supplemental Files (%s):" % len(newSupplementalSet) + printWithPath(newSupplementalSet, c, args.releaseNew) print "\n" - print "Attic Objects (%s):" % len(atticSet) - for i in sorted(atticSet): - print i + print "New Other Files (%s):" % len(additionalList) + printWithPath(additionalList, c, args.releaseNew) + print "\n" + print "Revoked Tables (%s):" %len(revokedTables) + printIter(revokedTables) + print "Revoked Files (%s):" % len(revokedFiles) + printWithPath(revokedFiles, c, args.releaseNew) print "\n" + print "Revoked Gbdbs (%s):" % len(revokedGbdbs) + printGbdbPath(revokedGbdbs, args.database) + print "\n" + if not args.ignore: print "No Errors" + def printErrors(errors): errorsDict = {} for i in errors: line = i.split(":", 1) try: errorsDict[line[0]].append(line[1]) except: errorsDict[line[0]] = [] errorsDict[line[0]].append(line[1]) print "Errors (%s):" % len(errors) for i in sorted(errorsDict.keys()): print "%s:" % i for j in sorted(errorsDict[i]): print "%s" % j def main(): parser = argparse.ArgumentParser( prog='mkChangeNotes', + formatter_class=argparse.RawDescriptionHelpFormatter, description='Writes out notes file for packing to QA', - epilog='example: encodeMkChange hg19 wgEncodeUwDnase 3 2' + epilog= +"""Examples: + +mkChangeNotes hg19 wgEncodeUwDnase 3 2 --loose +mkChangeNotes hg19 wgEncodeSydhTfbs 1 - --full +mkChangeNotes hg19 wgEncodeCshlLongRnaSeq 1 - + +""" ) parser.add_argument('-t', '--trackPath', help='Overrides the default track path ~/kent/src/hg/makeDb/trackDb/') parser.add_argument('-l', '--loose', action="store_true", default=0, help='Loose checking for legacy elements. Will be retired once all tracks go through a release cycle') parser.add_argument('-i', '--ignore', action="store_true", default=0, help='Ignore errors, print out report.') + parser.add_argument('-f', '--full', action="store_true", default=0, help='Print full stats.') parser.add_argument('database', help='The database, typically hg19 or mm9') parser.add_argument('composite', help='The composite name, wgEncodeCshlLongRnaSeq for instance') parser.add_argument('releaseNew', help='The new release to be released') - parser.add_argument('releaseOld', help='The old release that is already released') + parser.add_argument('releaseOld', nargs='?', default='-', help='The old release that is already released, if on release 1, or solo release mode, put anything here') if len(sys.argv) == 1: parser.print_help() return args = parser.parse_args(sys.argv[1:]) if not args.releaseNew.isdigit(): parser.print_help() return - c = track.CompositeTrack(args.database,args.composite) loose = args.loose errors = [] - if args.releaseOld == "-": - args.releaseOld = 0 - if int(args.releaseOld) > int(args.releaseNew): + if not args.releaseOld.isdigit(): + args.releaseOld = 'solo' + elif int(args.releaseOld) > int(args.releaseNew): errors.append("Old Release is higher than New Release") args.releaseOld = args.releaseNew + printErrors(errors) + return - if int(args.releaseNew) > 1: + + if int(args.releaseNew) > 1 and str(args.releaseOld) != 'solo': newReleaseFiles = c.releases[int(args.releaseNew)-1] oldReleaseFiles = c.releases[int(args.releaseOld)-1] newMdb = c.alphaMetaDb oldMdb = c.publicMetaDb - errors.extend(checkMetaDbForFiles(newMdb, newReleaseFiles, "alpha metaDb", loose)) - errors.extend(checkMetaDbForFiles(oldMdb, oldReleaseFiles, "public metaDb", loose)) + #check if all files listed in release directories have associated metaDb entries + (newMdb, revokedSet, revokedFiles, atticSet, newSupplementalSet, newFileErrors) = checkMetaDbForFiles(newMdb, newReleaseFiles, "alpha metaDb", loose) + (oldMdb, spam, eggs, ham, oldSupplementalSet, oldFileErrors) = checkMetaDbForFiles(oldMdb, oldReleaseFiles, "public metaDb", loose) + errors.extend(newFileErrors) + errors.extend(oldFileErrors) + + #checks to see that nothing has disappeared between public and alpha errors.extend(checkAlphaForDropped(newMdb, oldMdb, "alpha metaDb", "stanza")) - errors.extend(checkAlphaForDropped(newReleaseFiles, oldReleaseFiles, "new release download directory", "file")) - errors.extend(checkMd5sums(newReleaseFiles, oldReleaseFiles)) + missingFiles = checkFilesForDropped(newReleaseFiles, oldReleaseFiles) + errors.extend(checkMd5sums(newReleaseFiles, oldReleaseFiles, loose)) - (newTableSet, newRevokedSet, newAtticSet, newTableError) = checkTableStatus(newMdb, newReleaseFiles, args.database, args.composite, "alpha metaDb", loose) + #checks and gets tables that are present, also returns a revoked set of tables for new + (newTableSet, revokedTableSet, newTableError) = checkTableStatus(newMdb, newReleaseFiles, args.database, args.composite, "alpha metaDb", loose, revokedSet) + (oldTableSet, spam, oldTableError) = checkTableStatus(oldMdb, oldReleaseFiles, args.database, args.composite, "public metaDb", loose, revokedSet) errors.extend(newTableError) - (oldTableSet, oldRevokedSet, oldAtticSet, oldTableError) = checkTableStatus(oldMdb, oldReleaseFiles, args.database, args.composite, "public metaDb", loose) errors.extend(oldTableError) - (newGbdbSet, newGbdbError) = getGbdbFiles(args.database, newTableSet, newMdb) + #same as above except for gbdbs + (newGbdbSet, revokedGbdbs, newGbdbError) = getGbdbFiles(args.database, newTableSet, revokedTableSet, newMdb) + (oldGbdbSet, eggs, oldGbdbError) = getGbdbFiles(args.database, oldTableSet, set(), oldMdb) errors.extend(newGbdbError) - (oldGbdbSet, oldGbdbError) = getGbdbFiles(args.database, oldTableSet, oldMdb) errors.extend(oldGbdbError) - droppedTables = oldTableSet - newTableSet - if droppedTables: - for i in droppedTables: - errors.append("table: %s was dropped between releases" % i) - - -######### - #some weird suggestion from online about python 3 not being able to compare strings to ints implicitly, - #here for future reference in case something breaks - #pushTables = sorted((newTableSet - oldTableSet), key=lambda item: (int(item.partition(' ')[0]) - # if item[0].isdigit() else float('inf'), item)) - #pushFiles = sorted((newReleaseFiles - oldReleaseFiles), key=lambda item: (int(item.partition(' ')[0]) - # if item[0].isdigit() else float('inf'), item)) - #pushGbdbs = sorted((newGbdbSet - oldGbdbSet), key=lambda item: (int(item.partition(' ')[0]) - # if item[0].isdigit() else float('inf'), item)) -######### - - pushTables = sorted((newTableSet - oldTableSet)) - pushFiles = sorted((set(newReleaseFiles) - set(oldReleaseFiles))) - pushGbdbs = sorted((newGbdbSet - oldGbdbSet)) + #for ease of typing totalFiles = set(newReleaseFiles) - (pushFiles, totalFiles) = cleanSpecialFiles(pushFiles, totalFiles) - (oldReleaseFiles, totalFiles) = cleanSpecialFiles(set(oldReleaseFiles), totalFiles) - (oldReleaseFiles, pushFiles, additionalList) = separateOutAdditional(oldReleaseFiles, totalFiles, pushFiles) + #these could honestly be moved earlier, get a file list processing section or something + #they clean out special fiels out and separated the master fiels list into the 3 required + #ones: wgEncode, supplemental and additional. + totalFiles = cleanSpecialFiles(totalFiles) + oldReleaseFiles = cleanSpecialFiles(set(oldReleaseFiles)) + (oldReleaseFiles, additionalList, oldAdditionalList, totalFiles) = separateOutAdditional(oldReleaseFiles, totalFiles, newSupplementalSet, oldSupplementalSet) - filesizes = makeFileSizes(c, args, pushFiles, pushGbdbs, additionalList) + #get the stuff you need to push, also table sizes + pushTables = set(sorted((newTableSet - oldTableSet))) + tableSize = getTableSize(pushTables, args.database) + pushFiles = set(sorted((totalFiles - oldReleaseFiles))) + pushGbdbs = set(sorted((newGbdbSet - oldGbdbSet))) + #don't print report unless ignore option is on or no errors if (not errors) or args.ignore: - printReport(args, totalFiles, newGbdbSet, newTableSet, additionalList, pushTables, pushFiles, pushGbdbs, c, oldTableSet, oldReleaseFiles, oldGbdbSet, filesizes, newAtticSet, oldAtticSet, newRevokedSet, oldRevokedSet) - + printReport(args, totalFiles, newGbdbSet, newTableSet, additionalList, oldAdditionalList, pushTables, pushFiles, pushGbdbs, c, oldTableSet, oldReleaseFiles, oldGbdbSet, atticSet, revokedFiles, newMdb, revokedTableSet, revokedGbdbs, missingFiles, newSupplementalSet, oldSupplementalSet, tableSize) else: printErrors(errors) - else: + elif args.releaseOld == 'solo': - args.releaseOld = 0 newReleaseFiles = c.releases[int(args.releaseNew)-1] + newMdb = c.alphaMetaDb - errors.extend(checkMetaDbForFiles(newMdb, newReleaseFiles, "alpha metaDb", loose)) - (newTableSet, newRevokedSet, newAtticSet, newTableError) = checkTableStatus(newMdb, newReleaseFiles, args.database, args.composite, "alpha metaDb", loose) + + (newMdb, revokedSet, revokedFiles, atticSet, newSupplementalSet, newFileErrors) = checkMetaDbForFiles(newMdb, newReleaseFiles, "alpha metaDb", loose) + errors.extend(newFileErrors) + + (newTableSet, revokedTableSet, newTableError) = checkTableStatus(newMdb, newReleaseFiles, args.database, args.composite, "alpha metaDb", loose, revokedSet) errors.extend(newTableError) - (newGbdbSet, newGbdbError) = getGbdbFiles(args.database, newTableSet, newMdb) + + tableSize = getTableSize(newTableSet, args.database) + + (newGbdbSet, revokedGbdbs, newGbdbError) = getGbdbFiles(args.database, newTableSet, revokedTableSet, newMdb) errors.extend(newGbdbError) + #set for easy operations totalFiles = set(newReleaseFiles) + #clean out special fiels we don't push i.e. md5sum.history - (pushFiles, totalFiles) = cleanSpecialFiles(totalFiles, totalFiles) + totalFiles = cleanSpecialFiles(totalFiles) + #makes list for additional files - (spam, pushFiles, additionalList) = separateOutAdditional(set(), totalFiles, pushFiles) - #makes files sizes - filesizes = makeFileSizes(c, args, pushFiles, newGbdbSet, additionalList) + (oldReleaseFiles, additionalList, oldAdditionalList, totalFiles) = separateOutAdditional(set(), totalFiles, newSupplementalSet, set()) if (not errors) or args.ignore: - printReportOne(args, totalFiles, newGbdbSet, newTableSet, additionalList, newTableSet, pushFiles, c, newGbdbSet, filesizes, newAtticSet, newRevokedSet) + printReportOne(args, totalFiles, revokedFiles, newGbdbSet, revokedGbdbs, newTableSet, revokedTableSet, additionalList, c, atticSet, newSupplementalSet, tableSize) else: printErrors(errors) if __name__ == '__main__': main()