2b53d2febf119293dabddfe67864168dc44f33b0
wong
  Thu Oct 27 14:39:45 2011 -0700
refactored a bit, removed some random newlines in output
diff --git python/lib/ucscgenomics/mkChangeNotes.py python/lib/ucscgenomics/mkChangeNotes.py
index ceae085..562990f 100644
--- python/lib/ucscgenomics/mkChangeNotes.py
+++ python/lib/ucscgenomics/mkChangeNotes.py
@@ -1,651 +1,573 @@
 #!/hive/groups/encode/dcc/bin/python
 import sys, os, re, argparse, subprocess, math
 from ucscgenomics import ra, track, qa
 
 class makeNotes(object):
     def checkMetaDbForFiles(self, status, state):
         if state == 'new':
             (mdb, files, loose) = (self.newMdb, self.newReleaseFiles, self.loose)
         elif state == 'old':
             (mdb, files, loose) = (self.oldMdb, self.oldReleaseFiles, self.loose)
 
         errors = []
         revokedset = set()
         revokedfiles = set()
         atticset = set()
         supplementalset = set()
         filtermdb = ra.RaFile()
 
         for i in files:
             if re.match('supplemental', i):
                 supplementalset.add(i)
             if not re.match('wgEncode.*', i):
                 continue
 
             filestanza = mdb.filter(lambda s: re.match(".*%s.*" % i,s['fileName']), lambda s: s)
             #should only return 1, just in case
             if filestanza:
                 for j in filestanza:
                     filtermdb[j.name] = j
                     if 'objStatus' in j and re.search('revoked|replaced|renamed', j['objStatus']):
                         revokedfiles.add(i)
                         revokedset.add(j.name)
                     if 'attic' in j:
                         atticset.add(j.name)
             else:
                 #pass
                 if loose and re.match('.*bai', i):
                     pass
                 else:
                     errors.append("metaDb: %s is not mentioned in %s" % (i, status))
 
         return (filtermdb, revokedset, revokedfiles, atticset, supplementalset, errors)
 
     def __checkAlphaForDropped(self, status, type):
-        (new, old) = (self.newMdb, self.oldMdb)
         errors=[]
-        diff = set(old) -set(new)
+        diff = set(self.oldMdb) -set(self.newMdb)
         for i in diff:
             errors.append("%s: %s missing from %s" % (type, i, status))
         return errors
 
     def __checkFilesForDropped(self):
-        (new, old) = (self.newReleaseFiles, self.oldReleaseFiles)
-        diff = set(old) - set(new)
+        diff = set(self.oldReleaseFiles) - set(self.newReleaseFiles)
         return diff
 
     def checkTableStatus(self, status, state):
         errors=[]
         revokedset = set()
         (database, composite, loose) = (self.database, self.composite, self.loose)
         if state == 'new':
             (mdb, files, revokedset) = (self.newMdb, self.newReleaseFiles, self.revokedSet)
         elif state == 'old':
             (mdb, files) = (self.oldMdb, self.oldReleaseFiles)
+
+### If MySQLdb ever gets installed ###
+
         #home = os.environ['HOME']
         #dbhost = ''
         #dbuser = ''
         #dbpassword = ''
         #p = re.compile('db.(\S+)=(\S+)')
         #with open("%s/.hg.conf" % home) as f:
         #    for line in f:
         #        line.rstrip("\n\r")
         #        if p.match(line):
         #            m = p.match(line)
         #            if m.groups(1)[0] == 'host':
         #                dbhost = m.groups(1)[1]
         #            if m.groups(1)[0] == 'user':
         #                dbuser = m.groups(1)[1]
         #            if m.groups(1)[0] == 'password':
         #                dbpassword = m.groups(1)[1]
 
         #db = MySQLdb.connect (host = dbhost,
         #            user = dbuser,
         #            passwd = dbpassword,
         #            db = database)
 
         #cursor = db.cursor ()
         #cursor.execute ("show tables like '%s%s'" % (composite, "%"))
         #tableset = set(cursor.fetchall())
 
-        mdbtableset = set(mdb.filter(lambda s: s['objType'] == 'table' and 'tableName' in s and 'attic' not in s, lambda s: s['metaObject']))
-        mdbtableset = mdbtableset - revokedset
-        mdbtableset = set(mdb.filter(lambda s: s['metaObject'] in mdbtableset, lambda s: s['tableName']))
+
+### END ###
+
+        mdbobjectset = set(mdb.filter(lambda s: s['objType'] == 'table' and 'tableName' in s and 'attic' not in s, lambda s: s['metaObject'])) - revokedset
+        mdbtableset = set(mdb.filter(lambda s: s['metaObject'] in mdbobjectset, lambda s: s['tableName']))
         revokedtableset = set(mdb.filter(lambda s: s['metaObject'] in revokedset, lambda s: s['tableName']))
         sep = "','"
         tablestr = sep.join(mdbtableset)
         tablestr = "'" + tablestr + "'"
 
         #this should really be using python's database module, but I'd need admin access to install it
         #at this point, I am just parsing the output form hgsql
         cmd = "hgsql %s -e \"select table_name from information_schema.TABLES where table_name in (%s)\"" % (database, tablestr)
         p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
         cmdoutput = p.stdout.read()
-
         sqltableset = set(cmdoutput.split("\n")[1:-1])
 
-        missingTableNames = set(mdb.filter(lambda s: s['objType'] == 'table' and 'tableName' not in s and 'attic' not in s, lambda s: s['metaObject']))
-
-        missingFromDb = mdbtableset - sqltableset
 
+        missingTableNames = set(mdb.filter(lambda s: s['objType'] == 'table' and 'tableName' not in s and 'attic' not in s, lambda s: s['metaObject']))
         if missingTableNames:
             for i in missingTableNames:
                 errors.append("table: %s is type obj, but missing tableName field called by %s" % (i, status))
-
+        missingFromDb = mdbtableset - sqltableset
         if missingFromDb:
             for i in missingFromDb:
                 errors.append("table: %s table not found in Db called by %s" % (i, status))
 
+
+
         return (mdbtableset, revokedtableset, errors)
 
-    def __checkGbdbFileStatus(self, i, set, errors):
+    def __checkGbdbFileStatus(self, i, set, errors, state):
         filelist = i['fileName'].split(',')
         for j in filelist:
-            if os.path.isfile("/gbdb/%s/bbi/%s" % (self.database, j)):
+            if os.path.isfile("%s/%s" % (self.gbdbPath, j)):
                 set.add(j)
             else:
                 cmd = "hgsql %s -e \"select fileName from (%s)\"" % (self.database, i['tableName'])
                 p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
                 cmdoutput = p.stdout.read()
                 if os.path.isfile(cmdoutput.split("\n")[1]):
                     set.add(j)
                 else:
-                    errors.append("gbdb: %s does not exist in /gbdb/%s/bbi" % (j, self.database))
+                    errors.append("gbdb: %s%s does not exist in %s" % (state, j, self.gbdbPath))
         return set, errors
 
     def getGbdbFiles(self, state):
         revokedset = set()
         if state == 'new':
             (tableset, revokedset, mdb) = (self.newTableSet, self.revokedSet, self.newMdb)
         elif state == 'old':
             (tableset, mdb) = (self.oldTableSet, self.oldMdb)
 
         errors = []
 
         gbdbtableset = qa.getGbdbTables(self.database, tableset)
 
         revokedtableset = qa.getGbdbTables(self.database, revokedset)
 
-        file1stanzalist = mdb.filter(lambda s: s['tableName'] in gbdbtableset, lambda s: s)
-        revokedstanzalist = mdb.filter(lambda s: s['tableName'] in revokedtableset, lambda s: s)
+        filestanzas = mdb.filter(lambda s: s['tableName'] in gbdbtableset, lambda s: s)
+        revokedstanzas = mdb.filter(lambda s: s['tableName'] in revokedtableset, lambda s: s)
+
         gbdbfileset = set()
         revokedfileset = set()
 
-        for i in file1stanzalist:
-            (gbdbfileset, errors) = self.__checkGbdbFileStatus(i, gbdbfileset, errors)
+        for i in filestanzas:
+            (gbdbfileset, errors) = self.__checkGbdbFileStatus(i, gbdbfileset, errors, "")
 
-        for i in revokedstanzalist:
-            (revokedfileset, errors) = self.__checkGbdbFileStatus(i, revokedfileset, errors)
+        for i in revokedstanzas:
+            (revokedfileset, errors) = self.__checkGbdbFileStatus(i, revokedfileset, errors, "revoked gbdb ")
 
         return (gbdbfileset, revokedfileset, errors)
 
     def __getTableSize(self):
         (mdbtableset, database) = (self.newTableSet, self.database)
+
         tablesize = float(0)
         tablelist = list()
+
         for i in mdbtableset:
             tablelist.append("table_name = '%s'" % i)
+
         orsep = " OR "
         orstr = orsep.join(tablelist)
 
         cmd = "hgsql %s -e \"SELECT ROUND(data_length/1024/1024,2) total_size_mb, ROUND(index_length/1024/1024,2) total_index_size_mb FROM information_schema.TABLES WHERE %s\"" % (database, orstr)
         p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
         cmdoutput = p.stdout.read()
+
         for i in cmdoutput.split("\n")[1:-1]:
             fields = i.split()
             for j in fields:
                 tablesize = tablesize + float(j)
+
         return math.ceil(tablesize)
 
     def __checkMd5sums(self):
         (newfiles, oldfiles, loose) = (self.newReleaseFiles, self.oldReleaseFiles, self.loose)
         errors = []
         for i in oldfiles:
             if i not in newfiles:
                 pass
             elif re.match('wgEncode.*', i):
                 if oldfiles[i].md5sum != newfiles[i].md5sum:
                     errors.append("file: %s have changed md5sums between releases. %s vs %s" % (i, oldfiles[i].md5sum, newfiles[i].md5sum))
         if loose:
             return list()
         else:
             return errors
 
-    def __makeFileSizes(self, c, args, inlist):
+    def __makeFileSizes(self, args, inlist):
         checklist = list()
+
         for i in inlist:
-            checklist.append("%s/%s" % (c.downloadsDirectory + 'release' + args['releaseNew'], i))    
+            checklist.append("%s/%s" % (self.releasePath, i))    
+
         filesizes = 0
         for i in checklist:
             realpath = os.path.realpath(i)
             filesizes = filesizes + int(os.path.getsize(realpath))
 
         filesizes = math.ceil(float(filesizes) / (1024**2))
+
         return int(filesizes)
 
     def __cleanSpecialFiles(self, inlist):
         specialRemoveList = ['md5sum.history']
         for i in specialRemoveList:
             if i in inlist:
                 inlist.remove(i)
 
         return(inlist)
 
     def __separateOutAdditional(self):
         (oldReleaseFiles, totalFiles, newSupplementalSet, oldSupplementalSet) = (self.oldTotalFiles, self.totalFiles, self.newSupplementalSet, self.oldSupplementalSet)
         additionalList = set()
         oldAdditionalList = set()
         newTotal = set()
         newOld = set()
         for i in totalFiles:
             if i in newSupplementalSet:
                 continue
             elif not re.match('wgEncode.*', i):
                 additionalList.add(i)
             else:
                 newTotal.add(i)
         for i in oldReleaseFiles:
             if not re.match('wgEncode.*', i):
                 if i in totalFiles:
                     pass
                 elif i in newSupplementalSet:
                     continue
                 else:
                     oldAdditionalList.add(i)
             else:
                 newOld.add(i)
 
         oldReleaseFiles = newOld
 
         return(newOld, additionalList, oldAdditionalList, newTotal)
 
-    def __printWithPath(self, set, c, release):
+    def __printIter(self, set, path):
         output = []
         for i in sorted(set):
-            output.append("%s/%s" % (c.httpDownloadsPath + 'release' + release, i))
-        return output
-    def __printGbdbPath(self, set, database):
-        output = []
-        for i in sorted(set):
-            output.append("/gbdb/%s/bbi/%s" % (database, i))
+            if path:
+                output.append("%s/%s" % (path, i))
+            else:
+                output.append("%s" % (i))
         return output
 
-    def __printIter(self, inlist):
+
+    def __printSize(self, size, output, totalsize, type):
+
+        sizeGb = int(size/1024)
+        if sizeGb > 1:
+            output.append("%s: %d MB (%d GB)" % (type, size, sizeGb))
+        else:
+            output.append("%s: %d MB" % (type, size))
+
+        totalsize = totalsize + size
+
+        return (output, totalsize)
+
+    def __printSection(self, new, untouched, revoked, all, title, path, summary):
         output = []
-        for i in sorted(inlist):
-            output.append(i)
+        removeline = "Revoked/Replaced/Renamed"
+        totaline = "Total (New + Untouched + Revoked/Replaced/Renamed)"
+        caps = title.upper()
+        if title == "supplemental":
+            removeline = "Removed"
+            totaline = "Total"
+            title = title + " files"
+            caps = title.upper()
+        elif title == 'gbdbs':
+            caps = "GBDBS"
+            title = "gbdb files"
+        elif title == "download":
+            title = title + " files"
+            caps = title.upper()
+        if all:
+            output.append("\n")
+            output.append("%s:" % caps)
+            output.append("New: %s" % len(new))
+            output.append("Untouched: %s" % len(untouched))
+            output.append("%s: %s" % (removeline, len(revoked)))
+            output.append("New + Untouched: %s" % len(new | untouched))
+            output.append("%s: %s" % (totaline, len(all)))
+        if all and not summary:
+            output.append("")
+            output.append("New %s (%s):" % (title.title(), len(new)))
+            output.extend(self.__printIter(new, path))
+            output.append("")
+            output.append("Untouched %s (%s):" % (title.title(), len(untouched)))
+            output.extend(self.__printIter(untouched, path))
+            output.append("")
+            output.append("%s %s (%s):" % (removeline, title.title(), len(revoked)))
+            output.extend(self.__printIter(revoked, path))
         return output
 
-    def printReport(self, args, c):
-        (totalFiles, newGbdbSet, newTableSet, additionalList, oldAdditionalList, pushTables, pushFiles, pushGbdbs, oldTableSet, oldReleaseFiles, oldGbdbSet, atticSet, revokedFiles, revokedTableSet, revokedGbdbs, missingFiles, newSupplementalSet, oldSupplementalSet, tableSize) = (self.totalFiles, self.newGbdbSet, self.newTableSet, self.additionalList, self.oldAdditionalList, self.pushTables, self.pushFiles, self.pushGbdbs, self.oldTableSet, self.oldTotalFiles, self.oldGbdbSet, self.atticSet, self.revokedFiles, self.revokedTableSet, self.revokedGbdbs, self.missingFiles, self.newSupplementalSet, self.oldSupplementalSet, self.tableSize)
-        #the groups here need to be predefined, I just copied and pasted after working out what they were
-        sep = "\n"
+    def __qaHeader(self, output, newTableSet, filesNoRevoke, newGbdbSet, newSupp, additionalList, revokedTables, revokedFiles, revokedGbdbs, pushFiles, pushGbdbs, args, c):
         output = []
+        tableSize = self.__getTableSize()
+
         output.append("mkChangeNotes v2")
+        if (args['releaseOld'] == "solo"):
+            output.append("%s %s Release %s" % (args['database'], args['composite'], args['releaseNew']))
+        else:
         output.append("%s %s Release %s vs Release %s" % (args['database'], args['composite'], args['releaseNew'], args['releaseOld']))
         output.append("")
         output.append("QA Count Summaries for Release %s:" % args['releaseNew'])
         output.append("Tables: %d" % int(len(newTableSet)))
-        output.append("Files: %d" % int(len(totalFiles - revokedFiles)))
+        output.append("Files: %d" % int(len(filesNoRevoke)))
         output.append("Gbdbs: %d" % int(len(newGbdbSet)))
-        output.append("Supplemental: %d" % int(len(newSupplementalSet - oldSupplementalSet)))
+        output.append("Supplemental: %d" % int(len(newSupp)))
         output.append("Other: %d" % int(len(additionalList)))
-        output.append("\n")
+        output.append("")
+        output.append("REVOKED:")
+        output.append("Tables: %s" % len(revokedTables))
+        output.append("Files: %s" % len(revokedFiles))
+        output.append("Gbdbs: %s" % len(revokedGbdbs))
+        output.append("")
         output.append("Sizes of New:")
 
         totalsize = 0
-        size = 0
-        tableGb = int(tableSize/1024)
-        totalsize = totalsize + tableSize
-        if tableGb > 1:
-            output.append("Tables: %d MB (%d GB)" % (tableSize, tableGb))
-        else:
-            output.append("Tables: %d MB" % tableSize)
-
-        size = int(self.__makeFileSizes(c, args, pushFiles))
-        totalsize = totalsize + size
-        if int(size/1024) > 1:
-            output.append("Files: %d MB (%d GB)" % (size, int(size/1024)))
-        else:
-            output.append("Files: %d MB" % size)
         
-        size = int(self.__makeFileSizes(c, args, pushGbdbs))
-        totalsize = totalsize + size
-        if int(size/1024) > 1:
-            output.append("Gbdbs: %d MB (%d GB)" % (size, int(size/1024)))
-        else:
-            output.append("Gbdbs: %d MB" % size)
-        
-        size = int(self.__makeFileSizes(c, args, (newSupplementalSet - oldSupplementalSet)))
-        totalsize = totalsize + size
-        if int(size/1024) > 1:
-            output.append("Supplemental: %d MB (%d GB)" % (size, int(size/1024)))
-        else:
-            output.append("Supplemental: %d MB" % size)
+        (output, totalsize) = self.__printSize(tableSize, output, totalsize, "Table")
+        (output, totalsize) = self.__printSize(int(self.__makeFileSizes(args, pushFiles)), output, totalsize, "Files")
+        (output, totalsize) = self.__printSize(int(self.__makeFileSizes(args, pushGbdbs)), output, totalsize, "Gbdbs")
+        (output, totalsize) = self.__printSize(int(self.__makeFileSizes(args, newSupp)), output, totalsize, "Supplemental")
+        (output, totalsize) = self.__printSize(int(self.__makeFileSizes(args, additionalList)), output, totalsize, "Other")
+        (output, totalsize) = self.__printSize(totalsize, output, 0, "Total")
         
-        size = int(self.__makeFileSizes(c, args, (additionalList)))
-        totalsize = totalsize + size
-        if int(size/1024) > 1:
-            output.append("Other: %d MB (%d GB)" % (size, int(size/1024)))
-        else:
-            output.append("Other: %d MB" % size)
+        return output
       
-        if int(totalsize/1024) > 1:
-            output.append("Total: %d MB (%d GB)" % (totalsize, int(totalsize/1024)))
-        else:
-            output.append("Total: %d MB" % totalsize)
+    def printReport(self, args, c):
+        (totalFiles, newGbdbSet, newTableSet, additionalList, oldAdditionalList, oldTableSet, oldReleaseFiles, oldGbdbSet, atticSet, revokedFiles, revokedTableSet, revokedGbdbs, missingFiles, newSupplementalSet, oldSupplementalSet) = (self.totalFiles, self.newGbdbSet, self.newTableSet, self.additionalList, self.oldAdditionalList, self.oldTableSet, self.oldTotalFiles, self.oldGbdbSet, self.atticSet, self.revokedFiles, self.revokedTableSet, self.revokedGbdbs, self.missingFiles, self.newSupplementalSet, self.oldSupplementalSet)
+        #the groups here need to be predefined, I just copied and pasted after working out what they were
+        sep = "\n"
+        output = []
+        pushTables = set(sorted((self.newTableSet - self.oldTableSet)))
+        pushFiles = set(sorted((self.totalFiles - self.oldTotalFiles)))
+        pushGbdbs = set(sorted((self.newGbdbSet - self.oldGbdbSet)))
+        filesNoRevoke = totalFiles - revokedFiles
+        allTables = newTableSet | oldTableSet | revokedTableSet
+        untouchedTables = oldTableSet & newTableSet
+        allFiles = totalFiles | oldReleaseFiles | revokedFiles
+        newFiles = pushFiles - revokedFiles
+        untouchedFiles = (totalFiles & oldReleaseFiles) - revokedFiles
+        allGbdbs = newGbdbSet | oldGbdbSet | revokedGbdbs
+        untouchedGbdbs = (newGbdbSet & oldGbdbSet) - revokedGbdbs
+        allSupp = newSupplementalSet | oldSupplementalSet
+        newSupp = newSupplementalSet - oldSupplementalSet
+        removedSupp = oldSupplementalSet - newSupplementalSet
+        untouchedSupp = oldSupplementalSet & newSupplementalSet
+        allOther = additionalList | oldAdditionalList
+        removedOther = oldAdditionalList - additionalList
+
+
+        output.extend(self.__qaHeader(output, newTableSet, filesNoRevoke, newGbdbSet, newSupp, additionalList, revokedTableSet, revokedFiles, revokedGbdbs, pushFiles, pushGbdbs, args, c))
+
+        output.extend(self.__printSection(pushTables, untouchedTables, revokedTableSet, allTables, "tables", 0, args['summary']))
+        output.extend(self.__printSection(newFiles, untouchedFiles, revokedFiles, allFiles, "download", self.releasePath, args['summary']))
+        output.extend(self.__printSection(pushGbdbs, untouchedGbdbs, revokedGbdbs, allGbdbs, "gbdbs", self.gbdbPath, args['summary']))
+        output.extend(self.__printSection(newSupp, untouchedSupp, removedSupp, allSupp, "supplemental", self.releasePath, args['summary']))
 
-        tableprint = len(newTableSet | oldTableSet | revokedTableSet)
         self.newTables = set(pushTables)
-        if tableprint:
-            output.append("\n")
-            output.append("TABLES:")
-            output.append("New: %s" % len(pushTables))
-            output.append("Untouched: %s" % len(oldTableSet & newTableSet))
-            output.append("Revoked/Replaced/Renamed: %s" % len(revokedTableSet))
-            output.append("New + Untouched: %s" % len(newTableSet))
-            output.append("Total (New + Untouched + Revoked/Replaced/Renamed): %s" % len(newTableSet | oldTableSet | revokedTableSet))
-        if tableprint and not args['summary']:
-            output.append("")
-            output.append("New Tables (%s):" % len(pushTables))
-            output.extend(self.__printIter(pushTables))
-            output.append("")
-            output.append("Untouched (%s):" % len(oldTableSet & newTableSet))
-            output.extend(self.__printIter(oldTableSet & newTableSet))
-            output.append("")
-            output.append("Revoked/Replaced/Renamed Tables (%s):" % len(revokedTableSet))
-            output.extend(self.__printIter(revokedTableSet))
-
-        dlprint = len(totalFiles | oldReleaseFiles | revokedFiles)
-        self.newFiles = set(self.__printWithPath((pushFiles - revokedFiles), c, args['releaseNew']))
-        if dlprint:
-            output.append("\n")
-            #downlaodables = total - revoked
-            output.append("DOWNLOAD FILES:")
-            output.append("New: %s" % len(pushFiles - revokedFiles))
-            output.append("Untouched: %s" % len((totalFiles & oldReleaseFiles) - revokedFiles))
-            output.append("Revoked/Replaced/Renamed: %s" % len(revokedFiles))
-            output.append("New + Untouched: %s" % len((pushFiles - revokedFiles) | ((totalFiles & oldReleaseFiles) - revokedFiles)))
-            output.append("Total (New + Untouched + Revoked/Replaced/Renamed): %s" % len(totalFiles | oldReleaseFiles | revokedFiles))
-        if dlprint and not args['summary']:
-            output.append("")
-            output.append("New Download Files (%s):" % len(pushFiles - revokedFiles))
-            output.extend(sorted(list(self.newFiles)))
-            output.append("")
-            output.append("Untouched Download Files (%s):" % len((totalFiles & oldReleaseFiles) - revokedFiles))
-            output.extend(self.__printWithPath(((totalFiles & oldReleaseFiles) - revokedFiles), c, args['releaseNew']))
-            output.append("")
-            output.append("Revoked/Replaced/Renamed Download Files (%s):" % len(revokedFiles))
-            output.extend(self.__printWithPath(revokedFiles, c, args['releaseNew']))
-
-        gbdbprint = len(newGbdbSet | oldGbdbSet | revokedGbdbs) 
-        self.newGbdbs = set(self.__printGbdbPath(pushGbdbs, args['database']))
-        if gbdbprint:
-            output.append("\n")
-            output.append("GBDBS:")
-            output.append("New: %s" % len(pushGbdbs))
-            output.append("Untouched: %s" % len((newGbdbSet & oldGbdbSet) - revokedGbdbs))
-            output.append("Revoked/Replaced/Renamed: %s" % len(revokedGbdbs))
-            output.append("New + Untouched: %s" % len(pushGbdbs | ((newGbdbSet & oldGbdbSet) - revokedGbdbs)))
-            output.append("Total (New + Untouched + Revoked/Replaced/Renamed): %s" % len(newGbdbSet | oldGbdbSet | revokedGbdbs))
-        if gbdbprint and not args['summary']:
-            output.append("")
-            output.append("New Gbdb Files (%s):" % len(pushGbdbs))
-            output.extend(sorted(list(self.newGbdbs)))
-            output.append("")
-            output.append("Untouched Gbdb Files (%s):" % len((newGbdbSet & oldGbdbSet) - revokedGbdbs))
-            output.extend(self.__printGbdbPath((newGbdbSet & oldGbdbSet) - revokedGbdbs, args['database']))
-            output.append("")
-            output.append("Revoked/Replaced/Renamed Gbdb Files (%s):" % len(revokedGbdbs))
-            output.extend(self.__printGbdbPath(revokedGbdbs, args['database']))
+        self.newFiles = set(self.__printIter(newFiles, self.releasePath))
+        self.newGbdbs = set(self.__printIter(pushGbdbs, self.gbdbPath))
+        self.newSupplemental = set(self.__printIter(newSupp, self.releasePath))
+        self.newOthers = set(self.__printIter(additionalList, self.releasePath))
             
-        supplementalprint = len(newSupplementalSet | oldSupplementalSet)
-        self.newSupplemental = set(self.__printWithPath(newSupplementalSet - oldSupplementalSet, c, args['releaseNew']))
-        if supplementalprint:
-            output.append("\n")
-            output.append("SUPPLEMENTAL FILES:")
-            output.append("New: %s" % len(newSupplementalSet - oldSupplementalSet))
-            output.append("Untouched: %s" % len(oldSupplementalSet & newSupplementalSet))
-            output.append("Removed: %s" % len(oldSupplementalSet - newSupplementalSet))
-            output.append("New + Untouched: %s" % len((newSupplementalSet - oldSupplementalSet) | (oldSupplementalSet & newSupplementalSet)))
-            output.append("Total: %s" % len(newSupplementalSet | oldSupplementalSet))
-        if supplementalprint and not args['summary']:
-            output.append("")
-            output.append("New Supplemental Files (%s):" % len(newSupplementalSet - oldSupplementalSet))
-            output.extend(sorted(list(self.newSupplemental)))
-            output.append("")
-            output.append("Untouched Supplemental Files (%s):" % len(oldSupplementalSet & newSupplementalSet))
-            output.extend(self.__printWithPath(oldSupplementalSet & newSupplementalSet, c, args['releaseNew']))
-            output.append("")
-            output.append("Removed Supplemental Files (%s):" % len(oldSupplementalSet - newSupplementalSet))
-            output.extend(self.__printWithPath(oldSupplementalSet - newSupplementalSet, c, args['releaseNew']))
-            
-        otherprint = len(additionalList | oldAdditionalList)
-        self.newOthers = set(self.__printWithPath(additionalList, c, args['releaseNew']))
+        otherprint = len(allOther)
         if otherprint:
             output.append("\n")
             output.append("OTHER FILES:")
             output.append("New: %s" % len(additionalList))
-            output.append("Revoked/Replace: %s" % len(oldAdditionalList - additionalList))
-            output.append("Total: %s" % len(additionalList | oldAdditionalList))
+            output.append("Revoked/Replace: %s" % len(removedOther))
+            output.append("Total: %s" % len(allOther))
         if otherprint and not args['summary']:
             output.append("")
             output.append("New Other Files (%s):" % len(additionalList))
             output.extend(sorted(list(self.newOthers)))
             output.append("")
-            output.append("Revoked Other Files (%s):" % len(oldAdditionalList - additionalList))
-            output.extend(self.__printWithPath((oldAdditionalList - additionalList), c, args['releaseNew']))
+            output.append("Revoked Other Files (%s):" % len(removedOther))
+            output.extend(self.__printIter((removedOther), self.releasePath))
         output.append("\n")
         
         if len(missingFiles):
             output.append("Files that dropped between releases (%s):" % len(missingFiles))
-            output.extend(self.__printWithPath(missingFiles, c, args['releaseOld']))
+            output.extend(self.__printIter(missingFiles, self.releasePath))
             output.append("\n")
         
         if not args['ignore']:
             output.append("No Errors")
         return output
 
+    def __printSectionOne(self, output, set, title):
+        output = []
+        if set:
+            output.append("%s (%s):" % (title, len(set)))
+            output.extend(sorted(list(set)))
+            output.append("\n")
+        return output
+
     def printReportOne(self, args, c):
         (totalFiles, revokedFiles, newGbdbSet, revokedGbdbs, newTableSet, revokedTables, additionalList, atticSet, newSupplementalSet, tableSize) = (self.totalFiles, self.revokedFiles, self.newGbdbSet, self.revokedGbdbs, self.newTableSet, self.revokedTableSet, self.additionalList, self.atticSet, self.newSupplementalSet, self.tableSize)
         output = []
-        output.append("mkChangeNotes v2")
-        output.append("%s %s Release %s" % (args['database'], args['composite'], args['releaseNew']))
-        output.append("")
-        output.append("QA Count Summaries for Release %s:" % args['releaseNew'])
-        output.append("Tables: %d" % int(len(newTableSet - revokedTables)))
-        output.append("Files: %d" % int(len(totalFiles - revokedFiles)))
-        output.append("Gbdbs: %d" % int(len(newGbdbSet - revokedGbdbs)))
-        output.append("Supplemental: %d" % int(len(newSupplementalSet)))
-        output.append("Other: %d" % int(len(additionalList)))
-        output.append("")
-        output.append("REVOKED:")
-        output.append("Tables: %s" % len(revokedTables))
-        output.append("Files: %s" % len(revokedFiles))
-        output.append("Gbdbs: %s" % len(revokedGbdbs))
-        output.append("\n")
-        totalsize = 0;
-        output.append("Sizes of New:")
-        tableGb = int(tableSize / 1024)
-        if tableGb > 1:
-            output.append("Tables: %d MB (%d GB)" % (tableSize, tableGb))
-        else:
-            output.append("Tables: %d MB" % tableSize)
-        totalsize = totalsize + tableSize
-        size = int(self.__makeFileSizes(c, args, totalFiles - revokedFiles))
-        totalsize = totalsize + size
-        if int(size/1024) > 1:
-            output.append("Files: %d MB (%d GB)" % (size, int(size/1024)))
-        else:
-            output.append("Files: %d MB" % size)
-        size = int(self.__makeFileSizes(c, args, newGbdbSet - revokedGbdbs))
-        totalsize = totalsize + size
-        if int(size/1024) > 1:
-            output.append("Gbdbs: %d MB (%d GB)" % (size, int(size/1024)))
-        else:
-            output.append("Gbdbs: %d MB" % size)
-        size = int(self.__makeFileSizes(c, args, newSupplementalSet))
-        totalsize = totalsize + size
-        if int(size/1024) > 1:
-            output.append("Supplemental: %d MB (%d GB)" % (size, int(size/1024)))
-        else:
-            output.append("Supplemental: %d MB" % size)
-        size = int(self.__makeFileSizes(c, args, (additionalList)))
-        totalsize = totalsize + size
-        if int(size/1024) > 1:
-            output.append("Other: %d MB" % size)
-        else:
-            output.append("Other: %d MB" % size)
-        if int(totalsize/1024) > 1:
-            output.append("Total: %d MB (%d GB)" % (totalsize, int(totalsize/1024)))
-        else:
-            output.append("Total: %d MB" % totalsize)
-        output.append("\n")
-        self.newTables = set(self.__printIter(newTableSet - revokedTables))
-        self.newFiles = set(self.__printWithPath(totalFiles - revokedFiles, c, args['releaseNew']))
-        self.newGbdbs = set(self.__printGbdbPath(newGbdbSet - revokedGbdbs, args['database']))
-        self.newSupplemental = set(self.__printWithPath(newSupplementalSet, c, args['releaseNew']))
-        self.newOthers = set(self.__printWithPath(additionalList, c, args['releaseNew']))
+        newTables = newTableSet - revokedTables
+        newFiles = totalFiles - revokedFiles
+        newGbdbs = newGbdbSet - revokedGbdbs
+
+        output.extend(self.__qaHeader(output, newTables, newFiles, newGbdbSet, newSupplementalSet, additionalList, revokedTables, revokedFiles, revokedGbdbs, totalFiles, newGbdbSet, args, c))
+        self.newTables = set(newTables)
+        self.newFiles = set(self.__printIter(newFiles, self.releasePath))
+        self.newGbdbs = set(self.__printIter(newGbdbs, self.releasePath))
+        self.newSupplemental = set(self.__printIter(newSupplementalSet, self.releasePath))
+        self.newOthers = set(self.__printIter(additionalList, self.releasePath))
+
         if not args['summary']:
             output.append("")
-            if len(newTableSet - revokedTables):
-                output.append("New Tables (%s):" % len(self.newTables))
-                output.extend(sorted(list(self.newTables)))
-                output.append("\n")
-            if len(totalFiles - revokedFiles):
-                output.append("New Download Files (%s):" % len(self.newFiles))
-                output.extend(sorted(list(self.newFiles)))
-                output.append("\n")
-            if len(newGbdbSet - revokedGbdbs):
-                output.append("New Gbdb Files (%s):" % len(newGbdbSet - revokedGbdbs))
-                output.extend(sorted(list(self.newGbdbs)))
-                output.append("\n")
-            if len(newSupplementalSet):
-                output.append("New Supplemental Files (%s):" % len(newSupplementalSet))
-                output.extend(sorted(list(self.newSupplemental)))
-                output.append("\n")
-            if len(additionalList):
-                output.append("New Other Files (%s):" % len(additionalList))
-                output.extend(sorted(list(self.newOthers)))
-                output.append("\n")
-            if len(revokedTables):
-                output.append("Revoked Tables (%s):" % len(revokedTables))
-                output.extend(self.__printIter(revokedTables))
-                output.append("\n")
-            if len(revokedFiles):
-                output.append("Revoked Files (%s):" % len(revokedFiles))
-                output.extend(self.__printWithPath(revokedFiles, c, args['releaseNew']))
-                output.append("\n")
-            if len(revokedGbdbs):
-                output.append("Revoked Gbdbs (%s):" % len(revokedGbdbs))
-                output.extend(self.__printGbdbPath(revokedGbdbs, args['database']))
-                output.append("\n")
+            output.extend(self.__printSectionOne(output, self.newTables, "New Tables"))
+            output.extend(self.__printSectionOne(output, self.newFiles, "New Download Files"))
+            output.extend(self.__printSectionOne(output, self.newGbdbs, "New Gbdb Files"))
+            output.extend(self.__printSectionOne(output, self.newSupplemental, "New Supplemental Files"))
+            output.extend(self.__printSectionOne(output, self.newOthers, "New Other Files"))
+            output.extend(self.__printSectionOne(output, self.__printIter(revokedTables, 0), "Revoked Tables"))
+            output.extend(self.__printSectionOne(output, self.__printIter(revokedFiles, self.releasePath), "Revoked Files"))
+            output.extend(self.__printSectionOne(output, self.__printIter(revokedGbdbs, self.gbdbPath), "Revoked Gbdbs"))
         if not args['ignore']:
             output.append("No Errors")
         return output
 
     def printErrors(self, errors):
         errorsDict = {}
         output = []
         for i in errors:
             line = i.split(":", 1)
             try:
                 errorsDict[line[0]].append(line[1])
             except:
                 errorsDict[line[0]] = []
                 errorsDict[line[0]].append(line[1])
         output.append("Errors (%s):" % len(errors))
         for i in sorted(errorsDict.keys()):
             output.append("%s:" % i)
             for j in sorted(errorsDict[i]):
                 output.append("%s" % j)
         return output
 
     def __init__(self, args):
         self.releaseNew = args['releaseNew']
         self.releaseOld = args['releaseOld']
         self.database = args['database']
         self.composite = args['composite']
         self.loose = args['loose']
         self.ignore = args['ignore']
         self.summary = args['summary']
         self.specialMdb = args['specialMdb']
         self.args = args
 
         errors = []
         c = track.CompositeTrack(self.database, self.composite, None, self.specialMdb)
 
         #sanitize arguments
         if not self.releaseOld.isdigit():
             self.releaseOld = 'solo'
         elif int(self.releaseOld) <= 0:
             self.releaseOlf = 'solo'
         elif self.releaseOld > self.releaseNew:
             self.releaseOld = 'solo'
 
+        self.releasePath = c.httpDownloadsPath + 'release' + args['releaseNew']
+        self.gbdbPath = "/gbdb/%s/bbi" % args['database']
         if int(self.releaseNew) > 1 and str(self.releaseOld) != 'solo':
 
             self.newReleaseFiles = c.releases[int(self.releaseNew)-1]
             self.oldReleaseFiles = c.releases[int(self.releaseOld)-1]
 
             self.newMdb = c.alphaMetaDb
             self.oldMdb = c.publicMetaDb
 
+            #make a list of missing files
+            self.missingFiles = self.__checkFilesForDropped()
+            
             #check if all files listed in release directories have associated metaDb entries
             (self.newMdb, self.revokedSet, self.revokedFiles, self.atticSet, self.newSupplementalSet, newFileErrors) = self.checkMetaDbForFiles("alpha metaDb", "new")
             (self.oldMdb, spam, eggs, ham, self.oldSupplementalSet, oldFileErrors) = self.checkMetaDbForFiles("public metaDb", "old")
-            errors.extend(newFileErrors)
-            errors.extend(oldFileErrors)
 
             #checks to see that nothing has disappeared between public and alpha
             errors.extend(self.__checkAlphaForDropped("alpha metaDb", "stanza"))
-            self.missingFiles = self.__checkFilesForDropped()
             errors.extend(self.__checkMd5sums())
 
             #checks and gets tables that are present, also returns a revoked set of tables for new
             (self.newTableSet, self.revokedTableSet, newTableError) = self.checkTableStatus("alpha metaDb", "new")
             (self.oldTableSet, spam, oldTableError) = self.checkTableStatus("public metaDb", "old")
-            errors.extend(newTableError)
-            errors.extend(oldTableError)
 
             #same as above except for gbdbs
             (self.newGbdbSet, self.revokedGbdbs, newGbdbError) = self.getGbdbFiles("new")
             (self.oldGbdbSet, eggs, oldGbdbError) = self.getGbdbFiles("old")
+
+            #fill in the errors
+            errors.extend(newFileErrors)
+            errors.extend(oldFileErrors)
+            errors.extend(newTableError)
+            errors.extend(oldTableError)
             errors.extend(newGbdbError)
             errors.extend(oldGbdbError)
 
             #for ease of typing
             totalFiles = set(self.newReleaseFiles)
             oldTotalFiles = set(self.oldReleaseFiles)
 
             #these could honestly be moved earlier, get a file list processing section or something
             #they clean out special fiels out and separated the master fiels list into the 3 required
             #ones: wgEncode, supplemental and additional.
             self.totalFiles = self.__cleanSpecialFiles(totalFiles)
             self.oldTotalFiles = self.__cleanSpecialFiles(oldTotalFiles)
             (self.oldTotalFiles, self.additionalList, self.oldAdditionalList, self.totalFiles) = self.__separateOutAdditional()
 
             #get the stuff you need to push, also table sizes        
-            self.pushTables = set(sorted((self.newTableSet - self.oldTableSet)))
-            self.pushFiles = set(sorted((self.totalFiles - self.oldTotalFiles)))
-            self.pushGbdbs = set(sorted((self.newGbdbSet - self.oldGbdbSet)))
-            self.tableSize = self.__getTableSize()
+
             self.errors = errors
             #don't output.append(report unless ignore option is on or no errors
             if (not errors) or self.ignore:
                 self.output = self.printReport(args, c)
             else:
                 self.output = self.printErrors(errors)
 
 
         elif self.releaseOld == 'solo':
 
             self.newReleaseFiles = c.releases[int(self.releaseNew)-1]
 
             self.newMdb = c.alphaMetaDb
 
             (self.newMdb, self.revokedSet, self.revokedFiles, self.atticSet, self.newSupplementalSet, newFileErrors) = self.checkMetaDbForFiles("alpha metaDb", "new")
-            errors.extend(newFileErrors)
 
             (self.newTableSet, self.revokedTableSet, newTableError) = self.checkTableStatus("alpha metaDb", "new")
-            errors.extend(newTableError)
 
             self.tableSize = self.__getTableSize()
 
             (self.newGbdbSet, self.revokedGbdbs, newGbdbError) = self.getGbdbFiles("new")
+
+            #collect errors
+            errors.extend(newFileErrors)
+            errors.extend(newTableError)
             errors.extend(newGbdbError)
 
             #set for easy operations
             totalFiles = set(self.newReleaseFiles)
 
             #clean out special fiels we don't push i.e. md5sum.history
             self.totalFiles = self.__cleanSpecialFiles(totalFiles)
 
             #makes list for additional files
             (self.oldTotalFiles, self.oldSupplementalSet) = (set(), set())
             (self.oldReleaseFiles, self.additionalList, self.oldAdditionalList, self.totalFiles) = self.__separateOutAdditional()
             self.errors = errors
             if (not errors) or self.ignore:
                 self.output = self.printReportOne(args, c) 
             else:
                 self.output = self.printErrors(errors)