2e2c015fcf78e894722b3e368cfb9b041d12bb00
wong
  Mon Oct 24 16:00:19 2011 -0700
separated out library stuff
diff --git python/lib/ucscgenomics/mkChangeNotes.py python/lib/ucscgenomics/mkChangeNotes.py
new file mode 100755
index 0000000..1f3f8bb
--- /dev/null
+++ python/lib/ucscgenomics/mkChangeNotes.py
@@ -0,0 +1,700 @@
+#!/hive/groups/encode/dcc/bin/python
+import sys, os, re, argparse, subprocess, math
+from ucscgenomics import ra, track, qa
+
+class makeNotes(object):
+    def checkMetaDbForFiles(self, status, state):
+        """Match every wgEncode release file against the metaDb stanzas.
+
+        status -- metaDb label, used only in error messages.
+        state  -- 'new' or 'old'; selects self.newMdb/newReleaseFiles or
+                  self.oldMdb/oldReleaseFiles (anything else: ValueError).
+        Returns (filtermdb, revokedset, revokedfiles, atticset,
+        supplementalset, errors).
+        """
+        if state == 'new':
+            (mdb, files) = (self.newMdb, self.newReleaseFiles)
+        elif state == 'old':
+            (mdb, files) = (self.oldMdb, self.oldReleaseFiles)
+        else:
+            raise ValueError("state must be 'new' or 'old', got %r" % (state,))
+        errors = []
+        (revokedset, revokedfiles, atticset, supplementalset) = (set(), set(), set(), set())
+        filtermdb = ra.RaFile()
+        for i in files:
+            if re.match('supplemental', i):
+                supplementalset.add(i)
+            if not re.match('wgEncode.*', i):
+                continue
+            # escape the name so that '.' and other regex characters in it match literally
+            pattern = re.compile(".*%s.*" % re.escape(i))
+            filestanza = mdb.filter(lambda s: pattern.match(s['fileName']), lambda s: s)
+            if filestanza:
+                for j in filestanza:
+                    filtermdb[j.name] = j
+                    if 'objStatus' in j and re.search('revoked|replaced|renamed', j['objStatus']):
+                        revokedfiles.add(i)
+                        revokedset.add(j.name)
+                    if 'attic' in j:
+                        atticset.add(j.name)
+            elif not (self.loose and re.match('.*bai', i)):  # loose mode tolerates names containing 'bai' without a stanza
+                errors.append("metaDb: %s is not mentioned in %s" % (i, status))
+        return (filtermdb, revokedset, revokedfiles, atticset, supplementalset, errors)
+
+    def __checkAlphaForDropped(self, status, type):
+        """Build one error per key of self.oldMdb that self.newMdb lacks.
+
+        'type' prefixes each message and 'status' names where it is missing.
+        """
+        dropped = set(self.oldMdb).difference(set(self.newMdb))
+        return ["%s: %s missing from %s" % (type, name, status) for name in dropped]
+
+    def __checkFilesForDropped(self):
+        """Return the names in self.oldReleaseFiles absent from self.newReleaseFiles."""
+        current = set(self.newReleaseFiles)
+        return set(self.oldReleaseFiles).difference(current)
+
+    def checkTableStatus(self, status, state):
+        """Check that the tables named in the metaDb exist in the database.
+
+        status -- metaDb label, used only in error messages.
+        state  -- 'new' uses self.newMdb and leaves out the stanzas named in
+                  self.revokedSet; 'old' uses self.oldMdb with nothing
+                  revoked (anything else: ValueError).
+
+        Returns (mdbtableset, revokedtableset, errors):
+          mdbtableset     -- tableName of each table stanza that is neither
+                             in the attic nor revoked
+          revokedtableset -- tableName of the stanzas in the revoked set
+          errors          -- one message per table stanza without tableName
+                             and per table that hgsql did not list
+
+        Raises OSError when hgsql cannot be started.
+        """
+        errors = []
+        revokedset = set()
+        database = self.database
+        if state == 'new':
+            (mdb, revokedset) = (self.newMdb, self.revokedSet)
+        elif state == 'old':
+            mdb = self.oldMdb
+        else:
+            raise ValueError("state must be 'new' or 'old', got %r" % (state,))
+
+        # metaObject names of the table stanzas that have a tableName and are not in the attic
+        live = set(mdb.filter(lambda s: s['objType'] == 'table' and 'tableName' in s and 'attic' not in s,
+                              lambda s: s['metaObject']))
+        live = live - revokedset
+        mdbtableset = set(mdb.filter(lambda s: s['metaObject'] in live, lambda s: s['tableName']))
+        revokedtableset = set(mdb.filter(lambda s: s['metaObject'] in revokedset, lambda s: s['tableName']))
+
+        # hgsql is started from an argument list instead of a shell command line,
+        # so nothing in the table names is open to shell expansion
+        tablestr = "'" + "','".join(mdbtableset) + "'"
+        # NOTE(review): the query does not restrict table_schema, so a table of the
+        # same name in another database would also count as found -- confirm intended
+        query = "select table_name from information_schema.TABLES where table_name in (%s)" % tablestr
+        p = subprocess.Popen(["hgsql", database, "-e", query],
+                             stdin=subprocess.PIPE,
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.STDOUT,
+                             close_fds=True)
+        # communicate() reads to EOF and waits for hgsql to exit
+        output = p.communicate()[0]
+
+        # first line is the column header, the last element is what follows the final newline
+        sqltableset = set(output.split("\n")[1:-1])
+
+        missingTableNames = set(mdb.filter(lambda s: s['objType'] == 'table' and 'tableName' not in s and 'attic' not in s,
+                                           lambda s: s['metaObject']))
+        missingFromDb = mdbtableset - sqltableset
+
+        # table stanzas that carry no tableName
+        for i in missingTableNames:
+            errors.append("table: %s is type obj, but missing tableName field called by %s" % (i, status))
+        # tables the metaDb names but hgsql did not list
+        for i in missingFromDb:
+            errors.append("table: %s table not found in Db called by %s" % (i, status))
+
+        return (mdbtableset, revokedtableset, errors)
+
+
+    def getGbdbFiles(self, state):
+        """Collect the /gbdb bbi files behind the current and revoked tables.
+
+        state -- 'new' (self.newTableSet, self.revokedSet, self.newMdb) or
+                 'old' (self.oldTableSet, self.oldMdb, empty revoked set).
+
+        Returns (gbdbfileset, revokedfileset, errors); a name that is not a
+        regular file under /gbdb/DATABASE/bbi is reported in errors instead
+        of being added to its set.
+        """
+        database = self.database
+        revokedset = set()
+        if state == 'new':
+            (tableset, revokedset, mdb) = (self.newTableSet, self.revokedSet, self.newMdb)
+        elif state == 'old':
+            (tableset, mdb) = (self.oldTableSet, self.oldMdb)
+        errors = []
+
+        def existing(stanzas, message):
+            # split each stanza's comma separated fileName and keep what is on disk
+            found = set()
+            for stanza in stanzas:
+                for name in stanza['fileName'].split(','):
+                    if os.path.isfile("/gbdb/%s/bbi/%s" % (database, name)):
+                        found.add(name)
+                    else:
+                        errors.append(message % (name, database))
+            return found
+
+        gbdbtableset = qa.getGbdbTables(self.database, tableset)
+        revokedtableset = qa.getGbdbTables(self.database, revokedset)
+        file1stanzalist = mdb.filter(lambda s: s['tableName'] in gbdbtableset, lambda s: s)
+        revokedstanzalist = mdb.filter(lambda s: s['tableName'] in revokedtableset, lambda s: s)
+        gbdbfileset = existing(file1stanzalist, "gbdb: %s does not exist in /gbdb/%s/bbi")
+        revokedfileset = existing(revokedstanzalist, "gbdb: revoked gbdb %s does not exist in /gbdb/%s/bbi")
+        return (gbdbfileset, revokedfileset, errors)
+
+    def __getTableSize(self):
+        """Return data plus index size of the tables in self.newTableSet, in MB.
+
+        Sizes are read from information_schema.TABLES through hgsql, started
+        from an argument list rather than a shell.  The sum is rounded up with
+        math.ceil.  An empty table set skips the query and yields 0; NULL
+        sizes are skipped instead of breaking float().
+        """
+        (mdbtableset, database) = (self.newTableSet, self.database)
+        if not mdbtableset:
+            return math.ceil(float(0))
+        orstr = " OR ".join("table_name = '%s'" % i for i in mdbtableset)
+        query = "SELECT ROUND(data_length/1024/1024,2) total_size_mb, ROUND(index_length/1024/1024,2) total_index_size_mb FROM information_schema.TABLES WHERE %s" % orstr
+        p = subprocess.Popen(["hgsql", database, "-e", query], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
+        rows = p.communicate()[0].split("\n")[1:-1]  # drop the header line and the empty tail
+        tablesize = sum((float(j) for i in rows for j in i.split() if j != 'NULL'), float(0))
+        return math.ceil(tablesize)
+
+    def __checkMd5sums(self):
+        """List wgEncode files whose md5sum differs between the two releases.
+
+        Only names present in both self.oldReleaseFiles and
+        self.newReleaseFiles are compared; in loose mode the findings are
+        dropped and an empty list is returned.
+        """
+        (previous, current) = (self.oldReleaseFiles, self.newReleaseFiles)
+        changed = [name for name in previous
+                   if name in current and re.match('wgEncode.*', name)
+                   and previous[name].md5sum != current[name].md5sum]
+        report = ["file: %s have changed md5sums between releases. %s vs %s" % (name, previous[name].md5sum, current[name].md5sum) for name in changed]
+        return list() if self.loose else report
+
+    def __makeFileSizes(self, c, args, inlist):
+        """Return the combined size of inlist's files in whole MB, rounded up.
+
+        Each name is looked up under c.downloadsDirectory + 'release' +
+        args['releaseNew']; symlinks are resolved before the size is read.
+        """
+        paths = ["%s/%s" % (c.downloadsDirectory + 'release' + args['releaseNew'], name) for name in inlist]
+        total = 0
+        for path in paths:
+            total += int(os.path.getsize(os.path.realpath(path)))
+        return int(math.ceil(float(total) / (1024**2)))
+
+    def __cleanSpecialFiles(self, inlist):
+        """Remove the special bookkeeping names from inlist in place and return it."""
+        # md5sum.history is the only special name at present
+        for special in ('md5sum.history',):
+            if special in inlist:
+                inlist.remove(special)
+        return inlist
+
+    def __separateOutAdditional(self):
+        """Split the release file sets into wgEncode files and other files.
+
+        Works on self.totalFiles (new release) and self.oldTotalFiles (old
+        release).  Returns (newOld, additionalList, oldAdditionalList,
+        newTotal):
+          newOld            -- old-release names starting with wgEncode
+          additionalList    -- new-release names that neither start with
+                               wgEncode nor appear in self.newSupplementalSet
+          oldAdditionalList -- old-release non-wgEncode names that are in
+                               neither self.totalFiles nor
+                               self.newSupplementalSet
+          newTotal          -- new-release wgEncode names outside
+                               self.newSupplementalSet
+        """
+        (previous, current) = (self.oldTotalFiles, self.totalFiles)
+        (supplemental, unusedOldSupplemental) = (self.newSupplementalSet, self.oldSupplementalSet)
+
+        def isEncode(name):
+            return bool(re.match('wgEncode.*', name))
+
+        kept = [name for name in current if name not in supplemental]
+        newTotal = set(name for name in kept if isEncode(name))
+        additionalList = set(name for name in kept if not isEncode(name))
+        newOld = set(name for name in previous if isEncode(name))
+        oldAdditionalList = set(name for name in previous if not isEncode(name) and name not in current and name not in supplemental)
+        return (newOld, additionalList, oldAdditionalList, newTotal)
+
+    def __printWithPath(self, set, c, release):
+        """Return the sorted names, each prefixed with the release's http path."""
+        # prefix: c.httpDownloadsPath + 'release' + release
+        ordered = sorted(set)
+        return ["%s/%s" % (c.httpDownloadsPath + 'release' + release, i) for i in ordered]
+    def __printGbdbPath(self, set, database):
+        """Return the sorted names as /gbdb/DATABASE/bbi/NAME paths."""
+        template = "/gbdb/%s/bbi/%s"
+        ordered = sorted(set)
+        return [template % (database, i) for i in ordered]
+
+    def __printIter(self, inlist):
+        """Return the items of inlist as a new list in sorted order."""
+        # sorted() already builds a fresh list, so no copy loop is needed
+        ordered = sorted(inlist)
+        return ordered
+
+    def printReport(self, args, c):
+        """Build the report lines for release args['releaseNew'] against args['releaseOld']; also stores the new items in self.newTables, self.newFiles, self.newGbdbs, self.newSupplemental and self.newOthers."""
+        (totalFiles, newGbdbSet, newTableSet, additionalList, oldAdditionalList, pushTables, pushFiles, pushGbdbs, oldTableSet, oldReleaseFiles, oldGbdbSet, atticSet, revokedFiles, revokedTableSet, revokedGbdbs, missingFiles, newSupplementalSet, oldSupplementalSet, tableSize) = (self.totalFiles, self.newGbdbSet, self.newTableSet, self.additionalList, self.oldAdditionalList, self.pushTables, self.pushFiles, self.pushGbdbs, self.oldTableSet, self.oldTotalFiles, self.oldGbdbSet, self.atticSet, self.revokedFiles, self.revokedTableSet, self.revokedGbdbs, self.missingFiles, self.newSupplementalSet, self.oldSupplementalSet, self.tableSize)
+        #the groups here need to be predefined, I just copied and pasted after working out what they were
+        sep = "\n"
+        output = []
+        output.append("mkChangeNotes v2")
+        output.append("%s %s Release %s vs Release %s" % (args['database'], args['composite'], args['releaseNew'], args['releaseOld']))
+        output.append("")
+        output.append("QA Count Summaries for Release %s:" % args['releaseNew'])
+        output.append("Tables: %d" % int(len(newTableSet)))
+        output.append("Files: %d" % int(len(totalFiles - revokedFiles)))
+        output.append("Gbdbs: %d" % int(len(newGbdbSet)))
+        output.append("Supplemental: %d" % int(len(newSupplementalSet - oldSupplementalSet)))
+        output.append("Other: %d" % int(len(additionalList)))
+        output.append("\n")
+        output.append("Sizes of New:")
+        totalsize = 0
+        size = 0
+        tableGb = int(tableSize/1024)
+        totalsize = totalsize + tableSize
+        if tableGb > 1:
+            output.append("Tables: %d MB (%d GB)" % (tableSize, tableGb))
+        elif tableSize:  # no Tables line at all when the size is 0
+            output.append("Tables: %d MB" % tableSize)
+        # size of the download files in pushFiles
+        size = int(self.__makeFileSizes(c, args, pushFiles))
+        totalsize = totalsize + size
+        if int(size/1024) > 1:
+            output.append("Files: %d MB (%d GB)" % (size, int(size/1024)))
+        else:
+            output.append("Files: %d MB" % size)
+        # gbdb sizes also go through __makeFileSizes, which looks the names up under the downloads directory
+        size = int(self.__makeFileSizes(c, args, pushGbdbs))
+        totalsize = totalsize + size
+        if int(size/1024) > 1:
+            output.append("Gbdbs: %d MB (%d GB)" % (size, int(size/1024)))
+        else:
+            output.append("Gbdbs: %d MB" % size)
+        # supplemental files that the old release did not have
+        size = int(self.__makeFileSizes(c, args, (newSupplementalSet - oldSupplementalSet)))
+        totalsize = totalsize + size
+        if int(size/1024) > 1:
+            output.append("Supplemental: %d MB (%d GB)" % (size, int(size/1024)))
+        else:
+            output.append("Supplemental: %d MB" % size)
+        # the remaining ("Other") files
+        size = int(self.__makeFileSizes(c, args, (additionalList)))
+        totalsize = totalsize + size
+        if int(size/1024) > 1:
+            output.append("Other: %d MB (%d GB)" % (size, int(size/1024)))
+        else:
+            output.append("Other: %d MB" % size)
+        # sum of the table size and all the file sizes above
+        if int(totalsize/1024) > 1:
+            output.append("Total: %d MB (%d GB)" % (totalsize, int(totalsize/1024)))
+        else:
+            output.append("Total: %d MB" % totalsize)
+        # TABLES section; left out when no table is new, old or revoked
+        tableprint = len(newTableSet | oldTableSet | revokedTableSet)
+        self.newTables = set(pushTables)
+        if tableprint:
+            output.append("\n")
+            output.append("TABLES:")
+            output.append("New: %s" % len(pushTables))
+            output.append("Untouched: %s" % len(oldTableSet & newTableSet))
+            output.append("Revoked/Replaced/Renamed: %s" % len(revokedTableSet))
+            output.append("New + Untouched: %s" % len(newTableSet))
+            output.append("Total (New + Untouched + Revoked/Replaced/Renamed): %s" % len(newTableSet | oldTableSet | revokedTableSet))
+        if tableprint and not args['summary']:
+            output.append("")
+            output.append("New Tables (%s):" % len(pushTables))
+            output.extend(self.__printIter(pushTables))
+            output.append("")
+            output.append("Untouched (%s):" % len(oldTableSet & newTableSet))
+            output.extend(self.__printIter(oldTableSet & newTableSet))
+            output.append("")
+            output.append("Revoked/Replaced/Renamed Tables (%s):" % len(revokedTableSet))
+            output.extend(self.__printIter(revokedTableSet))
+        # DOWNLOAD FILES section; revoked files are taken out of the new and untouched counts
+        dlprint = len(totalFiles | oldReleaseFiles | revokedFiles)
+        self.newFiles = set(self.__printWithPath((pushFiles - revokedFiles), c, args['releaseNew']))
+        if dlprint:
+            output.append("\n")
+            #downloadables = total - revoked
+            output.append("DOWNLOAD FILES:")
+            output.append("New: %s" % len(pushFiles - revokedFiles))
+            output.append("Untouched: %s" % len((totalFiles & oldReleaseFiles) - revokedFiles))
+            output.append("Revoked/Replaced/Renamed: %s" % len(revokedFiles))
+            output.append("New + Untouched: %s" % len((pushFiles - revokedFiles) | ((totalFiles & oldReleaseFiles) - revokedFiles)))
+            output.append("Total (New + Untouched + Revoked/Replaced/Renamed): %s" % len(totalFiles | oldReleaseFiles | revokedFiles))
+        if dlprint and not args['summary']:
+            output.append("")
+            output.append("New Download Files (%s):" % len(pushFiles - revokedFiles))
+            output.extend(sorted(list(self.newFiles)))
+            output.append("")
+            output.append("Untouched Download Files (%s):" % len((totalFiles & oldReleaseFiles) - revokedFiles))
+            output.extend(self.__printWithPath(((totalFiles & oldReleaseFiles) - revokedFiles), c, args['releaseNew']))
+            output.append("")
+            output.append("Revoked/Replaced/Renamed Download Files (%s):" % len(revokedFiles))
+            output.extend(self.__printWithPath(revokedFiles, c, args['releaseNew']))
+        # GBDBS section; paths are printed as /gbdb/DATABASE/bbi/NAME
+        gbdbprint = len(newGbdbSet | oldGbdbSet | revokedGbdbs) 
+        self.newGbdbs = set(self.__printGbdbPath(pushGbdbs, args['database']))
+        if gbdbprint:
+            output.append("\n")
+            output.append("GBDBS:")
+            output.append("New: %s" % len(pushGbdbs))
+            output.append("Untouched: %s" % len((newGbdbSet & oldGbdbSet) - revokedGbdbs))
+            output.append("Revoked/Replaced/Renamed: %s" % len(revokedGbdbs))
+            output.append("New + Untouched: %s" % len(pushGbdbs | ((newGbdbSet & oldGbdbSet) - revokedGbdbs)))
+            output.append("Total (New + Untouched + Revoked/Replaced/Renamed): %s" % len(newGbdbSet | oldGbdbSet | revokedGbdbs))
+        if gbdbprint and not args['summary']:
+            output.append("")
+            output.append("New Gbdb Files (%s):" % len(pushGbdbs))
+            output.extend(sorted(list(self.newGbdbs)))
+            output.append("")
+            output.append("Untouched Gbdb Files (%s):" % len((newGbdbSet & oldGbdbSet) - revokedGbdbs))
+            output.extend(self.__printGbdbPath((newGbdbSet & oldGbdbSet) - revokedGbdbs, args['database']))
+            output.append("")
+            output.append("Revoked/Replaced/Renamed Gbdb Files (%s):" % len(revokedGbdbs))
+            output.extend(self.__printGbdbPath(revokedGbdbs, args['database']))
+        # SUPPLEMENTAL FILES section; "Removed" is what only the old release had
+        supplementalprint = len(newSupplementalSet | oldSupplementalSet)
+        self.newSupplemental = set(self.__printWithPath(newSupplementalSet - oldSupplementalSet, c, args['releaseNew']))
+        if supplementalprint:
+            output.append("\n")
+            output.append("SUPPLEMENTAL FILES:")
+            output.append("New: %s" % len(newSupplementalSet - oldSupplementalSet))
+            output.append("Untouched: %s" % len(oldSupplementalSet & newSupplementalSet))
+            output.append("Removed: %s" % len(oldSupplementalSet - newSupplementalSet))
+            output.append("New + Untouched: %s" % len((newSupplementalSet - oldSupplementalSet) | (oldSupplementalSet & newSupplementalSet)))
+            output.append("Total: %s" % len(newSupplementalSet | oldSupplementalSet))
+        if supplementalprint and not args['summary']:
+            output.append("")
+            output.append("New Supplemental Files (%s):" % len(newSupplementalSet - oldSupplementalSet))
+            output.extend(sorted(list(self.newSupplemental)))
+            output.append("")
+            output.append("Untouched Supplemental Files (%s):" % len(oldSupplementalSet & newSupplementalSet))
+            output.extend(self.__printWithPath(oldSupplementalSet & newSupplementalSet, c, args['releaseNew']))
+            output.append("")
+            output.append("Removed Supplemental Files (%s):" % len(oldSupplementalSet - newSupplementalSet))
+            output.extend(self.__printWithPath(oldSupplementalSet - newSupplementalSet, c, args['releaseNew']))
+        # OTHER FILES section
+        otherprint = len(additionalList | oldAdditionalList)
+        self.newOthers = set(self.__printWithPath(additionalList, c, args['releaseNew']))
+        if otherprint:
+            output.append("\n")
+            output.append("OTHER FILES:")
+            output.append("New: %s" % len(additionalList))
+            output.append("Revoked/Replace: %s" % len(oldAdditionalList - additionalList))
+            output.append("Total: %s" % len(additionalList | oldAdditionalList))
+        if otherprint and not args['summary']:
+            output.append("")
+            output.append("New Other Files (%s):" % len(additionalList))
+            output.extend(sorted(list(self.newOthers)))
+            output.append("")
+            output.append("Revoked Other Files (%s):" % len(oldAdditionalList - additionalList))
+            output.extend(self.__printWithPath((oldAdditionalList - additionalList), c, args['releaseNew']))
+        output.append("\n")
+        # dropped files are listed with the old release's path
+        if len(missingFiles):
+            output.append("Files that dropped between releases (%s):" % len(missingFiles))
+            output.extend(self.__printWithPath(missingFiles, c, args['releaseOld']))
+            output.append("\n")
+        # NOTE(review): __init__ only builds this report when there were no errors or 'ignore' is set
+        if not args['ignore']:
+            output.append("No Errors")
+        return output
+
+    def printReportOne(self, args, c):
+        """Build the report lines for a single release (no old release).
+
+        args -- dict with 'database', 'composite', 'releaseNew', 'summary'
+                and 'ignore'.
+        c    -- composite track object; passed on to __makeFileSizes and
+                __printWithPath, which read the download paths from it.
+
+        Besides returning the list of lines, stores the non-revoked items in
+        self.newTables, self.newFiles, self.newGbdbs, self.newSupplemental
+        and self.newOthers.
+        """
+        (totalFiles, revokedFiles, newGbdbSet, revokedGbdbs, newTableSet, revokedTables, additionalList, newSupplementalSet, tableSize) = (self.totalFiles, self.revokedFiles, self.newGbdbSet, self.revokedGbdbs, self.newTableSet, self.revokedTableSet, self.additionalList, self.newSupplementalSet, self.tableSize)
+        # what remains once the revoked entries are taken out
+        liveTables = newTableSet - revokedTables
+        liveFiles = totalFiles - revokedFiles
+        liveGbdbs = newGbdbSet - revokedGbdbs
+
+        def sizeLine(label, size):
+            # one format for every category, so "Other" gets its GB figure too
+            if int(size/1024) > 1:
+                return "%s: %d MB (%d GB)" % (label, size, int(size/1024))
+            return "%s: %d MB" % (label, size)
+
+        output = []
+        output.append("mkChangeNotes v2")
+        output.append("%s %s Release %s" % (args['database'], args['composite'], args['releaseNew']))
+        output.append("")
+        output.append("QA Count Summaries for Release %s:" % args['releaseNew'])
+        output.append("Tables: %d" % int(len(liveTables)))
+        output.append("Files: %d" % int(len(liveFiles)))
+        output.append("Gbdbs: %d" % int(len(liveGbdbs)))
+        output.append("Supplemental: %d" % int(len(newSupplementalSet)))
+        output.append("Other: %d" % int(len(additionalList)))
+        output.append("")
+        output.append("REVOKED:")
+        output.append("Tables: %s" % len(revokedTables))
+        output.append("Files: %s" % len(revokedFiles))
+        output.append("Gbdbs: %s" % len(revokedGbdbs))
+        output.append("\n")
+
+        output.append("Sizes of New:")
+        totalsize = tableSize
+        output.append(sizeLine("Tables", tableSize))
+        sized = (("Files", liveFiles), ("Gbdbs", liveGbdbs), ("Supplemental", newSupplementalSet), ("Other", additionalList))
+        for (label, names) in sized:
+            size = int(self.__makeFileSizes(c, args, names))
+            totalsize = totalsize + size
+            output.append(sizeLine(label, size))
+        output.append(sizeLine("Total", totalsize))
+        output.append("\n")
+
+        # keep the printable form of every non-revoked item on the instance
+        self.newTables = set(self.__printIter(liveTables))
+        self.newFiles = set(self.__printWithPath(liveFiles, c, args['releaseNew']))
+        self.newGbdbs = set(self.__printGbdbPath(liveGbdbs, args['database']))
+        self.newSupplemental = set(self.__printWithPath(newSupplementalSet, c, args['releaseNew']))
+        self.newOthers = set(self.__printWithPath(additionalList, c, args['releaseNew']))
+        if not args['summary']:
+            output.append("")
+            if len(liveTables):
+                output.append("New Tables (%s):" % len(self.newTables))
+                output.extend(sorted(list(self.newTables)))
+                output.append("\n")
+            if len(liveFiles):
+                output.append("New Download Files (%s):" % len(self.newFiles))
+                output.extend(sorted(list(self.newFiles)))
+                output.append("\n")
+            if len(liveGbdbs):
+                output.append("New Gbdb Files (%s):" % len(liveGbdbs))
+                output.extend(sorted(list(self.newGbdbs)))
+                output.append("\n")
+            if len(newSupplementalSet):
+                output.append("New Supplemental Files (%s):" % len(newSupplementalSet))
+                output.extend(sorted(list(self.newSupplemental)))
+                output.append("\n")
+            if len(additionalList):
+                output.append("New Other Files (%s):" % len(additionalList))
+                output.extend(sorted(list(self.newOthers)))
+                output.append("\n")
+            if len(revokedTables):
+                output.append("Revoked Tables (%s):" % len(revokedTables))
+                output.extend(self.__printIter(revokedTables))
+                output.append("\n")
+            if len(revokedFiles):
+                output.append("Revoked Files (%s):" % len(revokedFiles))
+                output.extend(self.__printWithPath(revokedFiles, c, args['releaseNew']))
+                output.append("\n")
+            if len(revokedGbdbs):
+                output.append("Revoked Gbdbs (%s):" % len(revokedGbdbs))
+                output.extend(self.__printGbdbPath(revokedGbdbs, args['database']))
+                output.append("\n")
+
+        # NOTE(review): __init__ only builds this report when there were no errors or 'ignore' is set
+        if not args['ignore']:
+            output.append("No Errors")
+        return output
+
+    def printErrors(self, errors):
+        """Group "category: message" errors by category for printing.
+
+        Returns an "Errors (N):" header followed by each category (sorted)
+        and its sorted messages.  An entry without a colon becomes a category
+        with one empty message instead of raising IndexError.
+        """
+        errorsDict = {}
+        for i in errors:
+            (category, colon, message) = i.partition(":")
+            errorsDict.setdefault(category, []).append(message)
+        output = ["Errors (%s):" % len(errors)]
+        for category in sorted(errorsDict):
+            output.append("%s:" % category)
+            output.extend("%s" % j for j in sorted(errorsDict[category]))
+        return output
+
+    def __init__(self, args):
+        """Run all release checks and leave the result lines in self.output.
+
+        args -- dict with 'database', 'composite', 'releaseNew',
+                'releaseOld', 'loose', 'ignore' and 'summary'.
+
+        A releaseOld that is not a positive number no larger than releaseNew
+        selects solo mode (report on releaseNew alone), as does a releaseNew
+        of 1 or less.  self.output holds the report, or the grouped errors
+        when errors were found and 'ignore' is off.
+        """
+        self.releaseNew = args['releaseNew']
+        self.releaseOld = args['releaseOld']
+        self.database = args['database']
+        self.composite = args['composite']
+        self.loose = args['loose']
+        self.ignore = args['ignore']
+        self.summary = args['summary']
+        self.args = args
+
+        errors = []
+        c = track.CompositeTrack(self.database, self.composite)
+
+        #sanitize arguments; releases are compared as numbers, not as strings
+        if not self.releaseOld.isdigit():
+            self.releaseOld = 'solo'
+        elif int(self.releaseOld) <= 0 or int(self.releaseOld) > int(self.releaseNew):
+            self.releaseOld = 'solo'
+        if int(self.releaseNew) <= 1:
+            #a first release has nothing before it to compare against
+            self.releaseOld = 'solo'
+
+        if self.releaseOld != 'solo':
+            #compare mode: the new release against an older one
+            self.newReleaseFiles = c.releases[int(self.releaseNew)-1]
+            self.oldReleaseFiles = c.releases[int(self.releaseOld)-1]
+            self.newMdb = c.alphaMetaDb
+            self.oldMdb = c.publicMetaDb
+
+            #check if all files listed in release directories have associated metaDb entries
+            (self.newMdb, self.revokedSet, self.revokedFiles, self.atticSet, self.newSupplementalSet, newFileErrors) = self.checkMetaDbForFiles("alpha metaDb", "new")
+            (self.oldMdb, spam, eggs, ham, self.oldSupplementalSet, oldFileErrors) = self.checkMetaDbForFiles("public metaDb", "old")
+            errors.extend(newFileErrors)
+            errors.extend(oldFileErrors)
+
+            #checks to see that nothing has disappeared between public and alpha
+            errors.extend(self.__checkAlphaForDropped("alpha metaDb", "stanza"))
+            self.missingFiles = self.__checkFilesForDropped()
+            errors.extend(self.__checkMd5sums())
+
+            #checks and gets tables that are present, also returns a revoked set of tables for new
+            (self.newTableSet, self.revokedTableSet, newTableError) = self.checkTableStatus("alpha metaDb", "new")
+            (self.oldTableSet, spam, oldTableError) = self.checkTableStatus("public metaDb", "old")
+            errors.extend(newTableError)
+            errors.extend(oldTableError)
+
+            #same as above except for gbdbs
+            (self.newGbdbSet, self.revokedGbdbs, newGbdbError) = self.getGbdbFiles("new")
+            (self.oldGbdbSet, eggs, oldGbdbError) = self.getGbdbFiles("old")
+            errors.extend(newGbdbError)
+            errors.extend(oldGbdbError)
+
+            #clean the special files out and separate the master file lists into the
+            #three required ones: wgEncode, supplemental and additional
+            self.totalFiles = self.__cleanSpecialFiles(set(self.newReleaseFiles))
+            self.oldTotalFiles = self.__cleanSpecialFiles(set(self.oldReleaseFiles))
+            (self.oldTotalFiles, self.additionalList, self.oldAdditionalList, self.totalFiles) = self.__separateOutAdditional()
+
+            #get the stuff you need to push, also table sizes
+            self.pushTables = self.newTableSet - self.oldTableSet
+            self.pushFiles = self.totalFiles - self.oldTotalFiles
+            self.pushGbdbs = self.newGbdbSet - self.oldGbdbSet
+            self.tableSize = self.__getTableSize()
+
+            #don't build the report unless the ignore option is on or there are no errors
+            if (not errors) or self.ignore:
+                self.output = self.printReport(args, c)
+            else:
+                self.output = self.printErrors(errors)
+        else:
+            #solo mode: report on the new release alone
+            self.newReleaseFiles = c.releases[int(self.releaseNew)-1]
+            self.newMdb = c.alphaMetaDb
+
+            (self.newMdb, self.revokedSet, self.revokedFiles, self.atticSet, self.newSupplementalSet, newFileErrors) = self.checkMetaDbForFiles("alpha metaDb", "new")
+            errors.extend(newFileErrors)
+
+            (self.newTableSet, self.revokedTableSet, newTableError) = self.checkTableStatus("alpha metaDb", "new")
+            errors.extend(newTableError)
+
+            self.tableSize = self.__getTableSize()
+
+            (self.newGbdbSet, self.revokedGbdbs, newGbdbError) = self.getGbdbFiles("new")
+            errors.extend(newGbdbError)
+
+            #clean out special files we don't push i.e. md5sum.history
+            self.totalFiles = self.__cleanSpecialFiles(set(self.newReleaseFiles))
+
+            #makes list for additional files
+            (self.oldTotalFiles, self.oldSupplementalSet) = (set(), set())
+            (self.oldReleaseFiles, self.additionalList, self.oldAdditionalList, self.totalFiles) = self.__separateOutAdditional()
+
+            if (not errors) or self.ignore:
+                self.output = self.printReportOne(args, c)
+            else:
+                self.output = self.printErrors(errors)
+
+
+def main():
+    """Parse the command line, build the notes and print them.
+
+    Prints the help text and returns when called without arguments or with
+    a non-numeric releaseNew; writes an error to stderr and returns when
+    releaseOld is higher than releaseNew.
+    """
+    # NOTE(review): the second example in the epilog uses --full, which is not an option defined below
+    parser = argparse.ArgumentParser(
+        prog='mkChangeNotes',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description='Writes out notes file for packing to QA',
+        epilog=
+    """Examples:
+
+    mkChangeNotes hg19 wgEncodeUwDnase 3 2 --loose
+    mkChangeNotes hg19 wgEncodeSydhTfbs 1 - --full
+    mkChangeNotes hg19 wgEncodeCshlLongRnaSeq 1 -
+
+    """
+        )
+    parser.add_argument('-l', '--loose', action="store_true", default=0, help='Loose checking for legacy elements. Will be retired once all tracks go through a release cycle')
+    parser.add_argument('-i', '--ignore', action="store_true", default=0, help='Ignore errors, print out report.')
+    parser.add_argument('-s', '--summary', action="store_true", default=0, help='Print summary stats only.')
+    parser.add_argument('database', help='The database, typically hg19 or mm9')
+    parser.add_argument('composite', help='The composite name, wgEncodeCshlLongRnaSeq for instance')
+    parser.add_argument('releaseNew', help='The new release to be released')
+    parser.add_argument('releaseOld', nargs='?', default='-', help='The old release that is already released, if on release 1, or solo release mode, put anything here')
+
+    if len(sys.argv) == 1:
+        parser.print_help()
+        return
+    args = parser.parse_args(sys.argv[1:])
+    if not args.releaseNew.isdigit():
+        parser.print_help()
+        return
+
+    if not args.releaseOld.isdigit():
+        args.releaseOld = 'solo'
+    elif int(args.releaseOld) > int(args.releaseNew):
+        # nothing sensible to compare: say so and stop before any check runs
+        sys.stderr.write("Old Release is higher than New Release\n")
+        return
+
+    argsdict = {'database': args.database, 'composite': args.composite, 'releaseNew': args.releaseNew, 'releaseOld': args.releaseOld, 'loose': args.loose, 'ignore': args.ignore, 'summary': args.summary}
+    notes = makeNotes(argsdict)
+    for line in notes.output:
+        # a parenthesised single value prints the same under the Python 2 print statement
+        print(line)
+
+if __name__ == '__main__':
+    main()
+