9a49290fedc8b4f9c7a631dedcfe8ac3aa2b4cee
chinhli
  Thu Oct 20 14:10:43 2011 -0700
merge conflict resolved
diff --git python/programs/mkChangeNotes/mkChangeNotes python/programs/mkChangeNotes/mkChangeNotes
index 9f7c464..297a870 100755
--- python/programs/mkChangeNotes/mkChangeNotes
+++ python/programs/mkChangeNotes/mkChangeNotes
@@ -1,413 +1,609 @@
 #!/hive/groups/encode/dcc/bin/python
-import sys, os, re, argparse, subprocess
+import sys, os, re, argparse, subprocess, math
 from ucscgenomics import ra, track
 
 def checkMetaDbForFiles(mdb, files, status, loose):
     errors = []
+    revokedset = set()
+    revokedfiles = set()
+    atticset = set()
+    supplementalset = set()
+    filtermdb = ra.RaFile()
+    
     for i in files:
+        if re.match('supplemental', i):
+            supplementalset.add(i)
         if not re.match('wgEncode.*', i):
             continue
+        
         filestanza = mdb.filter(lambda s: re.match(".*%s.*" % i,s['fileName']), lambda s: s)
+        #a file name should match only 1 stanza; loop over the result just in case it matches more
         if filestanza:
-            pass
+            for j in filestanza:
+                filtermdb[j.name] = j
+                if 'objStatus' in j and re.search('revoked|replaced|renamed', j['objStatus']):
+                    revokedfiles.add(i)
+                    revokedset.add(j.name)
+                if 'attic' in j:
+                    atticset.add(j.name)
         else:
             #pass    
             if loose and re.match('.*bai', i):
                 pass
             else:
                 errors.append("metaDb: %s has is not mentioned in %s" % (i, status))
                 
-    return errors
+    return (filtermdb, revokedset, revokedfiles, atticset, supplementalset, errors)
 
 def checkAlphaForDropped(new, old, status, type):
     errors=[]
-    for i in old:
-        if re.search('MAGIC', i):
-            pass
-            if i in old:
-                errors.append("MAGIC number same in alpha and public metaDb")
-            else:
-                continue
-        if not re.match('wgEncode.*', i):
-            continue
-        if i in new:
-            pass
-        else:
+    diff = set(old) -set(new)
+    for i in diff:
             errors.append("%s: %s missing from %s" % (type, i, status))
-            
     return errors
 
-def checkTableStatus(mdb, files, database, composite, status, loose):
+def checkFilesForDropped(new, old):
+    diff = set(old) - set(new)
+    return diff
+
+def checkTableStatus(mdb, files, database, composite, status, loose, revokedset):
     errors=[]
+    
     #home = os.environ['HOME']
     #dbhost = ''
     #dbuser = ''
     #dbpassword = ''
     #p = re.compile('db.(\S+)=(\S+)')
     #with open("%s/.hg.conf" % home) as f:
     #    for line in f:
     #        line.rstrip("\n\r")
     #        if p.match(line):
     #            m = p.match(line)
     #            if m.groups(1)[0] == 'host':
     #                dbhost = m.groups(1)[1]
     #            if m.groups(1)[0] == 'user':
     #                dbuser = m.groups(1)[1]
     #            if m.groups(1)[0] == 'password':
     #                dbpassword = m.groups(1)[1]
     #print dbhost
     #print dbuser
     #print dbpassword
 
     #db = MySQLdb.connect (host = dbhost,
     #            user = dbuser,
     #            passwd = dbpassword,
     #            db = database)
 
     #cursor = db.cursor ()
     #cursor.execute ("show tables like '%s%s'" % (composite, "%"))
     #tableset = set(cursor.fetchall())
     
-    mdbtableset = set(mdb.filter(lambda s: s['objType'] == 'table' and 'tableName' in s and 'attic' not in s and s['fileName'].split(",",1)[0] in files, lambda s: s['metaObject']))
-    atticset = set(mdb.filter(lambda s: 'attic' in s, lambda s: s['metaObject']))
-    revokedset = set(mdb.filter(lambda s: re.search('revoked|replaced|renamed', s['objStatus']), lambda s: s['metaObject']))
+    mdbtableset = set(mdb.filter(lambda s: s['objType'] == 'table' and 'tableName' in s and 'attic' not in s, lambda s: s['metaObject']))
     mdbtableset = mdbtableset - revokedset
-
+    mdbtableset = set(mdb.filter(lambda s: s['metaObject'] in mdbtableset, lambda s: s['tableName']))
+    revokedtableset = set(mdb.filter(lambda s: s['metaObject'] in revokedset, lambda s: s['tableName']))
     sep = "','"
     tablestr = sep.join(mdbtableset)
     tablestr = "'" + tablestr + "'"
 
     #this should really be using python's database module, but I'd need admin access to install it
     #at this point, I am just parsing the output form hgsql
     cmd = "hgsql %s -e \"select table_name from information_schema.TABLES where table_name in (%s)\"" % (database, tablestr)
     p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
     output = p.stdout.read()
     
     sqltableset = set(output.split("\n")[1:])
 
     missingTableNames = set(mdb.filter(lambda s: s['objType'] == 'table' and 'tableName' not in s and 'attic' not in s, lambda s: s['metaObject']))
 
     missingFromDb = mdbtableset - sqltableset
     
     if missingTableNames:
         for i in missingTableNames:
             errors.append("table: %s is type obj, but missing tableName field called by %s" % (i, status))
 
     if missingFromDb:
         for i in missingFromDb:
             errors.append("table: %s table not found in Db called by %s" % (i, status))
 
-    return (mdbtableset, revokedset, atticset, errors)
+    return (mdbtableset, revokedtableset, errors)
 
-def getGbdbFiles(database, tableset, mdb):
+def getGbdbFiles(database, tableset, revokedset, mdb):
     errors = []
     sep = "','"
     tablestr = sep.join(tableset)
     tablestr = "'" + tablestr + "'"
+    revokestr = sep.join(revokedset)
+    revokestr = "'" + revokestr + "'"
 
     cmd = "hgsql %s -e \"select table_name from information_schema.columns where table_name in (%s) and column_name = 'fileName'\"" % (database, tablestr)
     p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
     output = p.stdout.read()
     
     gbdbtableset = set(output.split("\n")[1:])
     
-    file1stanzalist = mdb.filter(lambda s: s['tableName'] in set(output.split("\n")[1:]), lambda s: s)
+    cmd = "hgsql %s -e \"select table_name from information_schema.columns where table_name in (%s) and column_name = 'fileName'\"" % (database, revokestr)
+    p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
+    output = p.stdout.read()
     
+    revokedtableset = set(output.split("\n")[1:])
+    
+    file1stanzalist = mdb.filter(lambda s: s['tableName'] in gbdbtableset, lambda s: s)
+    revokedstanzalist = mdb.filter(lambda s: s['tableName'] in revokedtableset, lambda s: s)
     gbdbfileset = set()
+    revokedfileset = set()
+    
     for i in file1stanzalist:
         filelist = i['fileName'].split(',')
         for j in filelist:
             if os.path.isfile("/gbdb/%s/bbi/%s" % (database, j)):
                 gbdbfileset.add(j)
             else:
-                errors.append("gbdb: %s does not exist in /gbdb/%s/bbi" % (j, databsase))
+                errors.append("gbdb: %s does not exist in /gbdb/%s/bbi" % (j, database))
+    
+    for i in revokedstanzalist:   #stanzas whose tableName is a revoked table that has a fileName column
+        filelist = i['fileName'].split(',')   #one stanza may list several comma-separated files
+        for j in filelist:
+            if os.path.isfile("/gbdb/%s/bbi/%s" % (database, j)):
+                revokedfileset.add(j)
+            else:
+                errors.append("gbdb: revoked gbdb %s does not exist in /gbdb/%s/bbi" % (j, database))   #was the undefined name 'databsase' (NameError)
     
-    return (gbdbfileset, errors)
+    return (gbdbfileset, revokedfileset, errors)
 
-def checkMd5sums(newfiles, oldfiles):
+def getTableSize(mdbtableset, database):
+    #Return the summed data + index size in MB (rounded up) that information_schema.TABLES reports for the tables named in mdbtableset.
+    if not mdbtableset:
+        return float(0)   #nothing to look up, and an empty WHERE clause is not valid SQL
+    #one "table_name = '...'" term per table, OR-ed together to form the whole WHERE clause
+    orstr = " OR ".join(["table_name = '%s'" % i for i in mdbtableset])
+
+    #orstr already names the column; the old "WHERE table_name = %s" doubled it in the first term
+    cmd = "hgsql %s -e \"SELECT ROUND(data_length/1024/1024,2) total_size_mb, ROUND(index_length/1024/1024,2) total_index_size_mb FROM information_schema.TABLES WHERE %s\"" % (database, orstr)
+    p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
+    output = p.stdout.read()
+    tablesize = float(0)
+    for i in output.split("\n")[1:]:   #first output line is skipped (presumably the column-header row)
+        for j in i.split():   #both columns (data size, index size) are added to the total
+            tablesize = tablesize + float(j)
+    return math.ceil(tablesize)
+
+def checkMd5sums(newfiles, oldfiles, loose):
     errors = []
     for i in oldfiles:
         if i not in newfiles:
             pass
-        else:
+        elif re.match('wgEncode.*', i):
             if oldfiles[i].md5sum != newfiles[i].md5sum:
                 errors.append("file: %s have changed md5sums between releases. %s vs %s" % (i, oldfiles[i].md5sum, newfiles[i].md5sum))
-
+    #loose mode: md5sum changes are still collected above but deliberately not reported
+    if loose:
+        errors = list()
     return errors
 
-def makeFileSizes(c, args, pushFiles, pushGbdbs, additionalList):
-    pushFileSize = list()
-    for i in pushGbdbs:
-        pushFileSize.append("%s/%s" % (c.downloadsDirectory + 'release' + args.releaseNew, i))    
-    for i in pushFiles:
-        pushFileSize.append("%s/%s" % (c.downloadsDirectory + 'release' + args.releaseNew, i))
-    for i in additionalList:
-        pushFileSize.append("%s/%s" % (c.downloadsDirectory + 'release' + args.releaseNew, i))
+def makeFileSizes(c, args, inlist):
+    checklist = list()
+    for i in inlist:
+        checklist.append("%s/%s" % (c.downloadsDirectory + 'release' + args.releaseNew, i))    
     filesizes = 0
-    for i in pushFileSize:
+    for i in checklist:
         filesizes = filesizes + int(os.path.getsize(i))
-    filesizes = filesizes / (1024**2)
+    filesizes = math.ceil(float(filesizes) / (1024**2))
+    return int(filesizes)
         
-    return filesizes
-
-def cleanSpecialFiles(pushFiles, totalFiles):
+def cleanSpecialFiles(inlist):
     specialRemoveList = ['md5sum.history']
     for i in specialRemoveList:
-        if i in pushFiles:
-            pushFiles.remove(i)
-        if i in totalFiles:
-            totalFiles.remove(i)
+        if i in inlist:
+            inlist.remove(i)
     
-    return(pushFiles, totalFiles)
+    return(inlist)
     
-def separateOutAdditional(oldReleaseFiles, totalFiles, pushFiles):
+def separateOutAdditional(oldReleaseFiles, totalFiles, newSupplementalSet, oldSupplementalSet):
     additionalList = set()
+    oldAdditionalList = set()
+    newTotal = set()
+    newOld = set()
     for i in totalFiles:
-        if not re.match('wgEncode.*', i):
+        if i in newSupplementalSet:
+            continue
+        elif not re.match('wgEncode.*', i):
             additionalList.add(i)
-    for i in additionalList:
-        if i in pushFiles:
-            pushFiles.remove(i)
-    oldReleaseFiles = oldReleaseFiles - (oldReleaseFiles - totalFiles)
+        else:
+            newTotal.add(i)
+    for i in oldReleaseFiles:   #split the old release into wgEncode files (newOld) and dropped "other" files
+        if not re.match('wgEncode.*', i):
+            if i in totalFiles:
+                pass   #still present in the new release, so it was not dropped
+            elif i in oldSupplementalSet:   #old supplemental files are reported in their own section, not as "other"
+                continue
+            else:
+                oldAdditionalList.add(i)
+        else:
+            newOld.add(i)
+    
+    oldReleaseFiles = newOld
 
-    return(oldReleaseFiles, pushFiles, additionalList)
+    return(newOld, additionalList, oldAdditionalList, newTotal)
 
-def printWithPath(set, c, args):
+def printWithPath(set, c, release):
     for i in sorted(set):
-        print "%s/%s" % (c.downloadsDirectory + 'release' + args.releaseNew, i)
+        print "%s/%s" % (c.httpDownloadsPath + 'release' + release, i)
 
-def printReport(args, totalFiles, newGbdbSet, newTableSet, additionalList, pushTables, pushFiles, pushGbdbs, c, oldTableSet, oldReleaseFiles, oldGbdbSet, filesizes, newAtticSet, oldAtticSet, newRevokedSet, oldRevokedSet):
-    revokedSet = ((newRevokedSet | oldRevokedSet) - (oldRevokedSet - newRevokedSet))
-    unrevokedSet = (oldRevokedSet - newRevokedSet)
-    atticSet = ((newAtticSet | oldAtticSet) - (oldAtticSet - newAtticSet))
-    unatticSet = (oldAtticSet - newAtticSet) 
+def printGbdbPath(set, database):
+    for i in sorted(set):
+        print "/gbdb/%s/bbi/%s" % (database, i)
+
+def printIter(inlist):
+    for i in sorted(inlist):
+        print i
+
+def printReport(args, totalFiles, newGbdbSet, newTableSet, additionalList, oldAdditionalList, pushTables, pushFiles, pushGbdbs, c, oldTableSet, oldReleaseFiles, oldGbdbSet, atticSet, revokedFiles, mdb, revokedTableSet, revokedGbdbs, missingFiles, newSupplementalSet, oldSupplementalSet, tableSize):
+    #the groups here need to be predefined, I just copied and pasted after working out what they were
     sep = "\n"    
     print "mkChangeNotes v2"
-    print "%s %s Release %s" % (args.database, args.composite, args.releaseNew)
+    print "%s %s Release %s vs Release %s" % (args.database, args.composite, args.releaseNew, args.releaseOld)    
     print "" 
-    print "Totals:"
-    print "New Files and Gbdbs: %d" % (int(len(additionalList) + int(len(pushFiles)) + int(len(pushGbdbs))))
-    print "Total size of files to be pushed: %d MB" % filesizes
-    print ""
-    print "Total Files: %d" % int(len(totalFiles | oldReleaseFiles))
-    print "Total Gbdbs: %d" % int(len(newGbdbSet | oldGbdbSet))
-    print "Total Tables: %d" % int(len(newTableSet | oldTableSet))
-    print "Other Files: %d" % int(len(additionalList))
-    print "Total Revoked: %d" % int(len(revokedSet))
-    print "Total Unrevoked: %d" % int(len(unrevokedSet))
-    print "Total Attic: %d" % int(len(atticSet))
-    print "Total Un-attic: %d" % int(len(unatticSet))
-    print "\n"
-    print "New Tables (%s):" % len(pushTables)
-    print sep.join(sorted(pushTables))
-    print "\n"
-    print "New Files (%s):" % len(pushFiles)
-    printWithPath(pushFiles, c, args)
-    print "\n"
-    print "New Gbdbs (%s):" % len(pushGbdbs)
-    printWithPath(pushGbdbs, c, args)
-    print "\n"
-    print "Additional Files (%s):" % len(additionalList)
-    printWithPath(additionalList, c, args)
+    print "QA Count Summaries for Release %s:" % args.releaseNew
+    print "Tables: %d" % int(len(newTableSet))
+    print "Files: %d" % int(len(totalFiles - revokedFiles))
+    print "Gbdbs: %d" % int(len(newGbdbSet))
+    print "Supplemental: %d" % int(len(newSupplementalSet - oldSupplementalSet))
+    print "Other: %d" % int(len(additionalList))
     print "\n"
-    print "Active Untouched Tables (%s):" % len(oldTableSet)
-    print sep.join(sorted(oldTableSet))
+    totalsize = 0
+    size = 0
+    print "Sizes of New:"
+    tableGb = int(tableSize/1024)
+    if tableGb > 1:
+        print "Tables: %d MB (%d GB)" % (tableSize, tableGb)
+    else:
+        print "Tables: %d MB" % tableSize
+    totalsize = totalsize + tableSize
+    size = int(makeFileSizes(c, args, pushFiles))
+    totalsize = totalsize + size
+    if int(size/1024) > 1:
+        print "Files: %d MB (%d GB)" % (size, int(size/1024))
+    else:
+        print "Files: %d MB" % size
+    size = int(makeFileSizes(c, args, pushGbdbs))
+    totalsize = totalsize + size
+    if int(size/1024) > 1:
+        print "Gbdbs: %d MB (%d GB)" % (size, int(size/1024))
+    else:
+        print "Gbdbs: %d MB" % size
+    size = int(makeFileSizes(c, args, (newSupplementalSet - oldSupplementalSet)))   #only supplemental files new in this release
+    totalsize = totalsize + size
+    if int(size/1024) > 1:
+        print "Supplemental: %d MB (%d GB)" % (size, int(size/1024))   #format needs two %d for the two values passed
+    else:
+        print "Supplemental: %d MB" % size
+    size = int(makeFileSizes(c, args, (additionalList)))
+    totalsize = totalsize + size
+    if int(size/1024) > 1:
+        print "Other: %d MB (%d GB)" % (size, int(size/1024))   #show GB like the other size lines
+    else:
+        print "Other: %d MB" % size
+    if int(totalsize/1024) > 1:
+        print "Total: %d MB (%d GB)" % (totalsize, int(totalsize/1024))
+    else:
+        print "Total: %d MB" % totalsize
     print "\n"
-    print "Active Untouched Files (%s):" % len(set(oldReleaseFiles))
-    printWithPath(oldReleaseFiles, c, args)
+    print "TABLES:"
+    print "New: %s" % len(pushTables)
+    print "Persisting: %s" % len(oldTableSet & newTableSet)
+    print "Revoked/Replaced/Renamed: %s" % len(revokedTableSet)
+    print "New + Persisting: %s" % len(newTableSet)
+    print "Total (New + Persisting + Revoked/Replaced/Renamed): %s" % len(newTableSet | oldTableSet | revokedTableSet)
+    if args.full:
+        print ""
+        print "New Tables (%s):" % len(pushTables)
+        printIter(pushTables)
+        print ""
+        print "Persisting (%s):" % len(oldTableSet & newTableSet)
+        printIter(oldTableSet & newTableSet)
+        print ""
+        print "Revoked/Replaced/Renamed Tables (%s):" % len(revokedTableSet)
+        printIter(revokedTableSet)
     print "\n"
-    print "Active Untouched Gbdbs (%s):" % len(oldGbdbSet)
-    printWithPath(oldGbdbSet, c, args)
+    #downloadables = total - revoked
+    print "DOWNLOAD FILES:"
+    print "New: %s" % len(pushFiles - revokedFiles)
+    print "Persisting: %s" % len((totalFiles & oldReleaseFiles) - revokedFiles)
+    print "Revoked/Replaced/Renamed: %s" % len(revokedFiles)
+    print "New + Persisting: %s" % len((pushFiles - revokedFiles) | ((totalFiles & oldReleaseFiles) - revokedFiles))
+    print "Total (New + Persisting + Revoked/Replaced/Renamed): %s" % len(totalFiles | oldReleaseFiles | revokedFiles)
+    if args.full:
+        print ""
+        print "New Download Files (%s):" % len(pushFiles - revokedFiles)
+        printWithPath((pushFiles - revokedFiles), c, args.releaseNew)
+        print ""
+        print "Persisting Download Files (%s):" % len((totalFiles & oldReleaseFiles) - revokedFiles)    
+        printWithPath(((totalFiles & oldReleaseFiles) - revokedFiles), c, args.releaseNew)
+        print ""
+        print "Revoked/Replaced/Renamed Download Files (%s):" % len(revokedFiles)    
+        printWithPath(revokedFiles, c, args.releaseNew)
     print "\n"
-    print "Revoked Objects (%s):" % len(revokedSet)
-    for i in sorted(revokedSet):
-        print i
+    print "GBDBS:"
+    print "New: %s" % len(pushGbdbs)
+    print "Persisting: %s" % len((newGbdbSet & oldGbdbSet) - revokedGbdbs)
+    print "Revoked/Replaced/Renamed: %s" % len(revokedGbdbs)
+    print "New + Persisting: %s" % len(pushGbdbs | ((newGbdbSet & oldGbdbSet) - revokedGbdbs))
+    print "Total (New + Persisting + Revoked/Replaced/Renamed): %s" % len(newGbdbSet | oldGbdbSet | revokedGbdbs)
+    if args.full:
+        print ""
+        print "New Gbdb Files (%s):" % len(pushGbdbs)
+        printGbdbPath(pushGbdbs, args.database)
+        print ""
+        print "Persisting Gbdb Files (%s):" % len((newGbdbSet & oldGbdbSet) - revokedGbdbs)
+        printGbdbPath((newGbdbSet & oldGbdbSet) - revokedGbdbs, args.database)
+        print ""
+        print "Revoked/Replaced/Renamed Gbdb Files (%s):" % len(revokedGbdbs)
+        printGbdbPath(revokedGbdbs, args.database)
     print "\n"
-    print "Attic Objects (%s):" % len(atticSet)
-    for i in sorted(atticSet):
-        print i
+    print "SUPPLEMENTAL FILES:"
+    print "New: %s" % len(newSupplementalSet - oldSupplementalSet)
+    print "Persisting: %s" % len(oldSupplementalSet & newSupplementalSet)
+    print "Removed: %s" % len(oldSupplementalSet - newSupplementalSet)
+    print "New + Persisting: %s" % len((newSupplementalSet - oldSupplementalSet) | (oldSupplementalSet & newSupplementalSet))
+    print "Total: %s" % len(newSupplementalSet | oldSupplementalSet)
+    if args.full:
+        print ""
+        print "New Supplemental Files (%s):" % len(newSupplementalSet - oldSupplementalSet)
+        printWithPath(newSupplementalSet - oldSupplementalSet, c, args.releaseNew)
+        print ""
+        print "Persisting Supplemental Files (%s):" % len(oldSupplementalSet & newSupplementalSet)
+        printWithPath(oldSupplementalSet & newSupplementalSet, c, args.releaseNew)
+        print ""
+        print "Removed Supplemental Files (%s):" % len(oldSupplementalSet - newSupplementalSet)
+        printWithPath(oldSupplementalSet - newSupplementalSet, c, args.releaseNew)
     print "\n"
-    print "Unrevoked Objects (%s):" % len(unrevokedSet)
-    for i in sorted(unrevokedSet):
-        print i
+    print "OTHER FILES:"
+    print "New: %s" % len(additionalList)
+    print "Revoked/Replace: %s" % len(oldAdditionalList)
+    print "Total: %s" % len(additionalList | oldAdditionalList)
+    if args.full:
+        print "" 
+        print "New Other Files (%s):" % len(additionalList)
+        printWithPath(additionalList, c, args.releaseNew)
+        print ""
+        print "Revoked Other Files (%s):" % len(oldAdditionalList)
+        printWithPath(oldAdditionalList, c, args.releaseNew)
     print "\n"
-    print "Un-attic Objects (%s):" % len(unatticSet)
-    for i in sorted(unatticSet):
-        print i
+    print "Files that dropped between releases (%s):" % len(missingFiles)
+    printWithPath(missingFiles, c, args.releaseOld)
     print "\n"
+    if args.ignore: return   #errors were ignored, so do not claim "No Errors" below
     print "No Errors"
     
-def printReportOne(args, totalFiles, newGbdbSet, newTableSet, additionalList, pushTables, pushFiles, c, pushGbdbs, filesizes, atticSet, revokedSet):
+def printReportOne(args, totalFiles, revokedFiles, newGbdbSet, revokedGbdbs, newTableSet, revokedTables, additionalList, c, atticSet, newSupplementalSet, tableSize):
     print "mkChangeNotes v2"
-    print "%s %s Release %s against Release %s" % (args.database, args.composite, args.releaseNew, args.releaseOld)
+    print "%s %s Release %s" % (args.database, args.composite, args.releaseNew)
     print "" 
-    print "Totals:"
-    print "Files and Gbdbs: %d" % (int(len(totalFiles)) + int(len(newGbdbSet)))
-    print "Total size of files to be pushed: %d MB" % filesizes
+    print "QA Count Summaries for Release %s:" % args.releaseNew
+    print "Tables: %d" % int(len(newTableSet - revokedTables))
+    print "Files: %d" % int(len(totalFiles - revokedFiles))
+    print "Gbdbs: %d" % int(len(newGbdbSet - revokedGbdbs))
+    print "Supplemental: %d" % int(len(newSupplementalSet))
+    print "Other: %d" % int(len(additionalList))
     print ""
-    print "Files: %d" % int(len(totalFiles))
-    print "Gbdbs: %d" % int(len(newGbdbSet))
-    print "Tables: %d" % int(len(newTableSet))
-    print "Other Files: %d" % int(len(additionalList))
-    print "Total Revoked: %d" % int(len(revokedSet))
-    print "Total Attic: %d" % int(len(atticSet))
+    print "REVOKED:"
+    print "Tables: %s" % len(revokedTables)
+    print "Files: %s" % len(revokedFiles)
+    print "Gbdbs: %s" % len(revokedGbdbs)
     print "\n"
-    sep = "\n"    
-    print "New Tables (%s):" % len(pushTables)
-    print sep.join(sorted(pushTables))
+    totalsize = 0;
+    print "Sizes of New:"
+    tableGb = int(tableSize / 1024)
+    if tableGb > 1:
+        print "Tables: %d MB (%d GB)" % (tableSize, tableGb)
+    else:
+        print "Tables: %d MB" % tableSize
+    totalsize = totalsize + tableSize
+    size = int(makeFileSizes(c, args, totalFiles))
+    totalsize = totalsize + size
+    if int(size/1024) > 1:
+        print "Files: %d MB (%d GB)" % (size, int(size/1024))
+    else:
+        print "Files: %d MB" % size
+    size = int(makeFileSizes(c, args, newGbdbSet))
+    totalsize = totalsize + size
+    if int(size/1024) > 1:
+        print "Gbdbs: %d MB (%d GB)" % (size, int(size/1024))
+    else:
+        print "Gbdbs: %d MB" % size
+    size = int(makeFileSizes(c, args, newSupplementalSet))
+    totalsize = totalsize + size
+    if int(size/1024) > 1:
+        print "Supplemental: %d MB (%d GB)" % (size, int(size/1024))
+    else:
+        print "Supplemental: %d MB" % size
+    size = int(makeFileSizes(c, args, (additionalList)))   #size in MB of the non-wgEncode, non-supplemental files
+    totalsize = totalsize + size
+    if int(size/1024) > 1:
+        print "Other: %d MB (%d GB)" % (size, int(size/1024))   #show GB like the other size lines
+    else:
+        print "Other: %d MB" % size
+    if int(totalsize/1024) > 1:
+        print "Total: %d MB (%d GB)" % (totalsize, int(totalsize/1024))
+    else:
+        print "Total: %d MB" % totalsize
     print "\n"
-    print "New Files (%s):" % len(pushFiles)
-    printWithPath(pushFiles, c, args)
+    if args.full:
+        print ""
+        print "New Tables (%s):" % len(newTableSet - revokedTables)
+        printIter(newTableSet - revokedTables)
     print "\n"
-    print "New Gbdbs (%s):" % len(pushGbdbs)
-    printWithPath(pushGbdbs, c, args)
+        print "New Download Files (%s):" % len(totalFiles - revokedFiles)
+        printWithPath(totalFiles - revokedFiles, c, args.releaseNew)
     print "\n"
-    print "Additional Files (%s):" % len(additionalList)
-    printWithPath(additionalList, c, args)
+        print "New Gbdb Files (%s):" % len(newGbdbSet - revokedGbdbs)
+        printGbdbPath(newGbdbSet - revokedGbdbs, args.database)
     print "\n"
-    print "Revoked Objects (%s):" % len(revokedSet)
-    for i in sorted(revokedSet):
-        print i
+        print "New Supplemental Files (%s):" % len(newSupplementalSet)
+        printWithPath(newSupplementalSet, c, args.releaseNew)
     print "\n"
-    print "Attic Objects (%s):" % len(atticSet)
-    for i in sorted(atticSet):
-        print i
+        print "New Other Files (%s):" % len(additionalList)
+        printWithPath(additionalList, c, args.releaseNew)
+        print "\n"
+        print "Revoked Tables (%s):" %len(revokedTables)
+        printIter(revokedTables)
+        print "Revoked Files (%s):" % len(revokedFiles)
+        printWithPath(revokedFiles, c, args.releaseNew)
     print "\n"
+        print "Revoked Gbdbs (%s):" % len(revokedGbdbs)
+        printGbdbPath(revokedGbdbs, args.database)
+        print "\n"
+    if args.ignore: return   #errors were ignored, so do not claim "No Errors" below
     print "No Errors"
 
+
 def printErrors(errors):
     errorsDict = {}
     for i in errors:
         line = i.split(":", 1)
         try:
             errorsDict[line[0]].append(line[1])
         except:
             errorsDict[line[0]] = []
             errorsDict[line[0]].append(line[1])
     print "Errors (%s):" % len(errors)
     for i in sorted(errorsDict.keys()):
         print "%s:" % i
         for j in sorted(errorsDict[i]):
             print "%s" % j
 
 def main():
 
     parser = argparse.ArgumentParser(
         prog='mkChangeNotes',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
         description='Writes out notes file for packing to QA',
-        epilog='example: encodeMkChange hg19 wgEncodeUwDnase 3 2'
+        epilog=
+"""Examples:
+
+mkChangeNotes hg19 wgEncodeUwDnase 3 2 --loose
+mkChangeNotes hg19 wgEncodeSydhTfbs 1 - --full
+mkChangeNotes hg19 wgEncodeCshlLongRnaSeq 1 -
+
+"""
         )
     parser.add_argument('-t', '--trackPath', help='Overrides the default track path ~/kent/src/hg/makeDb/trackDb/')
     parser.add_argument('-l', '--loose', action="store_true", default=0, help='Loose checking for legacy elements. Will be retired once all tracks go through a release cycle')
     parser.add_argument('-i', '--ignore', action="store_true", default=0, help='Ignore errors, print out report.')
+    parser.add_argument('-f', '--full', action="store_true", default=0, help='Print full stats.')
     parser.add_argument('database', help='The database, typically hg19 or mm9')
     parser.add_argument('composite', help='The composite name, wgEncodeCshlLongRnaSeq for instance')
     parser.add_argument('releaseNew', help='The new release to be released')
-    parser.add_argument('releaseOld', help='The old release that is already released')
+    parser.add_argument('releaseOld', nargs='?', default='-', help='The old release that is already released, if on release 1, or solo release mode, put anything here')
 
     if len(sys.argv) == 1:
         parser.print_help()
         return
     args = parser.parse_args(sys.argv[1:])
     if not args.releaseNew.isdigit():
         parser.print_help()
         return
 
-    
     c = track.CompositeTrack(args.database,args.composite)
 
     loose = args.loose
 
     errors = []
     
-    if args.releaseOld == "-":
-        args.releaseOld = 0
-    if int(args.releaseOld) > int(args.releaseNew):
+    if not args.releaseOld.isdigit():
+        args.releaseOld = 'solo'    
+    elif int(args.releaseOld) > int(args.releaseNew):
         errors.append("Old Release is higher than New Release")
         args.releaseOld = args.releaseNew
+        printErrors(errors)
+        return
 
-    if int(args.releaseNew) > 1:
+
+    if int(args.releaseNew) > 1 and str(args.releaseOld) != 'solo':
     
         newReleaseFiles = c.releases[int(args.releaseNew)-1]
         oldReleaseFiles = c.releases[int(args.releaseOld)-1]
 
         newMdb = c.alphaMetaDb
         oldMdb = c.publicMetaDb
             
-        errors.extend(checkMetaDbForFiles(newMdb, newReleaseFiles, "alpha metaDb", loose))    
-        errors.extend(checkMetaDbForFiles(oldMdb, oldReleaseFiles, "public metaDb", loose))
+        #check if all files listed in release directories have associated metaDb entries
+        (newMdb, revokedSet, revokedFiles, atticSet, newSupplementalSet, newFileErrors) = checkMetaDbForFiles(newMdb, newReleaseFiles, "alpha metaDb", loose)
+        (oldMdb, spam, eggs, ham, oldSupplementalSet, oldFileErrors) = checkMetaDbForFiles(oldMdb, oldReleaseFiles, "public metaDb", loose)
+        errors.extend(newFileErrors)
+        errors.extend(oldFileErrors)
+        
+        #checks to see that nothing has disappeared between public and alpha
         errors.extend(checkAlphaForDropped(newMdb, oldMdb, "alpha metaDb", "stanza"))
-        errors.extend(checkAlphaForDropped(newReleaseFiles, oldReleaseFiles, "new release download directory", "file"))
-        errors.extend(checkMd5sums(newReleaseFiles, oldReleaseFiles))
+        missingFiles = checkFilesForDropped(newReleaseFiles, oldReleaseFiles)
+        errors.extend(checkMd5sums(newReleaseFiles, oldReleaseFiles, loose))
 
-        (newTableSet, newRevokedSet, newAtticSet, newTableError) = checkTableStatus(newMdb, newReleaseFiles, args.database, args.composite, "alpha metaDb", loose)
+        #checks and gets tables that are present, also returns a revoked set of tables for new
+        (newTableSet, revokedTableSet, newTableError) = checkTableStatus(newMdb, newReleaseFiles, args.database, args.composite, "alpha metaDb", loose, revokedSet)
+        (oldTableSet, spam, oldTableError) = checkTableStatus(oldMdb, oldReleaseFiles, args.database, args.composite, "public metaDb", loose, revokedSet)
         errors.extend(newTableError)
-        (oldTableSet, oldRevokedSet, oldAtticSet, oldTableError) = checkTableStatus(oldMdb, oldReleaseFiles, args.database, args.composite, "public metaDb", loose)
         errors.extend(oldTableError)
 
-        (newGbdbSet, newGbdbError) = getGbdbFiles(args.database, newTableSet, newMdb)
+        #same as above except for gbdbs
+        (newGbdbSet, revokedGbdbs, newGbdbError) = getGbdbFiles(args.database, newTableSet, revokedTableSet, newMdb)
+        (oldGbdbSet, eggs, oldGbdbError) = getGbdbFiles(args.database, oldTableSet, set(), oldMdb)
         errors.extend(newGbdbError)
-        (oldGbdbSet, oldGbdbError) = getGbdbFiles(args.database, oldTableSet, oldMdb)
         errors.extend(oldGbdbError)
     
-        droppedTables = oldTableSet - newTableSet
-        if droppedTables:
-            for i in droppedTables:
-                errors.append("table: %s was dropped between releases" % i)
-
-
-#########
-        #some weird suggestion from online about python 3 not being able to compare strings to ints implicitly,
-        #here for future reference in case something breaks
-        #pushTables = sorted((newTableSet - oldTableSet), key=lambda item: (int(item.partition(' ')[0])
-        #           if item[0].isdigit() else float('inf'), item))
-        #pushFiles = sorted((newReleaseFiles - oldReleaseFiles), key=lambda item: (int(item.partition(' ')[0])
-        #           if item[0].isdigit() else float('inf'), item))
-        #pushGbdbs = sorted((newGbdbSet - oldGbdbSet), key=lambda item: (int(item.partition(' ')[0])
-        #           if item[0].isdigit() else float('inf'), item))
-#########
-
-        pushTables = sorted((newTableSet - oldTableSet))
-        pushFiles = sorted((set(newReleaseFiles) - set(oldReleaseFiles)))
-        pushGbdbs = sorted((newGbdbSet - oldGbdbSet))
+        #for ease of typing
         totalFiles = set(newReleaseFiles)
 
-        (pushFiles, totalFiles) = cleanSpecialFiles(pushFiles, totalFiles)
-        (oldReleaseFiles, totalFiles) = cleanSpecialFiles(set(oldReleaseFiles), totalFiles)
-        (oldReleaseFiles, pushFiles, additionalList) = separateOutAdditional(oldReleaseFiles, totalFiles, pushFiles)
+        #these could honestly be moved earlier, get a file list processing section or something
+        #they clean out special files and separate the master file list into the 3 required
+        #ones: wgEncode, supplemental and additional.
+        totalFiles = cleanSpecialFiles(totalFiles)
+        oldReleaseFiles = cleanSpecialFiles(set(oldReleaseFiles))
+        (oldReleaseFiles, additionalList, oldAdditionalList, totalFiles) = separateOutAdditional(oldReleaseFiles, totalFiles, newSupplementalSet, oldSupplementalSet)
         
-        filesizes = makeFileSizes(c, args, pushFiles, pushGbdbs, additionalList)   
+        #get the stuff you need to push, also table sizes        
+        pushTables = set(sorted((newTableSet - oldTableSet)))
+        tableSize = getTableSize(pushTables, args.database)
+        pushFiles = set(sorted((totalFiles - oldReleaseFiles)))
+        pushGbdbs = set(sorted((newGbdbSet - oldGbdbSet)))
         
+        #don't print report unless ignore option is on or no errors
         if (not errors) or args.ignore:
-            printReport(args, totalFiles, newGbdbSet, newTableSet, additionalList, pushTables, pushFiles, pushGbdbs, c, oldTableSet, oldReleaseFiles, oldGbdbSet, filesizes, newAtticSet, oldAtticSet, newRevokedSet, oldRevokedSet)
-
+            printReport(args, totalFiles, newGbdbSet, newTableSet, additionalList, oldAdditionalList, pushTables, pushFiles, pushGbdbs, c, oldTableSet, oldReleaseFiles, oldGbdbSet, atticSet, revokedFiles, newMdb, revokedTableSet, revokedGbdbs, missingFiles, newSupplementalSet, oldSupplementalSet, tableSize)
         else:
             printErrors(errors)
 
 
-    else:
+    elif args.releaseOld == 'solo':
 
-        args.releaseOld = 0
         newReleaseFiles = c.releases[int(args.releaseNew)-1]
+        
         newMdb = c.alphaMetaDb
-        errors.extend(checkMetaDbForFiles(newMdb, newReleaseFiles, "alpha metaDb", loose))
-        (newTableSet, newRevokedSet, newAtticSet, newTableError) = checkTableStatus(newMdb, newReleaseFiles, args.database, args.composite, "alpha metaDb", loose)
+        
+        (newMdb, revokedSet, revokedFiles, atticSet, newSupplementalSet, newFileErrors) = checkMetaDbForFiles(newMdb, newReleaseFiles, "alpha metaDb", loose)
+        errors.extend(newFileErrors)
+        
+        (newTableSet, revokedTableSet, newTableError) = checkTableStatus(newMdb, newReleaseFiles, args.database, args.composite, "alpha metaDb", loose, revokedSet)
         errors.extend(newTableError)
-        (newGbdbSet, newGbdbError) = getGbdbFiles(args.database, newTableSet, newMdb)
+        
+        tableSize = getTableSize(newTableSet, args.database)
+
+        (newGbdbSet, revokedGbdbs, newGbdbError) = getGbdbFiles(args.database, newTableSet, revokedTableSet, newMdb)
         errors.extend(newGbdbError)
+        
         #set for easy operations
         totalFiles = set(newReleaseFiles)
+        
         #clean out special fiels we don't push i.e. md5sum.history
-        (pushFiles, totalFiles) = cleanSpecialFiles(totalFiles, totalFiles)
+        totalFiles = cleanSpecialFiles(totalFiles)
+        
         #makes list for additional files
-        (spam, pushFiles, additionalList) = separateOutAdditional(set(), totalFiles, pushFiles)
-        #makes files sizes
-        filesizes = makeFileSizes(c, args, pushFiles, newGbdbSet, additionalList)
+        (oldReleaseFiles, additionalList, oldAdditionalList, totalFiles) = separateOutAdditional(set(), totalFiles, newSupplementalSet, set())
         if (not errors) or args.ignore:
-            printReportOne(args, totalFiles, newGbdbSet, newTableSet, additionalList, newTableSet, pushFiles, c, newGbdbSet, filesizes, newAtticSet, newRevokedSet) 
+            printReportOne(args, totalFiles, revokedFiles, newGbdbSet, revokedGbdbs, newTableSet, revokedTableSet, additionalList, c, atticSet, newSupplementalSet, tableSize) 
         else:
             printErrors(errors)
 
 if __name__ == '__main__':
     main()