99cadc77d83b20928b0eb1f8e6bb1d7641b134a8
wong
  Mon Oct 17 16:56:08 2011 -0700
added in solo release functionality, changed how certain things are displayed, loose mode for md5sum check
diff --git python/programs/mkChangeNotes/mkChangeNotes python/programs/mkChangeNotes/mkChangeNotes
index d480801..56dd1e6 100755
--- python/programs/mkChangeNotes/mkChangeNotes
+++ python/programs/mkChangeNotes/mkChangeNotes
@@ -156,39 +156,41 @@
     tablelist = list()
     for i in mdbtableset:
         tablelist.append("table_name = '%s'" % i)
     orsep = " OR "
     orstr = orsep.join(tablelist)
 
     cmd = "hgsql %s -e \"SELECT ROUND(data_length/1024/1024,2) total_size_mb, ROUND(index_length/1024/1024,2) total_index_size_mb FROM information_schema.TABLES WHERE table_name = %s\"" % (database, orstr)
     p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
     output = p.stdout.read()
     for i in output.split("\n")[1:]:
         fields = i.split()
         for j in fields:
             tablesize = tablesize + float(j)
     return math.ceil(tablesize)
 
-def checkMd5sums(newfiles, oldfiles):
+def checkMd5sums(newfiles, oldfiles, loose):
     errors = []
     for i in oldfiles:
         if i not in newfiles:
             pass
         elif re.match('wgEncode.*', i):
             if oldfiles[i].md5sum != newfiles[i].md5sum:
                 errors.append("file: %s have changed md5sums between releases. %s vs %s" % (i, oldfiles[i].md5sum, newfiles[i].md5sum))
-
+    if loose:
+        return list()
+    else:
     return errors
 
 def makeFileSizes(c, args, inlist):
     checklist = list()
     for i in inlist:
         checklist.append("%s/%s" % (c.downloadsDirectory + 'release' + args.releaseNew, i))    
     filesizes = 0
     for i in checklist:
         filesizes = filesizes + int(os.path.getsize(i))
     filesizes = filesizes / (1024**2)
         
     return filesizes
 
 def cleanSpecialFiles(inlist):
     specialRemoveList = ['md5sum.history']
@@ -229,62 +231,62 @@
     for i in sorted(set):
         print "%s/%s" % (c.httpDownloadsPath + 'release' + release, i)
         
 def printGbdbPath(set, database):
     for i in sorted(set):
         print "/gbdb/%s/bbi/%s" % (database, i)
 
 def printIter(inlist):
     for i in sorted(inlist):
         print i
 
 def printReport(args, totalFiles, newGbdbSet, newTableSet, additionalList, oldAdditionalList, pushTables, pushFiles, pushGbdbs, c, oldTableSet, oldReleaseFiles, oldGbdbSet, atticSet, revokedFiles, mdb, revokedTableSet, revokedGbdbs, missingFiles, newSupplementalSet, oldSupplementalSet, tableSize):
     #the groups here need to be predefined, I just copied and pasted after working out what they were
     sep = "\n"
     print "mkChangeNotes v2"
-    print "%s %s Release %s" % (args.database, args.composite, args.releaseNew)    
+    print "%s %s Release %s vs Release %s" % (args.database, args.composite, args.releaseNew, args.releaseOld)    
     print "" 
     print "QA Count Summaries for Release %s:" % args.releaseNew
     print "Tables: %d" % int(len(newTableSet))
     print "Files: %d" % int(len(totalFiles - revokedFiles))
     print "Gbdbs: %d" % int(len(newGbdbSet))
     print "Supplemental: %d" % int(len(newSupplementalSet - oldSupplementalSet))
     print "Other: %d" % int(len(additionalList))
     print "\n"
     totalsize = 0;
     print "Sizes of New:"
     tableGb = int(tableSize/1024)
     if tableGb > 1:
         print "Tables: %d MB (%d GB)" % (tableSize, tableGb)
     else:
         print "Tables: %d MB" % tableSize
     totalsize = totalsize + tableSize
     size = int(makeFileSizes(c, args, pushFiles))
     totalsize = totalsize + size
     if int(size/1024) > 1:
         print "Files: %d MB (%d GB)" % (size, int(size/1024))
     else:
         print "Files: %d MB" % size
     size = int(makeFileSizes(c, args, pushGbdbs))
     totalsize = totalsize + size
     if int(size/1024) > 1:
         print "Gbdbs: %d MB (%d GB)" % (size, int(size/1024))
     else:
         print "Gbdbs: %d MB" % size
     size = int(makeFileSizes(c, args, (newSupplementalSet - oldSupplementalSet)))
     totalsize = totalsize + size
-    if int(size/1-24) > 1:
+    if int(size/1024) > 1:
         print "Supplemental: %d MB" % (size, int(size/1024))
     else:
         print "Supplemental: %d MB" % size
     size = int(makeFileSizes(c, args, (additionalList)))
     totalsize = totalsize + size
     if int(size/1024) > 1:
         print "Other: %d MB" % size
     else:
         print "Other: %d MB" % size
     if int(totalsize/1024) > 1:
         print "Total: %d MB (%d GB)" % (totalsize, int(totalsize/1024))
     else:
         print "Total: %d MB" % totalsize
     print "\n"
     print "TABLES:"
@@ -322,135 +324,148 @@
         print "Revoked/Replaced/Renamed Download Files (%s):" % len(revokedFiles)    
         printWithPath(revokedFiles, c, args.releaseNew)
     print "\n"
     print "GBDBS:"
     print "New: %s" % len(pushGbdbs)
     print "Persisting: %s" % len((newGbdbSet & oldGbdbSet) - revokedGbdbs)
     print "Revoked/Replaced/Renamed: %s" % len(revokedGbdbs)
     print "New + Persisting: %s" % len(pushGbdbs | ((newGbdbSet & oldGbdbSet) - revokedGbdbs))
     print "Total (New + Persisting + Revoked/Replaced/Renamed): %s" % len(newGbdbSet | oldGbdbSet | revokedGbdbs)
     if args.full:
         print ""
         print "New Gbdb Files (%s):" % len(pushGbdbs)
         printGbdbPath(pushGbdbs, args.database)
         print ""
         print "Persisting Gbdb Files (%s):" % len((newGbdbSet & oldGbdbSet) - revokedGbdbs)
-        printGbdbPath(((newGbdbSet & oldGbdbSet) - revokedGbdbs), args.database)
+        printGbdbPath((newGbdbSet & oldGbdbSet) - revokedGbdbs, args.database)
         print ""
         print "Revoked/Replaced/Renamed Gbdb Files (%s):" % len(revokedGbdbs)
         printGbdbPath(revokedGbdbs, args.database)
     print "\n"
-    print "Supplemental Files:"
+    print "SUPPLEMENTAL FILES:"
     print "New: %s" % len(newSupplementalSet - oldSupplementalSet)
     print "Persisting: %s" % len(oldSupplementalSet & newSupplementalSet)
     print "Removed: %s" % len(oldSupplementalSet - newSupplementalSet)
     print "New + Persisting: %s" % len((newSupplementalSet - oldSupplementalSet) | (oldSupplementalSet & newSupplementalSet))
     print "Total: %s" % len(newSupplementalSet | oldSupplementalSet)
     if args.full:
         print ""
-        print "New Supplemental Files:"
+        print "New Supplemental Files (%s):" % len(newSupplementalSet - oldSupplementalSet)
         printWithPath(newSupplementalSet - oldSupplementalSet, c, args.releaseNew)
         print ""
-        print "Persisting Supplemental Files:"
+        print "Persisting Supplemental Files (%s):" % len(oldSupplementalSet & newSupplementalSet)
         printWithPath(oldSupplementalSet & newSupplementalSet, c, args.releaseNew)
         print ""
-        print "Removed Supplemental Files:"
+        print "Removed Supplemental Files (%s):" % len(oldSupplementalSet - newSupplementalSet)
         printWithPath(oldSupplementalSet - newSupplementalSet, c, args.releaseNew)
     print "\n"
     print "OTHER FILES:"
     print "New: %s" % len(additionalList)
     print "Revoked/Replace: %s" % len(oldAdditionalList)
     print "Total: %s" % len(additionalList | oldAdditionalList)
     if args.full:
         print "" 
         print "New Other Files (%s):" % len(additionalList)
         printWithPath(additionalList, c, args.releaseNew)
         print ""
         print "Revoked Other Files (%s):" % len(oldAdditionalList)
         printWithPath(oldAdditionalList, c, args.releaseNew)
     print "\n"
     print "Files that dropped between releases (%s):" % len(missingFiles)
     printWithPath(missingFiles, c, args.releaseOld)
     print "\n"
     if not args.ignore:
         print "No Errors"
     
-def printReportOne(args, totalFiles, newGbdbSet, newTableSet, additionalList, pushTables, c, pushGbdbs, atticSet, newSupplementalSet, tableSize):
+def printReportOne(args, totalFiles, revokedFiles, newGbdbSet, revokedGbdbs, newTableSet, revokedTables, additionalList, c, atticSet, newSupplementalSet, tableSize):
     print "mkChangeNotes v2"
     print "%s %s Release %s" % (args.database, args.composite, args.releaseNew)
     print ""
     print "QA Count Summaries for Release %s:" % args.releaseNew
-    print "Tables: %d" % int(len(newTableSet))
-    print "Files: %d" % int(len(totalFiles))
-    print "Gbdbs: %d" % int(len(newGbdbSet))
+    print "Tables: %d" % int(len(newTableSet - revokedTables))
+    print "Files: %d" % int(len(totalFiles - revokedFiles))
+    print "Gbdbs: %d" % int(len(newGbdbSet - revokedGbdbs))
     print "Supplemental: %d" % int(len(newSupplementalSet))
     print "Other: %d" % int(len(additionalList))
+    print ""
+    print "REVOKED:"
+    print "Tables: %s" % len(revokedTables)
+    print "Files: %s" % len(revokedFiles)
+    print "Gbdbs: %s" % len(revokedGbdbs)
     print "\n"
     totalsize = 0;
     print "Sizes of New:"
     tableGb = int(tableSize / 1024)
     if tableGb > 1:
         print "Tables: %d MB (%d GB)" % (tableSize, tableGb)
     else:
         print "Tables: %d MB" % tableSize
     totalsize = totalsize + tableSize
     size = int(makeFileSizes(c, args, totalFiles))
     totalsize = totalsize + size
     if int(size/1024) > 1:
         print "Files: %d MB (%d GB)" % (size, int(size/1024))
     else:
         print "Files: %d MB" % size
-    size = int(makeFileSizes(c, args, pushGbdbs))
+    size = int(makeFileSizes(c, args, newGbdbSet))
     totalsize = totalsize + size
     if int(size/1024) > 1:
         print "Gbdbs: %d MB (%d GB)" % (size, int(size/1024))
     else:
         print "Gbdbs: %d MB" % size
     size = int(makeFileSizes(c, args, newSupplementalSet))
     totalsize = totalsize + size
-    if int(size/1-24) > 1:
-        print "Supplemental: %d MB" % (size, int(size/1024))
+    if int(size/1024) > 1:
+        print "Supplemental: %d MB (%d GB)" % (size, int(size/1024))
     else:
         print "Supplemental: %d MB" % size
     size = int(makeFileSizes(c, args, (additionalList)))
     totalsize = totalsize + size
     if int(size/1024) > 1:
         print "Other: %d MB" % size
     else:
         print "Other: %d MB" % size
     if int(totalsize/1024) > 1:
         print "Total: %d MB (%d GB)" % (totalsize, int(totalsize/1024))
     else:
         print "Total: %d MB" % totalsize
     print "\n"
     if args.full:
         print ""
-        print "New Tables (%s):" % len(pushTables)
-        printIter(pushTables)
+        print "New Tables (%s):" % len(newTableSet - revokedTables)
+        printIter(newTableSet - revokedTables)
         print "\n"
-        print "New Download Files (%s):" % len(totalFiles)
-        printWithPath(totalFiles, c, args.releaseNew)
+        print "New Download Files (%s):" % len(totalFiles - revokedFiles)
+        printWithPath(totalFiles - revokedFiles, c, args.releaseNew)
         print "\n"
-        print "New Gbdb Files (%s):" % len(pushGbdbs)
-        printWithPath(pushGbdbs, c, args.releaseNew)
+        print "New Gbdb Files (%s):" % len(newGbdbSet - revokedGbdbs)
+        printGbdbPath(newGbdbSet - revokedGbdbs, args.database)
         print "\n"
-        print "New Supplemental Files:"
+        print "New Supplemental Files (%s):" % len(newSupplementalSet)
         printWithPath(newSupplementalSet, c, args.releaseNew)
         print "\n" 
         print "New Other Files (%s):" % len(additionalList)
         printWithPath(additionalList, c, args.releaseNew)
         print "\n"
+        print "Revoked Tables (%s):" %len(revokedTables)
+        printIter(revokedTables)
+        print "Revoked Files (%s):" % len(revokedFiles)
+        printWithPath(revokedFiles, c, args.releaseNew)
+        print "\n"
+        print "Revoked Gbdbs (%s):" % len(revokedGbdbs)
+        printGbdbPath(revokedGbdbs, args.database)
+        print "\n"
     if not args.ignore:
         print "No Errors"
 
 
 def printErrors(errors):
     errorsDict = {}
     for i in errors:
         line = i.split(":", 1)
         try:
             errorsDict[line[0]].append(line[1])
         except:
             errorsDict[line[0]] = []
             errorsDict[line[0]].append(line[1])
     print "Errors (%s):" % len(errors)
     for i in sorted(errorsDict.keys()):
@@ -485,53 +500,56 @@
     if len(sys.argv) == 1:
         parser.print_help()
         return
     args = parser.parse_args(sys.argv[1:])
     if not args.releaseNew.isdigit():
         parser.print_help()
         return
 
     c = track.CompositeTrack(args.database,args.composite)
 
     loose = args.loose
 
     errors = []
     
     if not args.releaseOld.isdigit():
-        args.releaseOld = 0
-    if int(args.releaseOld) > int(args.releaseNew):
+        args.releaseOld = 'solo'    
+    elif int(args.releaseOld) > int(args.releaseNew):
         errors.append("Old Release is higher than New Release")
         args.releaseOld = args.releaseNew
+        printErrors(errors)
+        return
 
-    if int(args.releaseNew) > 1:
+
+    if int(args.releaseNew) > 1 and str(args.releaseOld) != 'solo':
     
         newReleaseFiles = c.releases[int(args.releaseNew)-1]
         oldReleaseFiles = c.releases[int(args.releaseOld)-1]
 
         newMdb = c.alphaMetaDb
         oldMdb = c.publicMetaDb
 
         #check if all files listed in release directories have associated metaDb entries
         (newMdb, revokedSet, revokedFiles, atticSet, newSupplementalSet, newFileErrors) = checkMetaDbForFiles(newMdb, newReleaseFiles, "alpha metaDb", loose)
         (oldMdb, spam, eggs, ham, oldSupplementalSet, oldFileErrors) = checkMetaDbForFiles(oldMdb, oldReleaseFiles, "public metaDb", loose)
         errors.extend(newFileErrors)
         errors.extend(oldFileErrors)
         
         #checks to see that nothing has disappeared between public and alpha
         errors.extend(checkAlphaForDropped(newMdb, oldMdb, "alpha metaDb", "stanza"))
         missingFiles = checkFilesForDropped(newReleaseFiles, oldReleaseFiles)
-        errors.extend(checkMd5sums(newReleaseFiles, oldReleaseFiles))
+        errors.extend(checkMd5sums(newReleaseFiles, oldReleaseFiles, loose))
 
         #checks and gets tables that are present, also returns a revoked set of tables for new
         (newTableSet, revokedTableSet, newTableError) = checkTableStatus(newMdb, newReleaseFiles, args.database, args.composite, "alpha metaDb", loose, revokedSet)
         (oldTableSet, spam, oldTableError) = checkTableStatus(oldMdb, oldReleaseFiles, args.database, args.composite, "public metaDb", loose, revokedSet)
         errors.extend(newTableError)
         errors.extend(oldTableError)
 
         #same as above except for gbdbs
         (newGbdbSet, revokedGbdbs, newGbdbError) = getGbdbFiles(args.database, newTableSet, revokedTableSet, newMdb)
         (oldGbdbSet, eggs, oldGbdbError) = getGbdbFiles(args.database, oldTableSet, set(), oldMdb)
         errors.extend(newGbdbError)
         errors.extend(oldGbdbError)
     
         #check to see if tables were dropped between releases
         droppedTables = oldTableSet - newTableSet
@@ -550,48 +568,48 @@
         (oldReleaseFiles, additionalList, oldAdditionalList, totalFiles) = separateOutAdditional(oldReleaseFiles, totalFiles, newSupplementalSet, oldSupplementalSet)
 
         #get the stuff you need to push, also table sizes        
         pushTables = set(sorted((newTableSet - oldTableSet)))
         tableSize = getTableSize(pushTables, args.database)
         pushFiles = set(sorted((totalFiles - oldReleaseFiles)))
         pushGbdbs = set(sorted((newGbdbSet - oldGbdbSet)))
         
         #don't print report unless ignore option is on or no errors
         if (not errors) or args.ignore:
             printReport(args, totalFiles, newGbdbSet, newTableSet, additionalList, oldAdditionalList, pushTables, pushFiles, pushGbdbs, c, oldTableSet, oldReleaseFiles, oldGbdbSet, atticSet, revokedFiles, newMdb, revokedTableSet, revokedGbdbs, missingFiles, newSupplementalSet, oldSupplementalSet, tableSize)
         else:
             printErrors(errors)
 
 
-    else:
+    elif args.releaseOld == 'solo':
 
         newReleaseFiles = c.releases[int(args.releaseNew)-1]
         
         newMdb = c.alphaMetaDb
         
         (newMdb, revokedSet, revokedFiles, atticSet, newSupplementalSet, newFileErrors) = checkMetaDbForFiles(newMdb, newReleaseFiles, "alpha metaDb", loose)
         errors.extend(newFileErrors)
         
         (newTableSet, revokedTableSet, newTableError) = checkTableStatus(newMdb, newReleaseFiles, args.database, args.composite, "alpha metaDb", loose, revokedSet)
         errors.extend(newTableError)
         
         tableSize = getTableSize(newTableSet, args.database)
 
         (newGbdbSet, revokedGbdbs, newGbdbError) = getGbdbFiles(args.database, newTableSet, revokedTableSet, newMdb)
         errors.extend(newGbdbError)
         
         #set for easy operations
         totalFiles = set(newReleaseFiles)
         
         #clean out special fiels we don't push i.e. md5sum.history
         totalFiles = cleanSpecialFiles(totalFiles)
         
         #makes list for additional files
         (oldReleaseFiles, additionalList, oldAdditionalList, totalFiles) = separateOutAdditional(set(), totalFiles, newSupplementalSet, set())
         if (not errors) or args.ignore:
-            printReportOne(args, totalFiles, newGbdbSet, newTableSet, additionalList, newTableSet, c, newGbdbSet, atticSet, newSupplementalSet, tableSize) 
+            printReportOne(args, totalFiles, revokedFiles, newGbdbSet, revokedGbdbs, newTableSet, revokedTableSet, additionalList, c, atticSet, newSupplementalSet, tableSize) 
         else:
             printErrors(errors)
 
 if __name__ == '__main__':
     main()