python/lib/ucscgenomics/mkChangeNotes.py 99a3a781e5dfcafa5bd393b0cc38113849491ca4

99a3a781e5dfcafa5bd393b0cc38113849491ca4
wong
  Wed Oct 26 13:13:40 2011 -0700
moved a function out for cleanliness
diff --git python/lib/ucscgenomics/mkChangeNotes.py python/lib/ucscgenomics/mkChangeNotes.py
index 05a6d8b..ceae085 100644
--- python/lib/ucscgenomics/mkChangeNotes.py
+++ python/lib/ucscgenomics/mkChangeNotes.py
@@ -106,65 +106,68 @@
 
         missingTableNames = set(mdb.filter(lambda s: s['objType'] == 'table' and 'tableName' not in s and 'attic' not in s, lambda s: s['metaObject']))
 
         missingFromDb = mdbtableset - sqltableset
 
         if missingTableNames:
             for i in missingTableNames:
                 errors.append("table: %s is type obj, but missing tableName field called by %s" % (i, status))
 
         if missingFromDb:
             for i in missingFromDb:
                 errors.append("table: %s table not found in Db called by %s" % (i, status))
 
         return (mdbtableset, revokedtableset, errors)
 
+    def __checkGbdbFileStatus(self, i, set, errors):
+        """Add each file of stanza i found in /gbdb/<db>/bbi, or at the path stored in its table, to set; record the rest in errors."""
+        for j in i['fileName'].split(','):
+            if os.path.isfile("/gbdb/%s/bbi/%s" % (self.database, j)):
+                set.add(j)
+                continue
+            cmd = "hgsql %s -e \"select fileName from (%s)\"" % (self.database, i['tableName'])
+            p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
+            rows = p.communicate()[0].split("\n")  # communicate() also waits for hgsql; rows[0] is presumably the column header
+            if len(rows) > 1 and os.path.isfile(rows[1]):  # output without a second line used to raise IndexError
+                set.add(j)
+            else:
+                errors.append("gbdb: %s does not exist in /gbdb/%s/bbi" % (j, self.database))
+        return set, errors
 
     def getGbdbFiles(self, state):
-        database = self.database
         revokedset = set()
         if state == 'new':
             (tableset, revokedset, mdb) = (self.newTableSet, self.revokedSet, self.newMdb)
         elif state == 'old':
             (tableset, mdb) = (self.oldTableSet, self.oldMdb)
 
         errors = []
 
         gbdbtableset = qa.getGbdbTables(self.database, tableset)
 
         revokedtableset = qa.getGbdbTables(self.database, revokedset)
 
         file1stanzalist = mdb.filter(lambda s: s['tableName'] in gbdbtableset, lambda s: s)
         revokedstanzalist = mdb.filter(lambda s: s['tableName'] in revokedtableset, lambda s: s)
         gbdbfileset = set()
         revokedfileset = set()
 
         for i in file1stanzalist:
-            filelist = i['fileName'].split(',')
-            for j in filelist:
-                if os.path.isfile("/gbdb/%s/bbi/%s" % (database, j)):
-                    gbdbfileset.add(j)
-                else:
-                    errors.append("gbdb: %s does not exist in /gbdb/%s/bbi" % (j, database))
+            (gbdbfileset, errors) = self.__checkGbdbFileStatus(i, gbdbfileset, errors)
 
         for i in revokedstanzalist:
-            filelist = i['fileName'].split(',')
-            for j in filelist:
-                if os.path.isfile("/gbdb/%s/bbi/%s" % (database, j)):
-                    revokedfileset.add(j)
-                else:
-                    errors.append("gbdb: revoked gbdb %s does not exist in /gbdb/%s/bbi" % (j, database))
+            (revokedfileset, errors) = self.__checkGbdbFileStatus(i, revokedfileset, errors)
 
         return (gbdbfileset, revokedfileset, errors)
 
     def __getTableSize(self):
         (mdbtableset, database) = (self.newTableSet, self.database)
         tablesize = float(0)
         tablelist = list()
         for i in mdbtableset:
             tablelist.append("table_name = '%s'" % i)
         orsep = " OR "
         orstr = orsep.join(tablelist)
 
         cmd = "hgsql %s -e \"SELECT ROUND(data_length/1024/1024,2) total_size_mb, ROUND(index_length/1024/1024,2) total_index_size_mb FROM information_schema.TABLES WHERE %s\"" % (database, orstr)
         p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
         cmdoutput = p.stdout.read()
@@ -595,31 +598,31 @@
             totalFiles = set(self.newReleaseFiles)
             oldTotalFiles = set(self.oldReleaseFiles)
 
             #these could honestly be moved earlier, get a file list processing section or something
             #they clean out special fiels out and separated the master fiels list into the 3 required
             #ones: wgEncode, supplemental and additional.
             self.totalFiles = self.__cleanSpecialFiles(totalFiles)
             self.oldTotalFiles = self.__cleanSpecialFiles(oldTotalFiles)
             (self.oldTotalFiles, self.additionalList, self.oldAdditionalList, self.totalFiles) = self.__separateOutAdditional()
 
             #get the stuff you need to push, also table sizes        
             self.pushTables = set(sorted((self.newTableSet - self.oldTableSet)))
             self.pushFiles = set(sorted((self.totalFiles - self.oldTotalFiles)))
             self.pushGbdbs = set(sorted((self.newGbdbSet - self.oldGbdbSet)))
             self.tableSize = self.__getTableSize()
-            
+            self.errors = errors
             #don't output.append(report unless ignore option is on or no errors
             if (not errors) or self.ignore:
                 self.output = self.printReport(args, c)
             else:
                 self.output = self.printErrors(errors)
 
 
         elif self.releaseOld == 'solo':
 
             self.newReleaseFiles = c.releases[int(self.releaseNew)-1]
 
             self.newMdb = c.alphaMetaDb
 
             (self.newMdb, self.revokedSet, self.revokedFiles, self.atticSet, self.newSupplementalSet, newFileErrors) = self.checkMetaDbForFiles("alpha metaDb", "new")
             errors.extend(newFileErrors)
@@ -629,19 +632,20 @@
 
             self.tableSize = self.__getTableSize()
 
             (self.newGbdbSet, self.revokedGbdbs, newGbdbError) = self.getGbdbFiles("new")
             errors.extend(newGbdbError)
 
             #set for easy operations
             totalFiles = set(self.newReleaseFiles)
 
             #clean out special fiels we don't push i.e. md5sum.history
             self.totalFiles = self.__cleanSpecialFiles(totalFiles)
 
             #makes list for additional files
             (self.oldTotalFiles, self.oldSupplementalSet) = (set(), set())
             (self.oldReleaseFiles, self.additionalList, self.oldAdditionalList, self.totalFiles) = self.__separateOutAdditional()
+            self.errors = errors
             if (not errors) or self.ignore:
                 self.output = self.printReportOne(args, c) 
             else:
                 self.output = self.printErrors(errors)