99a3a781e5dfcafa5bd393b0cc38113849491ca4 wong Wed Oct 26 13:13:40 2011 -0700 moved a function out for cleanliness diff --git python/lib/ucscgenomics/mkChangeNotes.py python/lib/ucscgenomics/mkChangeNotes.py index 05a6d8b..ceae085 100644 --- python/lib/ucscgenomics/mkChangeNotes.py +++ python/lib/ucscgenomics/mkChangeNotes.py @@ -106,65 +106,68 @@ missingTableNames = set(mdb.filter(lambda s: s['objType'] == 'table' and 'tableName' not in s and 'attic' not in s, lambda s: s['metaObject'])) missingFromDb = mdbtableset - sqltableset if missingTableNames: for i in missingTableNames: errors.append("table: %s is type obj, but missing tableName field called by %s" % (i, status)) if missingFromDb: for i in missingFromDb: errors.append("table: %s table not found in Db called by %s" % (i, status)) return (mdbtableset, revokedtableset, errors) + def __checkGbdbFileStatus(self, i, set, errors): + filelist = i['fileName'].split(',') + for j in filelist: + if os.path.isfile("/gbdb/%s/bbi/%s" % (self.database, j)): + set.add(j) + else: + cmd = "hgsql %s -e \"select fileName from (%s)\"" % (self.database, i['tableName']) + p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) + cmdoutput = p.stdout.read() + if os.path.isfile(cmdoutput.split("\n")[1]): + set.add(j) + else: + errors.append("gbdb: %s does not exist in /gbdb/%s/bbi" % (j, self.database)) + return set, errors def getGbdbFiles(self, state): - database = self.database revokedset = set() if state == 'new': (tableset, revokedset, mdb) = (self.newTableSet, self.revokedSet, self.newMdb) elif state == 'old': (tableset, mdb) = (self.oldTableSet, self.oldMdb) errors = [] gbdbtableset = qa.getGbdbTables(self.database, tableset) revokedtableset = qa.getGbdbTables(self.database, revokedset) file1stanzalist = mdb.filter(lambda s: s['tableName'] in gbdbtableset, lambda s: s) revokedstanzalist = mdb.filter(lambda s: s['tableName'] in revokedtableset, lambda s: s) gbdbfileset = set() revokedfileset = set() for i in file1stanzalist: - filelist = i['fileName'].split(',') - for j in filelist: - if os.path.isfile("/gbdb/%s/bbi/%s" % (database, j)): - gbdbfileset.add(j) - else: - errors.append("gbdb: %s does not exist in /gbdb/%s/bbi" % (j, database)) + (gbdbfileset, errors) = self.__checkGbdbFileStatus(i, gbdbfileset, errors) for i in revokedstanzalist: - filelist = i['fileName'].split(',') - for j in filelist: - if os.path.isfile("/gbdb/%s/bbi/%s" % (database, j)): - revokedfileset.add(j) - else: - errors.append("gbdb: revoked gbdb %s does not exist in /gbdb/%s/bbi" % (j, database)) + (revokedfileset, errors) = self.__checkGbdbFileStatus(i, revokedfileset, errors) return (gbdbfileset, revokedfileset, errors) def __getTableSize(self): (mdbtableset, database) = (self.newTableSet, self.database) tablesize = float(0) tablelist = list() for i in mdbtableset: tablelist.append("table_name = '%s'" % i) orsep = " OR " orstr = orsep.join(tablelist) cmd = "hgsql %s -e \"SELECT ROUND(data_length/1024/1024,2) total_size_mb, ROUND(index_length/1024/1024,2) total_index_size_mb FROM information_schema.TABLES WHERE %s\"" % (database, orstr) p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) cmdoutput = p.stdout.read() @@ -595,31 +598,31 @@ totalFiles = set(self.newReleaseFiles) oldTotalFiles = set(self.oldReleaseFiles) #these could honestly be moved earlier, get a file list processing section or something #they clean out special fiels out and separated the master fiels list into the 3 required #ones: wgEncode, supplemental and additional. self.totalFiles = self.__cleanSpecialFiles(totalFiles) self.oldTotalFiles = self.__cleanSpecialFiles(oldTotalFiles) (self.oldTotalFiles, self.additionalList, self.oldAdditionalList, self.totalFiles) = self.__separateOutAdditional() #get the stuff you need to push, also table sizes self.pushTables = set(sorted((self.newTableSet - self.oldTableSet))) self.pushFiles = set(sorted((self.totalFiles - self.oldTotalFiles))) self.pushGbdbs = set(sorted((self.newGbdbSet - self.oldGbdbSet))) self.tableSize = self.__getTableSize() - + self.errors = errors #don't output.append(report unless ignore option is on or no errors if (not errors) or self.ignore: self.output = self.printReport(args, c) else: self.output = self.printErrors(errors) elif self.releaseOld == 'solo': self.newReleaseFiles = c.releases[int(self.releaseNew)-1] self.newMdb = c.alphaMetaDb (self.newMdb, self.revokedSet, self.revokedFiles, self.atticSet, self.newSupplementalSet, newFileErrors) = self.checkMetaDbForFiles("alpha metaDb", "new") errors.extend(newFileErrors) @@ -629,19 +632,20 @@ self.tableSize = self.__getTableSize() (self.newGbdbSet, self.revokedGbdbs, newGbdbError) = self.getGbdbFiles("new") errors.extend(newGbdbError) #set for easy operations totalFiles = set(self.newReleaseFiles) #clean out special fiels we don't push i.e. md5sum.history self.totalFiles = self.__cleanSpecialFiles(totalFiles) #makes list for additional files (self.oldTotalFiles, self.oldSupplementalSet) = (set(), set()) (self.oldReleaseFiles, self.additionalList, self.oldAdditionalList, self.totalFiles) = self.__separateOutAdditional() + self.errors = errors if (not errors) or self.ignore: self.output = self.printReportOne(args, c) else: self.output = self.printErrors(errors)