c8af12e5efbf81ef6f0f80461deea5d63a08b045 wong Fri Oct 28 16:59:19 2011 -0700 added smarted filtering error messages diff --git python/lib/ucscgenomics/qa.py python/lib/ucscgenomics/qa.py index 635f563..7f6edb3 100644 --- python/lib/ucscgenomics/qa.py +++ python/lib/ucscgenomics/qa.py @@ -54,32 +54,32 @@ tablecounts = dict() output = [] globalseen = set() localseen = dict() cmd = "hgsql %s -e \"select chrom from chromInfo\"" % database p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) cmdoutput = p.stdout.read() chrlist = set(cmdoutput.split("\n")[1:-1]) if not chrlist: output.append("Can't get chromInfo from %s for countPerChrom" % database) return (output, tablecounts) - if not tables: - output.append("No Tables to count") + if not notgbdbtablelist: + output.append("No tables to count chroms") output.append("") return (output, tablecounts) for i in notgbdbtablelist: counts = dict() cmd = "hgsql %s -e \"select chrom from %s\"" % (database, i) p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) cmdoutput = p.stdout.read() chrs = cmdoutput.split("\n")[1:-1] localseen[i] = set() for j in chrs: globalseen.add(j) if counts.has_key(j): counts[j] = counts[j] + 1 @@ -130,30 +130,35 @@ output.append(i) output.append("") else: output.append("No tables missing a description") output.append("") return (output, missing) def checkTableIndex(database, tables): """ Check if each table has an index or not.""" notgbdbtablelist = tables - getGbdbTables(database, tables) tablelist = list() missing = set() output = [] + if not notgbdbtablelist: + output.append("No tables require an index") + output.append("") + return (output, missing) + for i in notgbdbtablelist: cmd = "hgsql %s -e \"show indexes from %s\"" % (database, i) p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) cmdoutput = p.stdout.read() index = cmdoutput.split("\n")[1:-1] if index: pass else: missing.add(i) if missing: output.append("Tables missing an index:") for i in missing: output.append(i) output.append("") @@ -220,30 +225,37 @@ if output: output.insert(0,"Label errors:") output.append("") else: output.append("No labels are incorrect") output.append("") return (output, toolong) def checkTableCoords(database, tables): """Runs checkTableCoords externally against a set of tables, timeout is 10 seconds""" notgbdbtablelist = tables - getGbdbTables(database, tables) results = [] output = [] + + if not notgbdbtablelist: + output.append("No tables have coordinates") + output.append("") + return (output, results) + + timeout = 20 for i in sorted(notgbdbtablelist): start = datetime.datetime.now() cmd = "checkTableCoords %s %s" % (database, i) p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) killed = 0 while p.poll() is None: time.sleep(0.1) now = datetime.datetime.now() if (now - start).seconds > timeout: p.kill() killed = 1 if not killed: cmdoutput = p.stdout.read() cmderr = p.stderr.read() @@ -256,32 +268,40 @@ results.append("Process timeout after %d seconds, for table: %s" % (timeout, i)) results.append("You might want to manually run: '%s'" % cmd) results.append("") if results: output.append("These tables have coordinate errors:") for i in results: output.append(i) else: output.append("No coordinate errors") output.append("") return (output, results) def positionalTblCheck(database, tables): notgbdbtablelist = tables - getGbdbTables(database, tables) + + results = [] output = [] + + if not notgbdbtablelist: + output.append("No tables are positional") + output.append("") + return (output, results) + for i in notgbdbtablelist: cmd = "positionalTblCheck %s %s" % (database, i) p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) cmdoutput = p.stdout.read() cmderr = p.stderr.read() if cmdoutput: results.append(cmdoutput) if cmderr: results.append(cmderr) if results: output.append("These tables have position errors:") for i in results: output.append(i) else: output.append("No position errors")