51adadf28a92e212e17e93dacdb9169e276aa9a9 wong Mon Jan 23 11:28:44 2012 -0800 made it so that certain tests skip non-positional tables diff --git python/lib/ucscgenomics/qa.py python/lib/ucscgenomics/qa.py index 7f6edb3..61f862a 100644 --- python/lib/ucscgenomics/qa.py +++ python/lib/ucscgenomics/qa.py @@ -49,75 +49,83 @@ return sorted(l, key = alphanum_key) def countPerChrom(database, tables): """ Count the amount of rows per chromosome.""" notgbdbtablelist = tables - getGbdbTables(database, tables) tablecounts = dict() output = [] globalseen = set() localseen = dict() cmd = "hgsql %s -e \"select chrom from chromInfo\"" % database p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) cmdoutput = p.stdout.read() chrlist = set(cmdoutput.split("\n")[1:-1]) + notPositionalTable = set() if not chrlist: output.append("Can't get chromInfo from %s for countPerChrom" % database) return (output, tablecounts) if not notgbdbtablelist: output.append("No tables to count chroms") output.append("") return (output, tablecounts) for i in notgbdbtablelist: counts = dict() cmd = "hgsql %s -e \"select chrom from %s\"" % (database, i) p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) cmdoutput = p.stdout.read() chrs = cmdoutput.split("\n")[1:-1] localseen[i] = set() + if not chrs: + notPositionalTable.add(i) + continue for j in chrs: globalseen.add(j) if counts.has_key(j): counts[j] = counts[j] + 1 else: localseen[i].add(j) counts[j] = 1 tablecounts[i] = counts for i in sorted(tablecounts): output.append(i) used = set() for j in sorted_nicely(tablecounts[i]): output.append("%s = %s" % (j, tablecounts[i][j])) notused = chrlist - (localseen[i] | (chrlist - globalseen)) if notused: output.append("Seen by others, but not used here:") for j in sorted_nicely(notused): output.append(j) output.append("") globalnotused = chrlist - globalseen if globalnotused: output.append("Not seen anywhere:") for i in sorted_nicely(globalnotused): output.append(i) output.append("") + if notPositionalTable: + output.append("Not a positional table:") + for i in notPositionalTable: + output.append(i) return (output, tablecounts) def checkTableDescriptions(database, tables): """ Check if each table has a description or not.""" tablelist = list() missing = set() output = [] orstr = "" for i in tables: tablelist.append("tableName = '%s'" % i) orsep = " OR " orstr = orsep.join(tablelist) cmd = "hgsql %s -e \"select tableName from tableDescriptions where %s\"" % (database, orstr) p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) cmdoutput = p.stdout.read() @@ -288,23 +296,38 @@ if not notgbdbtablelist: output.append("No tables are positional") output.append("") return (output, results) for i in notgbdbtablelist: cmd = "positionalTblCheck %s %s" % (database, i) p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) cmdoutput = p.stdout.read() cmderr = p.stderr.read() if cmdoutput: results.append(cmdoutput) if cmderr: results.append(cmderr) if results: - output.append("These tables have position errors:") + p = re.compile('(.*)does not appear to be a positional table') + outResults = list() + nonPositional = list() for i in results: + m = p.search(i) + if m: + nonPositional.append(m.group(1)) + else: + outResults.append(i) + + output.append("These tables have position errors:") + for i in outResults: output.append(i) + if nonPositional: + output.append("These tables are non-positional:") + for i in nonPositional: + output.append(i) + output.append("") else: output.append("No position errors") output.append("") return (output, results)