3b0ae0a28655b958c4bc033435c9d10cee82ca65
rhead
  Fri Apr 6 19:28:54 2012 -0700
Changed hgsql calls to use callHgsql function, except for the one in countPerChrom, which needs more work before changing. Moved callHgsql to top of file.
diff --git python/lib/ucscgenomics/qa.py python/lib/ucscgenomics/qa.py
index 0df2044..f0be336 100644
--- python/lib/ucscgenomics/qa.py
+++ python/lib/ucscgenomics/qa.py
@@ -1,80 +1,79 @@
 #!/usr/bin/env python2.7
 import re
 import argparse
 import subprocess
 import datetime
 import time
 import pipes
 
 """
     A collection of functions useful to the ENCODE and GB QA Teams.
 """
 
+def callHgsql(database, command):
+    """ Run hgsql command using subprocess, return stdout data if no error."""
+    cmd = ["hgsql", database, "-Ne", command]
+    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    cmdout, cmderr = p.communicate()
+    if p.returncode != 0:
+        # keep command arguments nicely quoted
+        cmdstr = " ".join([pipes.quote(arg) for arg in cmd])
+        raise Exception("Error from: " + cmdstr + ": " + cmderr)
+    return cmdout
 
 def getGbdbTables(database, tableset):
     """ Remove tables that aren't pointers to Gbdb files."""
     sep = "','"
     tablestr = sep.join(tableset)
     tablestr = "'" + tablestr + "'"
-
-    cmd = "hgsql %s -e \"select table_name from information_schema.columns where table_name in (%s) and column_name = 'fileName'\"" % (database, tablestr)
-    p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
-    cmdoutput = p.stdout.read()
-    gbdbtableset = set(cmdoutput.split("\n")[1:-1])
+    hgsqlOut = callHgsql(database, "select table_name from information_schema.columns where table_name in ("
+                                   + tablestr + ") and column_name = 'fileName'")
+    gbdbtableset = set(hgsqlOut.split())
     return gbdbtableset
 
 def sorted_nicely(l):
     """ Sort the given iterable in the way that humans expect."""
     convert = lambda text: int(text) if text.isdigit() else text
     alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ]
     return sorted(l, key = alphanum_key)
 
-def callHgsql(database, command, options="-Ne"):
-    """Run hgsql command using subprocess, return stdout data if no error."""
-    cmd = ["hgsql", database, options, command]
-    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    cmdout, cmderr = p.communicate()
-    if p.returncode != 0:
-        # keep command arguments nicely quoted
-        cmdstr = " ".join([pipes.quote(arg) for arg in cmd])
-        raise Exception("Error from: " + cmdstr + ": " + cmderr)
-    return cmdout
-
 def countPerChrom(database, tables):
     """ Count the amount of rows per chromosome."""
     notgbdbtablelist = tables - getGbdbTables(database, tables)
     tablecounts = dict()
     output = []
     globalseen = set()
     localseen = dict()
 
     hgsqlOut = callHgsql(database, "select chrom from chromInfo")
     chrlist = set(hgsqlOut.split())
 
     notPositionalTable = set()
     if not notgbdbtablelist:
         output.append("No tables to count chroms")
         output.append("")
         return (output, tablecounts)
     for i in notgbdbtablelist:
         counts = dict()
+
         cmd = "hgsql %s -e \"select chrom from %s\"" % (database, i)
         p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
         cmdoutput = p.stdout.read()
 
         chrs = cmdoutput.split("\n")[1:-1]
+
         localseen[i] = set()
 
         if not chrs:
             notPositionalTable.add(i)
             continue
         for j in chrs:
             globalseen.add(j)
             if counts.has_key(j):
                 counts[j] = counts[j] + 1
             else:
                 localseen[i].add(j)
                 counts[j] = 1
         tablecounts[i] = counts
 
     for i in sorted(tablecounts):
@@ -99,65 +98,58 @@
         output.append("Not a positional table:")
         for i in notPositionalTable:
             output.append(i)
     return (output, tablecounts)
 
 def checkTableDescriptions(database, tables):
     """ Check if each table has a description or not."""
     tablelist = list()
     missing = set()
     output = []
     orstr = ""
     for i in tables:
         tablelist.append("tableName = '%s'" % i)
         orsep = " OR "
         orstr = orsep.join(tablelist)
-    cmd = "hgsql %s -e \"select tableName from tableDescriptions where %s\"" % (database, orstr)
-    p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
-    cmdoutput = p.stdout.read()
-
-    described = set(cmdoutput.split("\n")[1:-1])
+    hgsqlOut = callHgsql(database, "select tableName from tableDescriptions where " + orstr)
+    described = set(hgsqlOut.split())
     missing = tables - described
     if missing:
         output.append("Tables missing a description:")
         for i in missing:
             output.append(i)
         output.append("")
     else:
         output.append("No tables missing a description")
         output.append("")
-
     return (output, missing)
 
 def checkTableIndex(database, tables):
     """ Check if each table has an index or not."""
     notgbdbtablelist = tables - getGbdbTables(database, tables)
     tablelist = list()
     missing = set()
     output = []
 
     if not notgbdbtablelist:
         output.append("No tables require an index")
         output.append("")
         return (output, missing)
 
     for i in notgbdbtablelist:
-        cmd = "hgsql %s -e \"show indexes from %s\"" % (database, i)
-        p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
-        cmdoutput = p.stdout.read()
-
-        index = cmdoutput.split("\n")[1:-1]
+        hgsqlOut = callHgsql(database, "show indexes from " + i)
+        index = hgsqlOut.split()
         if index:
             pass
         else:
             missing.add(i)
     if missing:
         output.append("Tables missing an index:")
         for i in missing:
             output.append(i)
         output.append("")
     else:
         output.append("No missing indices")
         output.append("")
 
     return (output, missing)