454a424e3b2d8660132d6cafc408a03d348c9d57
rhead
  Mon Mar 19 13:08:51 2012 -0700
Changing to conform with new style guide: changed tabs to 4 spaces, changed interpreter directive, put imports on their own lines and removed unused imports, removed repetitive documentation from beginning.
diff --git python/lib/ucscgenomics/qa.py python/lib/ucscgenomics/qa.py
index 27d24eb..dfdbd80 100644
--- python/lib/ucscgenomics/qa.py
+++ python/lib/ucscgenomics/qa.py
@@ -1,46 +1,24 @@
-#!/hive/groups/encode/dcc/bin/python
-import sys, os, re, argparse, subprocess, math, threading, datetime, time, signal
+#!/usr/bin/env python2.7
+import re
+import argparse
+import subprocess
+import datetime
+import time
 
 """
-	A collection of functions useful to the ENCODE QA Team.
-
-	Programs known to be dependent on it:
-	mkChangeNotes
-	qaInit
-        qaEncodeTracks2
-        qaGbTracks
-
-	Functions:
-	getGbdbTables - takes in a database and a set of tables,
-		returns a set of tables that are gbdb files in reality
-	sorted_nicely - a neat way of alphanumerically sorting 
-		anything sortable, a nice snippet of functional programming
-		Google found.
-	countPerChrom - takes in Db & tables, returns a list of strings for 
-		outputting and a dictionary of counts
-		dict->{Table}->{Chrom} = countPerChrom
-		*automatically filters out Gbdbs*
-	checkTableDescriptions - takes in Db and tables, returns a list 
-		of strings and a set of tables with no description
-	checkTableIndex - takes in Db & tables, returns a list of strings 
-		and a set of tables with no index
-		*automatically filters out Gbdbs*
-	checkTableName - takes in a set of tables, returns string output
-		and a set of tables with underscores in the name
-	checkLabels - takes in a trackDb.ra filepath, returns a string 
-		output list and a set of tuples, ([label, #tooLongBy],[labe...)
+    A collection of functions useful to the ENCODE and GB QA Teams.
 """
 
 
 def getGbdbTables(database, tableset):
 	""" Remove tables that aren't pointers to Gbdb files."""
 	sep = "','"
 	tablestr = sep.join(tableset)
 	tablestr = "'" + tablestr + "'"
 
 	cmd = "hgsql %s -e \"select table_name from information_schema.columns where table_name in (%s) and column_name = 'fileName'\"" % (database, tablestr)
 	p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
 	cmdoutput = p.stdout.read()
 	gbdbtableset = set(cmdoutput.split("\n")[1:-1])
 	return gbdbtableset
 
@@ -55,32 +33,30 @@
 	notgbdbtablelist = tables - getGbdbTables(database, tables)
 	tablecounts = dict()
 	output = []
 	globalseen = set()
 	localseen = dict()
 	
 	cmd = "hgsql %s -e \"select chrom from chromInfo\"" % database
 	p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
 	cmdoutput = p.stdout.read()
 
 	chrlist = set(cmdoutput.split("\n")[1:-1])
 	notPositionalTable = set()
 	if not chrlist:
 		output.append("Can't get chromInfo from %s for countPerChrom" % database)
 		return (output, tablecounts)
-	
-	
 	if not notgbdbtablelist:
 		output.append("No tables to count chroms")
 		output.append("")
 		return (output, tablecounts)
 	for i in notgbdbtablelist:
 		counts = dict()
 		cmd = "hgsql %s -e \"select chrom from %s\"" % (database, i)
 		p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
 		cmdoutput = p.stdout.read()
 
 		chrs = cmdoutput.split("\n")[1:-1]
 		localseen[i] = set()
 		
 		if not chrs:
 			notPositionalTable.add(i)
@@ -184,31 +160,31 @@
 	output = []
 	for i in tables:
 		if re.search('.*_.*', i):
 			bad.add(i)
 	if bad:
 		output.append("These tables have underscores in the name")
 		for i in bad:
 			output.append(i)
 		output.append("")
 	else:
 		output.append("No malformed table names")
 		output.append("")
 	return (output, bad)
 
 def checkLabels(trackDb):
-	""" Check if long and short labels are too long."""
+    """ Check if long and short labels are too long, are duplicated or are auto-generated."""
 	f = open(trackDb, "r")
 	lines = f.readlines()
 	seenlabel = dict()
 	output = []
 	toolong = list()
 	p1 = re.compile('^\s*longLabel\s+(.*)$')
 	p2 = re.compile('^\s*shortLabel\s+(.*)$')
 	p3 = re.compile('^\s*#.*$')
 	for i in lines:
 		m1 = p1.match(i)
 		m2 = p2.match(i)
 		m3 = p3.match(i)
 		if m3:
 			continue
 		if m1:
@@ -323,16 +299,15 @@
 			else:
 				outResults.append(i)
 
 		output.append("These tables have position errors:")
 		for i in outResults:
 			output.append(i)
 		if nonPositional:
 			output.append("These tables are non-positional:")
 			for i in nonPositional:
 				output.append(i)
 		output.append("")
 	else:
 		output.append("No position errors")
 		output.append("")
 	return (output, results)
-