23cef5432d81381532634095c4886e29942bb00a
mary
  Wed Aug 4 17:29:06 2010 -0700
adding better comments to the script
diff --git src/utils/qa/checkGenomeMysql src/utils/qa/checkGenomeMysql
index 182fc9e..2bce821 100755
--- src/utils/qa/checkGenomeMysql
+++ src/utils/qa/checkGenomeMysql
@@ -1,65 +1,72 @@
 #!/bin/bash
 # quit if something within the script fails
 set -beEu -o pipefail
 source `which qaConfig.bash`
 
 ################################
 #
 # 06/07/10 Mary Goldman
 #
 # Script to make sure that all expected tables in public genome-mysql
 # server are there and that they are not corrupted.
-# Does this by selecting * from table limit 2 (data from limit 1 is
+# Does this by selecting * from table limit 5 (data from limit 1 is
 # stored in metaDb table automatically made by mysql databases).
 #  
 ################################
 
 if [ $# -ne 1 ] 
 then
   echo -e "
-	checks genome-mysql for corrupted tables
-	for the database of the day\n
-	Note: If script finds a table that 
-	doesn't exist, it will not check the 
-	rest of the tables in a database.
-	   usage: database\n" >&2
+	checks genome-mysql for corrupted tables\n
+	Note: Due to the bash configurations in this 
+	script, if mysql encounters an error, the script
+	will immediately exit and will not check the 
+	rest of the tables in database. 
+	   usage: $(basename $0) database\n" >&2
   exit 1
 else
   db="$1"
 fi
 
 # make sure this is a valid database name
 if ! hgsql -Ne "show databases" | grep -qw $db
 then
-  echo -e "\nERROR: database $db not found.  Try running on hgwdev?\n" >&2
+  echo -e "\nERROR: database $db not found.\n" >&2
   exit 1
 fi
 
 allOk=true #keeping track of no errors vs errors
-RRdumpFile=$(getRRdumpfile.csh $db) #location of RR dump file
+# get the location of the RR dump file, which contains a mysql "show table status"
+# for all tables in all databases. Used to get the list of all table names to test.
+RRdumpFile=$(getRRdumpfile.csh $db) 
 
 # only get the table name($1) where the row number($5) is greater than 0
-# Also dropping the column header.
-# uses 'tawk' (program Mark wrote): uses tabs as delimiters
+# Also dropping the column header (NR>1).
+# uses 'tawk' (program Mark wrote): awk with tabs as delimiters
 tableList=$(tawk 'NR>1 {if ($5>0) print $1}' $RRdumpFile)
 
 # for each table in list do a select * limit 5 and check for errors
+# We limit the output because an unbounded select would take forever. However,
+# we can't use limit 1: the first row of every table is stored in the
+# metadata, so mysql will not actually touch the table when a select
+# statement uses limit 1. Thus, we use limit 5.
 for table in $tableList
 do
 # need to do it this way since if mysql has an error it exits zero (ie can't use status)
   mysqlResult=$(mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -Ne "select * from $table \
                limit 5" $db| wc -l)
-  if [ $mysqlResult == 0 ]
+  if [ "$mysqlResult" == 0 ]
   then
     echo "ERROR:Can't select * from  $table limit 5 from $db." >&2
     allOk=false
   fi
 done
 
-if [ $allOk == false ]
+# exit non-zero if the select failed for any table (allOk was set to false)
+if [ "$allOk" == false ]
 then
   exit 1
 else
   exit 0
 fi
 # cron will send an email if you exit non-zero.