src/hg/makeDb/hgLoadWiggle/wigTableStats.sh 1.4

1.4 2009/11/25 19:55:25 hiram
Allow it to work on multiple tables in a single database
Index: src/hg/makeDb/hgLoadWiggle/wigTableStats.sh
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/hgLoadWiggle/wigTableStats.sh,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 1000000 -r1.3 -r1.4
--- src/hg/makeDb/hgLoadWiggle/wigTableStats.sh	25 Nov 2009 19:10:44 -0000	1.3
+++ src/hg/makeDb/hgLoadWiggle/wigTableStats.sh	25 Nov 2009 19:55:25 -0000	1.4
@@ -1,65 +1,70 @@
 #!/bin/sh
 
 #	$Id$
 
 DB=$1
 T=$2
 
 if [ "x${DB}y" = "xy" -o "x${T}y" = "xy" ]; then
     echo "wigTableStats.sh - compute overall statistics for a wiggle table"
     echo
-    echo "usage: wigTableStats.sh <db> <table>"
-    echo "expected table is a wiggle table"
+    echo "usage: wigTableStats.sh <db> <table> [other tables]"
+    echo "expected tables are wiggle tables"
     echo "output is a summary of min, max, average, count, sumData, stdDev, viewLimits"
     echo "the recommended viewLimits are: mean +- 5*stdDev limited by min,max"
     echo "you will want to round those numbers to reasonable nearby values."
     exit 255
 fi
 
 echo -e "# db.table\tmin max mean count sumData stdDev viewLimits"
+shift		# eliminate the database argument
 
+for T in $*
+do
 echo -e -n "${DB}.${T}\t"
 
 hgsql -N ${DB} \
 -e "select lowerLimit,dataRange,validCount,sumData,sumSquares from ${T}" \
     | awk '
 function abs(value) { if (value < 0) {return -value;} else {return value;} }
 function viewUpper(min, max, mean, stdDev,  fiveDev, range, upper) {
 fiveDev = 5 * stdDev;
 range = abs(max-min);
 upper = mean + fiveDev;
 if (upper > max) upper = max;
 return upper;
 }
 function viewLower(min, max, mean, stdDev,  fiveDev, range, lower) {
 fiveDev = 5 * stdDev;
 range = abs(max-min);
 lower = mean - fiveDev;
 if (lower < min) lower = min;
 return lower;
 }
 BEGIN { lower=3.0e+100; upper=-3.0e+100; count = 0; sumData = 0.0;
 	sumSquares = 0.0 }
 {
 maximum = $1 + $2
 if ($1 < lower) {lower = $1;}
 if (maximum > upper) {upper = maximum;}
 count += $3;
 sumData += $4;
 sumSquares += $5;
 }
 END {
 if (count > 0) {
     mean = sumData / count;
     var = sumSquares - (sumData*sumData)/count;
     stdDev = var;
     if (count > 1) { stdDev = sqrt(var/(count-1)); }
     vLower = viewLower(lower, upper, mean, stdDev);
     vUpper = viewUpper(lower, upper, mean, stdDev);
     printf "%g %g %g %d %g %g viewLimits=%g:%g\n",
 	lower, upper, mean, count, sumData, stdDev, vLower, vUpper
 } else {
 printf "empty data set\n"
 }
 }
 '
+
+done