src/utils/qa/PB.csh 1.2
1.2 2009/04/11 00:37:00 rhead
Sourced new qaConfig file at the top. Changed -h hgwbeta lines to look for sql host stored in a variable, specified in the new qaConfig file.
Index: src/utils/qa/PB.csh
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/utils/qa/PB.csh,v
retrieving revision 1.1
retrieving revision 1.2
diff -b -B -U 1000000 -r1.1 -r1.2
--- src/utils/qa/PB.csh 9 Oct 2007 19:36:06 -0000 1.1
+++ src/utils/qa/PB.csh 11 Apr 2009 00:37:00 -0000 1.2
@@ -1,293 +1,294 @@
#! /bin/tcsh
+source `which qaConfig.csh`
############################
# 02-15-04
# 04-14-04
# updated again. added $db support, command line
# sets swissprot database by query from gdbPdb for hgwtest
#
# Runs Proteome Browser database testing
#
############################
set db=""
if ($1 == "") then
echo
echo " runs Proteome Browser testing."
echo " if not given on command line, list of tables, pbTablesAll, \
is expected up one directory."
echo
echo " usage: database, [tablelist]"
echo
exit
else
set db=$1
endif
sort ../pbTablesAll > pbTablesAll
set tablelist="pbTablesAll"
if ($2 != "") then
set tablelist=$2
endif
echo "using:"
echo "db=$db"
echo "tablelist=$tablelist"
echo
# --------------------------------------------
# set proteins database by query from gdbPdb for hgwtest:
set protDb=`hgsql -N -e 'SELECT proteomeDb FROM gdbPdb \
WHERE genomeDb = "'$db'"' hgcentraltest `
echo protDb = $protDb
echo
# --------------------------------------------
# checking off-end and tStart - tEnd relationship
# echo "transcription coords"
# echo "printing tx to file"
# print tx to file:
hgsql -e "SELECT chromInfo.chrom, chromInfo.size - MAX(kgProtMap2.tEnd) \
AS dist_from_end FROM chromInfo, kgProtMap2 \
WHERE chromInfo.chrom = kgProtMap2.tName \
GROUP BY chromInfo.chrom" $db > $db.PB.tx.offEnd
# echo
echo
echo "lines from $db.PB.tx.offEnd > 0:"
awk '{if($2<0) {print $2} }' $db.PB.tx.offEnd
echo "expect blank or check file $db.PB.tx.offEnd"
echo
# --------------------------------------------
# checking pbAnomLimit for low anomaly value less than high
echo
echo "checking pbAnomLimit for low anomaly value less than high"
hgsql -e "SELECT * FROM pbAnomLimit WHERE pctLow >= pctHi" $db
echo "expect blank"
echo
echo
echo "checking pbAnomLimit that low anomaly value is positive or zero"
echo "prints if zero or negative"
hgsql -e "SELECT * FROM pbAnomLimit WHERE pctLow <= 0" $db
echo
# --------------------------------------------
# checking pbResAvgStd -- check that all avg add to 1.00
echo
echo "checking pbResAvgStd -- check that all avg add to 1.00"
hgsql -e "SELECT avg FROM pbResAvgStd" $db \
| gawk '{total+=$1} END {print total}'
echo "pbResAvgStd -- value should be one"
echo
# --------------------------------------------
# pbStamp -- check that all columns have entries and {x,y}{max,min} make sense
echo
echo "pbStamp -- check that all columns have entries and {x,y}{max,min} make sense"
hgsql -t -e "SELECT * FROM pbStamp" $db
echo
# --------------------------------------------
# pbAaDist* -- check that all columns have data and check the SUM(y) of each
echo
echo "pbAaDist* -- check that all columns have data and check the SUM(y) of each"
echo
rm -f AADist.totals
cat $tablelist | grep pbAaDist > AAlist1
foreach table (`cat AAlist1`)
set cnt=`hgsql -N -e "SELECT SUM(y) FROM $table" $db`
echo "$table\t$cnt" >> AADist.totals
end
cat AADist.totals
echo
echo "AADist* -- all values should be close to the same"
echo
# --------------------------------------------
# checking contents and sizes of non-AA Dist tables
echo
echo "checking contents and sizes of non-AA Dist tables"
echo
rm -f stamp.totals
echo "table\tsum(y)" >> stamp.totals
echo "-----\t------" >> stamp.totals
cat $tablelist | grep pep | grep Dist > pepList
foreach table (`cat pepList`)
# hgsql -e "SELECT * FROM $table limit 1" $db
echo
set sum=`hgsql -N -e "SELECT SUM(y) from $table" $db`
echo "$table\t$sum" >> stamp.totals
end
cat stamp.totals
echo
echo "pepResDist should equal 1.000"
echo
# --------------------------------------------
echo
echo "pepPi -- checking count and range"
echo " -- should click into extremes"
echo
echo "sample:"
hgsql -t -e "SELECT * from pepPi limit 1" $db
echo
hgsql -t -e "SELECT COUNT(*) from pepPi" $db
echo
set var=`hgsql -e "SELECT AVG(pI) FROM pepPi" $db`
hgsql -t -e "SELECT AVG(pI) FROM pepPi" $db
echo
echo "extremes of pI:"
echo
echo "pepPi three lowest:"
hgsql -t -e "SELECT * from pepPi ORDER BY pI LIMIT 3" $db
echo
echo "pepPi three highest:"
hgsql -t -e "SELECT * from pepPi ORDER BY pI DESC LIMIT 3" $db
echo
# --------------------------------------------
# pepMwAa -- check count and range
# -- should click into extremes
echo
echo "pepMwAa -- check count and range"
echo " -- should click into extremes"
echo
echo "sample:"
hgsql -t -e "SELECT * from pepMwAa limit 1" $db
echo
hgsql -t -e "SELECT COUNT(*) from pepMwAa" $db
echo
# echo "average MW:"
hgsql -t -e "SELECT ROUND(AVG(molWeight)) from pepMwAa" $db
echo
# echo "min MW:"
hgsql -t -e "SELECT MIN(molWeight) from pepMwAa" $db
echo
# echo "max MW:"
hgsql -t -e "SELECT MAX(molWeight) from pepMwAa" $db
echo
# echo "average aaLen:"
hgsql -t -e "SELECT AVG(aaLen) from pepMwAa" $db
echo
# echo "min aaLen:"
hgsql -t -e "SELECT MIN(aaLen) from pepMwAa" $db
echo
# echo "max aaLen:"
hgsql -t -e "SELECT MAX(aaLen) from pepMwAa" $db
echo
# --------------------------------------------
# get count and one record from each (except for x,y tables above)
echo
echo "get one record from each (except for x,y tables above)"
comm -23 $tablelist AAlist1 > List3
comm -23 List3 pepList > List4
foreach table (`cat List4`)
set c=`hgsql -N -e "SELECT COUNT(*) FROM $table" $db`
echo "$table (count): $c"
hgsql -t -e "SELECT * FROM $table LIMIT 1" $db
echo
end
echo
# --------------------------------------------
# show index from each (except for x,y tables above)
echo
echo "show index from each (except for x,y tables above)"
cat $tablelist | grep -v "pbAaDist" > List5
# echo "checking List5"
foreach table (`cat List5`)
# echo "$table"
hgsql -t -e "SHOW INDEX FROM $table" $db
echo
end
echo
# --------------------------------------------
# check for null or zero values
echo
echo "check for null or zero values"
echo
foreach table (`cat $tablelist`)
hgsql -N -e "DESC $table" $db | awk '{print $1}' > ${table}Cols
echo $table
echo "============="
set totRows=`hgsql -N -e 'SELECT COUNT(*) FROM '$table'' $db`
echo "total rows = "$totRows
rm -f $table.nullzero.totals
echo "null + zero entries:"
foreach col (`cat ${table}Cols`)
set cnt=`hgsql -N -e 'SELECT COUNT(*) FROM '$table' WHERE '$col='""' $db`
echo "$col\t$cnt"
end
if ($table == "ensGeneXref") then
echo "expect translation_name col to be empty due to ensembl change"
echo
endif
echo
end
echo
# --------------------------------------------
# sfDes -- look for uniq
echo
echo "sfDes -- look for uniq"
echo
hgsql -e "SELECT COUNT(*) FROM sfDes" $protDb
hgsql -e "SELECT COUNT(DISTINCT(description)) FROM sfDes" $protDb
hgsql -e "SELECT COUNT(DISTINCT(classification)) FROM sfDes" $protDb
echo
#--------------------------------------------
echo "$protDb.interProXref -- check index"
echo " -- count rows"
echo " -- look for empty fields"
hgsql -t -e "SELECT * FROM interProXref limit 1" $protDb
hgsql -t -e "SHOW INDEX FROM interProXref" $protDb
hgsql -t -e "SELECT COUNT(*) FROM interProXref" $protDb
echo
echo end
rm -f *Cols