src/utils/qa/countPerChrom.csh 1.16
1.16 2010/02/01 05:12:48 kuhn
added back the haplotype chroms at ann's request
Index: src/utils/qa/countPerChrom.csh
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/utils/qa/countPerChrom.csh,v
retrieving revision 1.15
retrieving revision 1.16
diff -b -B -U 1000000 -r1.15 -r1.16
--- src/utils/qa/countPerChrom.csh 24 Jan 2010 19:45:16 -0000 1.15
+++ src/utils/qa/countPerChrom.csh 1 Feb 2010 05:12:48 -0000 1.16
@@ -1,186 +1,186 @@
#!/bin/tcsh
source `which qaConfig.csh`
###############################################
#
# 12-13-05
# Robert Kuhn
#
# check to see if there are genes on all chroms.
#
###############################################
# Guard: refuse to run on any host other than hgwdev.
if ( "hgwdev" != "$HOST" ) then
  echo "\n error: you must run this script on dev!\n"
  exit 1
endif

# Every working variable starts out empty; argument parsing and the
# lookups further down fill them in.
set db="" oldDb="" table=""
set host2="" machineOut=""
set chrom="" chroms=""
set old="" new=""
set split="" regular="" random=""

# Histogram output is off unless requested on the command line;
# histosize is handed to graph.csh and used as a printf field width.
set histo="false" histosize=35

# Debug toggle: the last assignment wins, so swap these two lines
# to turn the debug dump on.
set debug=true
set debug=false
# Between 2 and 5 arguments are accepted; any other count prints the
# usage text and stops.
if ( $#argv >= 2 && $#argv <= 5 ) then
  # the first two arguments are always the database and the table
  set db=$argv[1]
  set table=$argv[2]
else
  # wrong number of command line args
  echo
  echo " check to see if there are annotations on all chroms."
  echo " will check to see if chrom field is named tName or genoName."
  echo
  echo " usage: database1 table [database2] [RR] [histogram]"
  echo
  echo " checks database1 on dev"
  echo " database2 will be checked on beta by default"
  echo " if RR is specified, will use genome-mysql"
  echo " histogram option prints bar graph, not values"
  echo
  exit
endif
# Third argument (only examined when 3 or 4 args were given). It is one of:
#   histogram   - switch to bar-graph output
#   RR / rr     - compare against the same db on genome-mysql
#   hgwbeta     - compare against the same db on beta
#   other       - taken as a second database name, checked on beta
# NOTE(review): sqlbeta is not set in this file; presumably it comes from
# qaConfig.csh -- confirm.
if ( $#argv >= 3 && $#argv <= 4 ) then
  if ( $argv[3] == "histogram" ) then
    set histo="true"
  else if ( $argv[3] == "RR" || $argv[3] == "rr" ) then
    set host2="mysql -h genome-mysql -u genome -A"
    set oldDb=$db
    set machineOut="(${argv[3]})"
  else
    set host2="hgsql -h $sqlbeta"
    set machineOut="(hgwbeta)"
    if ( $argv[3] != "hgwbeta" ) then
      # anything else has to be a database name
      set oldDb=$argv[3]
    else
      # "hgwbeta" means: check the same db in two places
      set oldDb=$db
    endif
  endif
endif

# Fourth argument: "histogram", or the host on which the database named
# by the third argument is checked (this overrides the settings above).
if ( $#argv >= 4 ) then
  if ( $argv[4] == "histogram" ) then
    set histo="true"
  else
    set oldDb=$argv[3]
    set machineOut="(${argv[4]})"
    if ( $argv[4] == "hgwbeta" ) then
      set host2="hgsql -h $sqlbeta"
    else if ( $argv[4] == "RR" || $argv[4] == "rr" ) then
      set host2="mysql -h genome-mysql -u genome -A"
    else
      echo
      echo "4th parameter must be RR or hgwbeta"
      echo
      # re-run with no arguments to display the usage text
      $0
      exit 1
    endif
  endif
endif

# Fifth argument: only "histogram" has any effect.
# The count test stays on its own line: $argv[5] is expanded before the
# expression is evaluated, so it must not be referenced with fewer args.
if ( $#argv == 5 ) then
  if ( $argv[5] == "histogram" ) then
    set histo="true"
  endif
endif

# Optional dump of the parsed settings.
if ( $debug == true ) then
  echo "db = $db"
  echo "oldDb = $oldDb"
  echo "machineOut = $machineOut"
  echo "table = $table"
  echo "host2 = $host2"
endif
# Collect every chrom name listed in chromInfo for $db.
set chroms=`hgsql -N -e "SELECT chrom FROM chromInfo" $db`

# Ask getSplit.csh about the table: the output "unsplit" means a single
# table; any other output is used, with "_" appended, as the prefix of a
# split table name.
set split=`getSplit.csh $db $table`
if ( $status ) then
  echo "\n the database or table may not exist\n"
  # A bare "exit" would return the status of the echo above (0) and so
  # report success; exit non-zero so callers can detect the failure.
  exit 1
endif

if ( $split == "unsplit" ) then
  set split=""
else
  set split=${split}_
  echo "\n split tables. e.g., $split$table"
endif

# Find the name of the chrom column in the (possibly split) table.
set chrom=`getChromFieldName.csh $db $split$table`
if ( $status ) then
  echo " error getting chromFieldName."
  echo " chrom, genoName or tName required."
  echo
  exit 1
endif
# Decide the order in which chroms are counted.
# Without histogram: names not containing "random" first, the randoms last.
# With histogram: every name containing "chr", in the order returned.
if ( $histo != "true" ) then
  set regular=`echo $chroms | sed -e "s/ /\n/g" | grep -v random`
  set random=`echo $chroms | sed -e "s/ /\n/g" | grep random`
else
  set regular=`echo $chroms | sed -e "s/ /\n/g" | grep chr`
endif
# Start from clean scratch files (names carry this shell's pid).
rm -f Xout$$ XgraphFile$$

# Count the rows for each chrom, in $db and (when a comparison host was
# requested) in $oldDb as well.
foreach c ( $regular $random )
  # split tables are named <chrom>_<table>; otherwise use the table as given
  set tbl=$table
  if ( $split != "" ) then
    set tbl="${c}_$table"
  endif

  # the continuation lines stay at column 0 so the SQL text is unchanged
  set new=`nice hgsql -N -e 'SELECT COUNT(*) FROM '$tbl' \
WHERE '$chrom' = "'$c'"' $db`
  if ( $machineOut != "" ) then
    set old=`nice $host2 -Ne 'SELECT COUNT(*) FROM '$tbl' \
WHERE '$chrom' = "'$c'"' $oldDb`
  endif

  # one tab-separated row per chrom: name, count in $db, comparison count
  echo "$c\t$new\t$old" >> Xout$$
end
# Report the counts collected in Xout$$: as bar graphs in histogram mode,
# otherwise as the raw tab-separated rows under a header line.
if ( $histo == "true" ) then
# Keep rows containing "chr", drop the excluded names (and any row matching
# the db name), strip the "chr" prefix and sort numerically on column 1.
# The '-'/'+' pair below is the revision 1.15 -> 1.16 change: "hap" was
# removed from the exclusion pattern so haplotype chroms are kept.
- cat Xout$$ | grep chr | egrep -v "random|hap|Un|$db" | sed "s/chr//" \
+ cat Xout$$ | grep chr | egrep -v "random|Un|$db" | sed "s/chr//" \
| sort -n -k1,1 > XgraphFile$$
if ( $machineOut != "" ) then
# Two graphs side by side: XgraphFile2 holds the chrom name plus the
# comparison count (column 3); each file is passed through graph.csh.
# NOTE(review): graph.csh is not shown here; presumably it emits one bar
# per input row, sized by $histosize -- confirm.
cat XgraphFile$$ | awk '{print $1, $3}' > XgraphFile2$$
graph.csh XgraphFile$$ $histosize > Xgraph1$$
graph.csh XgraphFile2$$ $histosize > Xgraph2$$
# output header
echo
echo "chr \t$db \t$oldDb$machineOut" | awk '{printf("%3s %'$histosize's %-'$histosize's\n", $1, $2, $3)}'
# join on first col, retaining everything from first col
# NOTE(review): join expects lexically sorted input, while these files were
# sorted numerically; both share the same order -- confirm join accepts it.
join -a1 -j1 Xgraph1$$ Xgraph2$$ | awk '{printf("%3s %'$histosize's %-'$histosize's\n", $1, $2, $3)}'
else
# no comparison host: a single graph, printed left-aligned
graph.csh XgraphFile$$ | awk '{printf("%3s %-36s\n", $1, $2)}'
endif
else
# output header
echo "chrom \t$db \t$oldDb$machineOut"
# plain mode: print the rows exactly as they were collected
cat Xout$$
endif
# Remove every scratch file this run may have created; -f keeps rm quiet
# about the ones that were never written.
rm -f Xgraph1$$ Xgraph2$$ XgraphFile$$ XgraphFile2$$ Xout$$