#!/bin/tcsh
source `which qaConfig.csh`

###############################################
#
#  12-13-05
#  Robert Kuhn
#
#  check to see if there are genes on all chroms.
#
#  Revision note (commit 4ed6ecc032c24fe0af2f52026950de6574bdefb7,
#  kuhn, Tue Jan 25 13:57:59 2011 -0800; src/utils/qa/countPerChrom.csh,
#  index d9618d9..56569e3):
#    added logic to compute number of items for each x in histogram
#    for case where only one table is displayed
#
#  Usage:
#    countPerChrom.csh database1 table [database2] [RR] [histogram]
#
#  What the script does:
#    - reads the chrom list from chromInfo in database1
#    - counts rows of <table> per chrom in database1 (via hgsql)
#    - optionally counts the same table in a second database / host
#    - prints either a three-column listing (chrom, new, old) or,
#      with "histogram", bar graphs produced by graph.csh
#
#  Scratch files named X*$$ are created in the current directory and
#  removed at the end of the run.
#
###############################################

if ( "$HOST" != "hgwdev" ) then
  echo "\n error: you must run this script on dev!\n"
  exit 1
endif

set db=""
set oldDb=""
set table=""
set host2=""
set chrom=""
set chroms=""
set old=""
set new=""
set machineOut=""
set split=""
set regular=""
set random=""
set max=""
set histo="false"
set histosize=35
set histosize1=35
set histosize2=35

# the second assignment wins; swap the two lines to turn debugging on
set debug=true
set debug=false

if ( $#argv < 2 || $#argv > 5 ) then
  # no command line args
  echo
  echo " check to see if there are annotations on all chroms."
  echo " will check to see if chrom field is named tName or genoName."
  echo
  echo " usage: database1 table [database2] [RR] [histogram]"
  echo
  echo " checks database1 on dev"
  echo " database2 will be checked on beta by default"
  echo " if RR is specified, will use genome-mysql"
  echo " histogram option prints bar graph, not values"
  echo
  exit
else
  set db=$argv[1]
  set table=$argv[2]
endif

# third argument: "histogram", a host keyword (RR/rr/hgwbeta), or a
# second database name (which is then looked up on beta)
if ( $#argv == 3 || $#argv == 4 ) then
  if ( "$argv[3]" == "histogram" ) then
    set histo="true"
  else
    if ( "$argv[3]" == "RR" || "$argv[3]" == "rr" ) then
      set host2="mysql -h genome-mysql -u genome -A"
      set oldDb=$db
      set machineOut="(${argv[3]})"
    else
      # NOTE(review): $sqlbeta is not set in this file; presumably it
      # comes from qaConfig.csh -- confirm
      set host2="hgsql -h $sqlbeta"
      set machineOut="(hgwbeta)"
      if ( "$argv[3]" == "hgwbeta" ) then
        # allow use of "hgwbeta" to check same db in two places
        set oldDb=$db
      else
        # argv[3] must be a db
        set oldDb=$argv[3]
      endif
    endif
  endif
endif

# fourth argument: "histogram", or the host on which to look up the
# database named by the third argument (overrides the settings above)
if ( $#argv > 3 ) then
  if ( "$argv[4]" == "histogram" ) then
    set histo="true"
  else
    set oldDb=$argv[3]
    set machineOut="(${argv[4]})"
    if ( "$argv[4]" == "hgwbeta" ) then
      set host2="hgsql -h $sqlbeta"
    else
      if ( "$argv[4]" == "RR" || "$argv[4]" == "rr" ) then
        set host2="mysql -h genome-mysql -u genome -A"
      else
        echo
        echo "4th parameter must be RR or hgwbeta"
        echo $0
        exit 1
      endif
    endif
  endif
endif

# fifth argument can only be "histogram"; anything else is ignored
if ( $#argv == 5 ) then
  if ( "$argv[5]" == "histogram" ) then
    set histo="true"
  endif
endif

if ( "$debug" == "true" ) then
  echo "db = $db"
  echo "oldDb = $oldDb"
  echo "machineOut = $machineOut"
  echo "table = $table"
  echo "host2 = $host2"
endif

set chroms=`hgsql -N -e "SELECT chrom FROM chromInfo" $db`
set split=`getSplit.csh $db $table`
if ( $status ) then
  echo "\n the database or table may not exist\n"
  # report failure to the caller, like the other error exits do
  exit 1
endif

# getSplit.csh answers "unsplit" for a single table; any other answer
# is used as the per-chrom table-name prefix
if ( "$split" == "unsplit" ) then
  set split=""
else
  set split=${split}_
  echo "\n split tables. e.g., $split$table"
endif

set chrom=`getChromFieldName.csh $db $split$table`
if ( $status ) then
  echo " error getting chromFieldName."
  echo " chrom, genoName or tName required."
  echo
  exit 1
endif

# do randoms last (if no histogram)
if ( "$histo" == "true" ) then
  set regular=`echo $chroms | sed -e "s/ /\n/g" | grep chr`
else
  set regular=`echo $chroms | sed -e "s/ /\n/g" | grep -v random`
  set random=`echo $chroms | sed -e "s/ /\n/g" | grep random`
endif

rm -f Xout$$
rm -f XgraphFile0$$

# one count query per chrom; for split tables the table name is
# rebuilt from the chrom name on every pass and restored afterwards
foreach c ( $regular $random )
  if ( "$split" != "" ) then
    set table="${c}_$table"
  endif
  set new=`nice hgsql -N -e 'SELECT COUNT(*) FROM '$table' \
    WHERE '$chrom' = "'$c'"' $db`
  if ( "$machineOut" != "" ) then
    set old=`nice $host2 -Ne 'SELECT COUNT(*) FROM '$table' \
      WHERE '$chrom' = "'$c'"' $oldDb`
  endif

  # output
  echo "$c\t$new\t$old" >> Xout$$
  set table=$argv[2]
end

if ( "$histo" == "true" ) then
  # keep only the main chroms, strip the "chr" prefix, sort numerically
  cat Xout$$ | grep chr | egrep -v "random|hap|Un|$db" | sed "s/chr//" \
    | sort -n -k1,1 > XgraphFile0$$
  set max1=`cat XgraphFile0$$ | awk '{print $2}' | sort -n | tail -1`
  # nothing to graph (or empty counts): treat the maximum as 0 so the
  # numeric comparisons below stay well-formed
  if ( "$max1" == "" ) set max1=0

  if ( "$machineOut" != "" ) then
    # get max values for 2nd dataset for scaling purposes
    set max2=`cat XgraphFile0$$ | awk '{print $3}' | sort -n | tail -1`
    if ( "$max2" == "" ) set max2=0

    # scale the smaller dataset's graph width relative to the larger one
    if ( $max1 > $max2 ) then
      set histosize2=`echo $max1 $max2 $histosize | awk '{printf("%2d", $2/$1*$3)}'`
      set max=$max1
    else
      # when both maxima are 0 there is nothing to scale; skipping the
      # division avoids an awk divide-by-zero and keeps the default width
      if ( $max2 > 0 ) then
        set histosize1=`echo $max1 $max2 $histosize | awk '{printf("%2d", $1/$2*$3)}'`
      endif
      set max=$max2
    endif

    # number of items represented by each x in the bar graph
    if ( $max > $histosize ) then
      set eachX=`echo $max $histosize | awk '{printf("%2d", $1/$2)}'`
    else
      set eachX=1
    endif

    if ( "$debug" == "true" ) then
      echo max1 max2 eachX $max1 $max2 $eachX
      echo histosize.histosize1.histosize2 $histosize.$histosize1.$histosize2
    endif

    cat XgraphFile0$$ | awk '{print $1, $2}' > XgraphFile1$$
    cat XgraphFile0$$ | awk '{print $1, $3}' > XgraphFile2$$
    graph.csh XgraphFile1$$ $histosize1 > Xgraph1$$
    graph.csh XgraphFile2$$ $histosize2 > Xgraph2$$

    # put a . into files where the value is blank, to keep join from collapsing
    cat Xgraph1$$ | egrep "." | awk '{ if ($2 == "") { $2 = "."; } print $1, $2; }' > Xgraph1b$$
    cat Xgraph2$$ | egrep "." | awk '{ if ($2 == "") { $2 = "."; } print $1, $2; }' > Xgraph2b$$

    # output header
    echo
    echo "chr \t$db \t$oldDb$machineOut" | awk '{printf("%3s %'$histosize1's %-'$histosize2's\n", $1, $2, $3)}'
    echo

    # join on first col, retaining everything from first col
    join -a1 -j1 Xgraph1b$$ Xgraph2b$$ | awk '{printf("%3s %'$histosize1's %-'$histosize2's\n", $1, $2, $3)}'
    echo "max = $max | each x = $eachX"
    echo
  else
    # single table: each x stands for max1/histosize items (at least 1)
    if ( $max1 > $histosize ) then
      set eachX=`echo $max1 $histosize | awk '{printf("%2d", $1/$2)}'`
    else
      set eachX=1
    endif
    graph.csh XgraphFile0$$ | awk '{printf("%3s %-36s\n", $1, $2)}'
    echo "max = $max1 | each x = $eachX"
    echo
  endif
else
  # output header
  echo "chrom \t$db \t$oldDb$machineOut"
  cat Xout$$
endif

# remove all scratch files
rm -f Xgraph1$$
rm -f Xgraph2$$
rm -f Xgraph1b$$
rm -f Xgraph2b$$
rm -f XgraphFile0$$
rm -f XgraphFile1$$
rm -f XgraphFile2$$
rm -f Xout$$