4a1f2b65f658ec15bbdb3a90ae3762ed777ddc49 kuhn Tue Jan 25 15:26:33 2011 -0800 refactored a bit to get output stats into one place regardless of whether one or two tables were processed diff --git src/utils/qa/countPerChrom.csh src/utils/qa/countPerChrom.csh index 56569e3..6d30de6 100755 --- src/utils/qa/countPerChrom.csh +++ src/utils/qa/countPerChrom.csh @@ -17,30 +17,31 @@ endif set db="" set oldDb="" set table="" set host2="" set chrom="" set chroms="" set old="" set new="" set machineOut="" set split="" set regular="" set random="" set max="" +set eachX=1 set histo="false" set histosize=35 set histosize1=35 set histosize2=35 set debug=true set debug=false if ( $#argv < 2 || $#argv > 5 ) then # no command line args echo echo " check to see if there are annotations on all chroms." echo " will check to see if chrom field is named tName or genoName." echo echo " usage: database1 table [database2] [RR] [histogram]" @@ -153,76 +154,73 @@ set new=`nice hgsql -N -e 'SELECT COUNT(*) FROM '$table' \ WHERE '$chrom' = "'$c'"' $db` if ( $machineOut != "" ) then set old=`nice $host2 -Ne 'SELECT COUNT(*) FROM '$table' \ WHERE '$chrom' = "'$c'"' $oldDb` endif # output echo "$c\t$new\t$old" >> Xout$$ set table=$argv[2] end if ( $histo == "true" ) then cat Xout$$ | grep chr | egrep -v "random|hap|Un|$db" | sed "s/chr//" \ | sort -n -k1,1 > XgraphFile0$$ set max1=`cat XgraphFile0$$ | awk '{print $2}' | sort -n | tail -1` + set max=$max1 if ( $machineOut != "" ) then # get max values for 2nd dataset for scaling purposes set max2=`cat XgraphFile0$$ | awk '{print $3}' | sort -n | tail -1` if ( $max1 > $max2 ) then set histosize2=`echo $max1 $max2 $histosize | awk '{printf("%2d", $2/$1*$3)}'` set max=$max1 else set histosize1=`echo $max1 $max2 $histosize | awk '{printf("%2d", $1/$2*$3)}'` set max=$max2 endif - if ($max > $histosize) then - set eachX=`echo $max $histosize | awk '{printf("%2d", $1/$2)}'` - else - set eachX=1 - endif if ( $debug == true) then echo max1 max2 eachX $max1 $max2 $eachX echo histosize.histosize1.histosize2 $histosize.$histosize1.$histosize2 endif cat XgraphFile0$$ | awk '{print $1, $2}' > XgraphFile1$$ cat XgraphFile0$$ | awk '{print $1, $3}' > XgraphFile2$$ graph.csh XgraphFile1$$ $histosize1 > Xgraph1$$ graph.csh XgraphFile2$$ $histosize2 > Xgraph2$$ # put a . into files where the value is blank, to keep join from collapsing cat Xgraph1$$ | egrep "." | awk '{ if ($2 == "") { $2 = "."; } print $1, $2; }' > Xgraph1b$$ cat Xgraph2$$ | egrep "." | awk '{ if ($2 == "") { $2 = "."; } print $1, $2; }' > Xgraph2b$$ # output header echo echo "chr \t$db \t$oldDb$machineOut" | awk '{printf("%3s %'$histosize1's %-'$histosize2's\n", $1, $2, $3)}' echo # join on first col, retaining everything from first col join -a1 -j1 Xgraph1b$$ Xgraph2b$$ | awk '{printf("%3s %'$histosize1's %-'$histosize2's\n", $1, $2, $3)}' - echo "max = $max | each x = $eachX" - echo else - if ($max1 > $histosize) then + graph.csh XgraphFile0$$ | awk '{printf("%3s %-36s\n", $1, $2)}' + endif + + # print some stats + if ($max > $histosize) then set eachX=`echo $max1 $histosize | awk '{printf("%2d", $1/$2)}'` else set eachX=1 endif - graph.csh XgraphFile0$$ | awk '{printf("%3s %-36s\n", $1, $2)}' echo "max = $max1 | each x = $eachX" echo - endif else + # print text, not histogram # output header echo "chrom \t$db \t$oldDb$machineOut" cat Xout$$ endif rm -f Xgraph1$$ rm -f Xgraph2$$ rm -f Xgraph1b$$ rm -f Xgraph2b$$ rm -f XgraphFile0$$ rm -f XgraphFile1$$ rm -f XgraphFile2$$ rm -f Xout$$