d31f40488ebf0649bcbb31790eb18cc2d6e163a8 kuhn Fri Jan 21 16:31:53 2011 -0800 reworked so histogram scale is the same on both sides if comparing two datasets. added statement about max value and value of each x, so scale makes more sense diff --git src/utils/qa/countPerChrom.csh src/utils/qa/countPerChrom.csh index 7a590d1..d9618d9 100755 --- src/utils/qa/countPerChrom.csh +++ src/utils/qa/countPerChrom.csh @@ -16,32 +16,35 @@ exit 1 endif set db="" set oldDb="" set table="" set host2="" set chrom="" set chroms="" set old="" set new="" set machineOut="" set split="" set regular="" set random="" +set max="" set histo="false" set histosize=35 +set histosize1=35 +set histosize2=35 set debug=true set debug=false if ( $#argv < 2 || $#argv > 5 ) then # no command line args echo echo " check to see if there are annotations on all chroms." echo " will check to see if chrom field is named tName or genoName." echo echo " usage: database1 table [database2] [RR] [histogram]" echo echo " checks database1 on dev" echo " database2 will be checked on beta by default" echo " if RR is specified, will use genome-mysql" @@ -130,57 +133,91 @@ echo " error getting chromFieldName." echo " chrom, genoName or tName required." echo exit 1 endif # do randoms last (if no histogram) if ( $histo == "true" ) then set regular=`echo $chroms | sed -e "s/ /\n/g" | grep chr` else set regular=`echo $chroms | sed -e "s/ /\n/g" | grep -v random` set random=`echo $chroms | sed -e "s/ /\n/g" | grep random` endif rm -f Xout$$ -rm -f XgraphFile$$ +rm -f XgraphFile0$$ foreach c ( $regular $random ) if ( $split != "" ) then set table="${c}_$table" endif set new=`nice hgsql -N -e 'SELECT COUNT(*) FROM '$table' \ WHERE '$chrom' = "'$c'"' $db` if ( $machineOut != "" ) then set old=`nice $host2 -Ne 'SELECT COUNT(*) FROM '$table' \ WHERE '$chrom' = "'$c'"' $oldDb` endif # output echo "$c\t$new\t$old" >> Xout$$ set table=$argv[2] end if ( $histo == "true" ) then cat Xout$$ | grep chr | egrep -v "random|hap|Un|$db" | sed "s/chr//" \ - | sort -n -k1,1 > XgraphFile$$ + | sort -n -k1,1 > XgraphFile0$$ + set max1=`cat XgraphFile0$$ | awk '{print $2}' | sort -n | tail -1` if ( $machineOut != "" ) then - cat XgraphFile$$ | awk '{print $1, $3}' > XgraphFile2$$ - graph.csh XgraphFile$$ $histosize > Xgraph1$$ - graph.csh XgraphFile2$$ $histosize > Xgraph2$$ + # get max values for 2nd dataset for scaling purposes + set max2=`cat XgraphFile0$$ | awk '{print $3}' | sort -n | tail -1` + if ( $max1 > $max2 ) then + set histosize2=`echo $max1 $max2 $histosize | awk '{printf("%2d", $2/$1*$3)}'` + set max=$max1 + else + set histosize1=`echo $max1 $max2 $histosize | awk '{printf("%2d", $1/$2*$3)}'` + set max=$max2 + endif + if ($max > $histosize) then + set eachX=`echo $max $histosize | awk '{printf("%2d", $1/$2)}'` + else + set eachX=1 + endif + + if ( $debug == true) then + echo max1 max2 eachX $max1 $max2 $eachX + echo histosize.histosize1.histosize2 $histosize.$histosize1.$histosize2 + endif + + cat XgraphFile0$$ | awk '{print $1, $2}' > XgraphFile1$$ + cat XgraphFile0$$ | awk '{print $1, $3}' > XgraphFile2$$ + graph.csh XgraphFile1$$ $histosize1 > Xgraph1$$ + graph.csh XgraphFile2$$ $histosize2 > Xgraph2$$ + # put a . into files where the value is blank, to keep join from collapsing + cat Xgraph1$$ | egrep "." | awk '{ if ($2 == "") { $2 = "."; } print $1, $2; }' > Xgraph1b$$ + cat Xgraph2$$ | egrep "." | awk '{ if ($2 == "") { $2 = "."; } print $1, $2; }' > Xgraph2b$$ # output header echo - echo "chr \t$db \t$oldDb$machineOut" | awk '{printf("%3s %'$histosize's %-'$histosize's\n", $1, $2, $3)}' + echo "chr \t$db \t$oldDb$machineOut" | awk '{printf("%3s %'$histosize1's %-'$histosize2's\n", $1, $2, $3)}' + echo # join on first col, retaining everything from first col - join -a1 -j1 Xgraph1$$ Xgraph2$$ | awk '{printf("%3s %'$histosize's %-'$histosize's\n", $1, $2, $3)}' + join -a1 -j1 Xgraph1b$$ Xgraph2b$$ | awk '{printf("%3s %'$histosize1's %-'$histosize2's\n", $1, $2, $3)}' + echo "max = $max | each x = $eachX" + echo else - graph.csh XgraphFile$$ | awk '{printf("%3s %-36s\n", $1, $2)}' + graph.csh XgraphFile0$$ | awk '{printf("%3s %-36s\n", $1, $2)}' + echo "max = $max1 | each x = $eachX" + echo endif else # output header echo "chrom \t$db \t$oldDb$machineOut" cat Xout$$ endif rm -f Xgraph1$$ rm -f Xgraph2$$ -rm -f XgraphFile$$ +rm -f Xgraph1b$$ +rm -f Xgraph2b$$ +rm -f XgraphFile0$$ +rm -f XgraphFile1$$ rm -f XgraphFile2$$ rm -f Xout$$ +