d31f40488ebf0649bcbb31790eb18cc2d6e163a8
kuhn
  Fri Jan 21 16:31:53 2011 -0800
Reworked so the histogram scale is the same on both sides when comparing two datasets.  Added a line reporting the max value and the count each x represents, so the scale makes more sense.
diff --git src/utils/qa/countPerChrom.csh src/utils/qa/countPerChrom.csh
index 7a590d1..d9618d9 100755
--- src/utils/qa/countPerChrom.csh
+++ src/utils/qa/countPerChrom.csh
@@ -1,186 +1,223 @@
 #!/bin/tcsh
 source `which qaConfig.csh`
 
 ###############################################
 # 
 #  12-13-05
 #  Robert Kuhn
 #
 #  check to see if there are genes on all chroms.
 # 
 ###############################################
 
 
 if ( "$HOST" != "hgwdev" ) then
  echo "\n  error: you must run this script on dev!\n"
  exit 1
 endif
 
 set db=""
 set oldDb=""
 set table=""
 set host2=""
 set chrom=""
 set chroms=""
 set old=""
 set new=""
 set machineOut=""
 set split=""
 set regular=""
 set random=""
+set max=""
 set histo="false"
 set histosize=35
+set histosize1=$histosize   # bar width for the first dataset; rescaled when comparing
+set histosize2=$histosize   # bar width for the second dataset; rescaled when comparing
 
 set debug=true 
 set debug=false
 
 if ( $#argv < 2 ||  $#argv > 5 ) then
   # no command line args
   echo
   echo "  check to see if there are annotations on all chroms."
   echo "  will check to see if chrom field is named tName or genoName."
   echo
   echo "    usage:  database1 table [database2] [RR] [histogram]"
   echo
   echo "      checks database1 on dev"
   echo "      database2 will be checked on beta by default"
   echo "        if RR is specified, will use genome-mysql"
   echo "      histogram option prints bar graph, not values"
   echo
   exit
 else
   set db=$argv[1]
   set table=$argv[2]
 endif
 
 if ( $#argv == 3 || $#argv == 4 ) then
   if ( $argv[3] == "histogram" ) then
     set histo="true"
   else
     if ( $argv[3] == "RR" || $argv[3] == "rr" ) then
       set host2="mysql -h genome-mysql -u genome -A"
       set oldDb=$db
       set machineOut="(${argv[3]})"
     else
       set host2="hgsql -h $sqlbeta"
       set machineOut="(hgwbeta)"
       if ( $argv[3] == "hgwbeta" ) then
         # allow use of "hgwbeta" to check same db in two places
         set oldDb=$db
       else
         # argv[3] must be a db
         set oldDb=$argv[3] 
       endif
     endif
   endif
 endif
 
 if ( $#argv > 3 ) then
   if ( $argv[4] == "histogram" ) then
     set histo="true"
   else
     set oldDb=$argv[3]
     set machineOut="(${argv[4]})"
     if ( $argv[4] == "hgwbeta" ) then
       set host2="hgsql -h $sqlbeta"
     else 
       if ( $argv[4] == "RR" || $argv[4] == "rr" ) then
         set host2="mysql -h genome-mysql -u genome -A"
       else
         echo
         echo "4th parameter must be RR or hgwbeta"
         echo
         $0
         exit 1
       endif
     endif
   endif
 endif
 
 if ( $#argv == 5 ) then
   if ( $argv[5] == "histogram" ) then
     set histo="true"
   endif
 endif
 
 if ( $debug == true) then
   echo "db = $db"
   echo "oldDb = $oldDb"
   echo "machineOut = $machineOut"
   echo "table = $table"
   echo "host2 = $host2"
 endif
 
 set chroms=`hgsql -N -e "SELECT chrom FROM chromInfo" $db`
 set split=`getSplit.csh $db $table`
 if ( $status ) then
   echo "\n  the database or table may not exist\n"
   exit
 endif
 
 if ( $split == "unsplit" ) then
   set split=""
 else
   set split=${split}_
   echo "\n  split tables. e.g., $split$table"
 endif
 
 set chrom=`getChromFieldName.csh $db $split$table`
 if ( $status ) then
   echo "  error getting chromFieldName."
   echo "   chrom, genoName or tName required."
   echo
   exit 1
 endif 
 
 # do randoms last (if no histogram)
 if ( $histo == "true" ) then
   set regular=`echo $chroms | sed -e "s/ /\n/g" | grep chr`
 else
   set regular=`echo $chroms | sed -e "s/ /\n/g" | grep -v random`
   set  random=`echo $chroms | sed -e "s/ /\n/g" | grep random`
 endif
 
 rm -f Xout$$
-rm -f XgraphFile$$
+rm -f XgraphFile0$$
 foreach c ( $regular $random )
   if ( $split != "" ) then
     set table="${c}_$table"
   endif
   set new=`nice hgsql -N -e 'SELECT COUNT(*) FROM '$table' \
      WHERE '$chrom' = "'$c'"' $db`
   if ( $machineOut != "" ) then
     set old=`nice $host2 -Ne 'SELECT COUNT(*) FROM '$table' \
       WHERE '$chrom' = "'$c'"' $oldDb`
   endif 
   # output
   echo "$c\t$new\t$old" >> Xout$$
   set table=$argv[2]
 end
 
 if ( $histo == "true" ) then
   cat Xout$$ | grep chr | egrep -v "random|hap|Un|$db" | sed "s/chr//" \
-    | sort -n -k1,1  > XgraphFile$$
+    | sort -n -k1,1  > XgraphFile0$$
+  # XgraphFile0 columns: chrom (chr prefix stripped), count in $db, count in $oldDb
+  set max1=`cat XgraphFile0$$ | awk '{print $2}' | sort -n | tail -1`
   if ( $machineOut != "" ) then
-    cat XgraphFile$$ | awk '{print $1, $3}' > XgraphFile2$$ 
-    graph.csh XgraphFile$$  $histosize > Xgraph1$$
-    graph.csh XgraphFile2$$ $histosize > Xgraph2$$
+    # get max value for the 2nd dataset (column 3) for scaling purposes
+    set max2=`cat XgraphFile0$$ | awk '{print $3}' | sort -n | tail -1`
+    # the side with the larger max keeps the full width; the other shrinks in proportion
+    if ( $max1 > $max2 ) then
+      set histosize2=`echo $max1 $max2 $histosize | awk '{printf("%2d", $2/$1*$3)}'`
+      set max=$max1
+    else
+      set histosize1=`echo $max1 $max2 $histosize | awk '{printf("%2d", $1/$2*$3)}'`
+      set max=$max2
+    endif
+    # counts per x: integer part of max/histosize, but never less than 1
+    set eachX=`echo $max $histosize | awk '{x = int($1/$2); if (x < 1) x = 1; print x}'`
+    if ( $debug == true) then
+      echo max1 max2 eachX $max1 $max2 $eachX
+      echo histosize.histosize1.histosize2 $histosize.$histosize1.$histosize2
+    endif
+    cat XgraphFile0$$ | awk '{print $1, $2}' > XgraphFile1$$
+    cat XgraphFile0$$ | awk '{print $1, $3}' > XgraphFile2$$
+    graph.csh XgraphFile1$$ $histosize1 > Xgraph1$$
+    graph.csh XgraphFile2$$ $histosize2 > Xgraph2$$
+    # put a . into files where the value is blank, to keep join from collapsing
+    cat Xgraph1$$ | egrep "." | awk '{ if ($2 == "") { $2 = "."; } print $1, $2; }' > Xgraph1b$$
+    cat Xgraph2$$ | egrep "." | awk '{ if ($2 == "") { $2 = "."; } print $1, $2; }' > Xgraph2b$$
     # output header
     echo
-    echo "chr \t$db \t$oldDb$machineOut" | awk '{printf("%3s %'$histosize's %-'$histosize's\n", $1, $2, $3)}'
+    echo "chr \t$db \t$oldDb$machineOut" | awk '{printf("%3s %'$histosize1's %-'$histosize2's\n", $1, $2, $3)}'
+    echo
     # join on first col, retaining everything from first col
-    join -a1 -j1 Xgraph1$$ Xgraph2$$ | awk '{printf("%3s %'$histosize's %-'$histosize's\n", $1, $2, $3)}'
+    join -a1 -j1 Xgraph1b$$ Xgraph2b$$ | awk '{printf("%3s %'$histosize1's %-'$histosize2's\n", $1, $2, $3)}'
+    echo "max = $max | each x = $eachX"
+    echo
   else
-    graph.csh XgraphFile$$ | awk '{printf("%3s %-36s\n", $1, $2)}'
+    # single dataset: eachX must be set here or the echo below hits an undefined variable
+    # NOTE(review): assumes graph.csh's default width equals $histosize -- confirm
+    set eachX=`echo $max1 $histosize | awk '{x = int($1/$2); if (x < 1) x = 1; print x}'`
+    graph.csh XgraphFile0$$ | awk '{printf("%3s %-36s\n", $1, $2)}'
+    echo "max = $max1 | each x = $eachX"
+    echo
   endif
 else
   # output header
   echo "chrom \t$db \t$oldDb$machineOut" 
   cat Xout$$
 endif
 
 rm -f Xgraph1$$
 rm -f Xgraph2$$
-rm -f XgraphFile$$
+rm -f Xgraph1b$$
+rm -f Xgraph2b$$
+rm -f XgraphFile0$$
+rm -f XgraphFile1$$
 rm -f XgraphFile2$$
 rm -f Xout$$
+