src/utils/qa/countPerChrom.csh d31f40488ebf0649bcbb31790eb18cc2d6e163a8

d31f40488ebf0649bcbb31790eb18cc2d6e163a8
kuhn
  Fri Jan 21 16:31:53 2011 -0800
Reworked so the histogram scale is the same on both sides when comparing two datasets. Added a statement about the max value and the value of each x, so the scale makes more sense.
diff --git src/utils/qa/countPerChrom.csh src/utils/qa/countPerChrom.csh
index 7a590d1..d9618d9 100755
--- src/utils/qa/countPerChrom.csh
+++ src/utils/qa/countPerChrom.csh
@@ -16,32 +16,35 @@
  exit 1
 endif
 
 set db=""
 set oldDb=""
 set table=""
 set host2=""
 set chrom=""
 set chroms=""
 set old=""
 set new=""
 set machineOut=""
 set split=""
 set regular=""
 set random=""
+set max=""                 # larger of the two datasets' top counts (set when comparing)
 set histo="false"
 set histosize=35
+set histosize1=$histosize  # graph width for database1; shrunk if its max is smaller
+set histosize2=$histosize  # graph width for database2; shrunk if its max is smaller
 
 set debug=true 
 set debug=false
 
 if ( $#argv < 2 ||  $#argv > 5 ) then
   # no command line args
   echo
   echo "  check to see if there are annotations on all chroms."
   echo "  will check to see if chrom field is named tName or genoName."
   echo
   echo "    usage:  database1 table [database2] [RR] [histogram]"
   echo
   echo "      checks database1 on dev"
   echo "      database2 will be checked on beta by default"
   echo "        if RR is specified, will use genome-mysql"
@@ -130,57 +133,91 @@
   echo "  error getting chromFieldName."
   echo "   chrom, genoName or tName required."
   echo
   exit 1
 endif 
 
 # do randoms last (if no histogram)
 if ( $histo == "true" ) then
   set regular=`echo $chroms | sed -e "s/ /\n/g" | grep chr`
 else
   set regular=`echo $chroms | sed -e "s/ /\n/g" | grep -v random`
   set  random=`echo $chroms | sed -e "s/ /\n/g" | grep random`
 endif
 
 rm -f Xout$$
-rm -f XgraphFile$$
+rm -f XgraphFile0$$
 foreach c ( $regular $random )
   if ( $split != "" ) then
     set table="${c}_$table"
   endif
   set new=`nice hgsql -N -e 'SELECT COUNT(*) FROM '$table' \
      WHERE '$chrom' = "'$c'"' $db`
   if ( $machineOut != "" ) then
     set old=`nice $host2 -Ne 'SELECT COUNT(*) FROM '$table' \
       WHERE '$chrom' = "'$c'"' $oldDb`
   endif 
   # output
   echo "$c\t$new\t$old" >> Xout$$
   set table=$argv[2]
 end
 
 if ( $histo == "true" ) then
   cat Xout$$ | grep chr | egrep -v "random|hap|Un|$db" | sed "s/chr//" \
-    | sort -n -k1,1  > XgraphFile$$
+    | sort -n -k1,1  > XgraphFile0$$
+  set max1=`cat XgraphFile0$$ | awk '{print $2}' | sort -n | tail -1`
   if ( $machineOut != "" ) then
-    cat XgraphFile$$ | awk '{print $1, $3}' > XgraphFile2$$ 
-    graph.csh XgraphFile$$  $histosize > Xgraph1$$
-    graph.csh XgraphFile2$$ $histosize > Xgraph2$$
+    # get max of 2nd dataset; the side with the smaller max gets a narrower graph
+    set max2=`cat XgraphFile0$$ | awk '{print $3}' | sort -n | tail -1`
+    if ( $max1 > $max2 ) then
+      set histosize2=`echo $max1 $max2 $histosize | awk '{printf("%2d", $2/$1*$3)}'`
+      set max=$max1
+    else
+      # max2 can be 0 here (no rows at all): keep full width instead of dividing by 0
+      set histosize1=`echo $max1 $max2 $histosize | awk '{printf("%2d", ($2 > 0) ? $1/$2*$3 : $3)}'`
+      set max=$max2
+    endif
+    # each x stands for max/histosize rows (integer part), but never less than 1
+    set eachX=`echo $max | awk -v w=$histosize '{x = int($1/w); printf("%d", (x > 1) ? x : 1)}'`
+    if ( $debug == true) then
+      echo max1 max2 eachX $max1 $max2 $eachX
+      echo histosize.histosize1.histosize2 $histosize.$histosize1.$histosize2
+    endif
+
+    cat XgraphFile0$$ | awk '{print $1, $2}' > XgraphFile1$$
+    cat XgraphFile0$$ | awk '{print $1, $3}' > XgraphFile2$$
+    graph.csh XgraphFile1$$ $histosize1 > Xgraph1$$
+    graph.csh XgraphFile2$$ $histosize2 > Xgraph2$$
+    # put a . into files where the value is blank, to keep join from collapsing
+    cat Xgraph1$$ | egrep "." | awk '{ if ($2 == "") { $2 = "."; } print $1, $2; }' > Xgraph1b$$
+    cat Xgraph2$$ | egrep "." | awk '{ if ($2 == "") { $2 = "."; } print $1, $2; }' > Xgraph2b$$
     # output header
     echo
-    echo "chr \t$db \t$oldDb$machineOut" | awk '{printf("%3s %'$histosize's %-'$histosize's\n", $1, $2, $3)}'
+    echo "chr \t$db \t$oldDb$machineOut" | awk '{printf("%3s %'$histosize1's %-'$histosize2's\n", $1, $2, $3)}'
+    echo
     # join on first col, retaining everything from first col
-    join -a1 -j1 Xgraph1$$ Xgraph2$$ | awk '{printf("%3s %'$histosize's %-'$histosize's\n", $1, $2, $3)}'
+    join -a1 -j1 Xgraph1b$$ Xgraph2b$$ | awk '{printf("%3s %'$histosize1's %-'$histosize2's\n", $1, $2, $3)}'
+    echo "max = $max | each x = $eachX"
+    echo
   else
-    graph.csh XgraphFile$$ | awk '{printf("%3s %-36s\n", $1, $2)}'
+    # single dataset: eachX must be set here too, it is otherwise undefined below.
+    # NOTE(review): assumes graph.csh's default width equals $histosize -- confirm
+    set eachX=`echo $max1 | awk -v w=$histosize '{x = int($1/w); printf("%d", (x > 1) ? x : 1)}'`
+    graph.csh XgraphFile0$$ | awk '{printf("%3s %-36s\n", $1, $2)}'
+    echo "max = $max1 | each x = $eachX"
+    echo
   endif
 else
   # output header
   echo "chrom \t$db \t$oldDb$machineOut" 
   cat Xout$$
 endif
 
 rm -f Xgraph1$$
 rm -f Xgraph2$$
-rm -f XgraphFile$$
+rm -f Xgraph1b$$
+rm -f Xgraph2b$$
+rm -f XgraphFile0$$
+rm -f XgraphFile1$$
 rm -f XgraphFile2$$
 rm -f Xout$$
+