src/utils/qa/chain.csh 1.27

1.27 2010/03/02 22:36:29 ann
changed call to the new findLevel script (no .csh extension)
Index: src/utils/qa/chain.csh
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/utils/qa/chain.csh,v
retrieving revision 1.26
retrieving revision 1.27
diff -b -B -U 1000000 -r1.26 -r1.27
--- src/utils/qa/chain.csh	25 Feb 2010 20:10:54 -0000	1.26
+++ src/utils/qa/chain.csh	2 Mar 2010 22:36:29 -0000	1.27
@@ -1,394 +1,394 @@
 #!/bin/tcsh
 source `which qaConfig.csh`
 
 
 ###############################################
 # 
 #  03-28-04 & 10-26-05
 #  Checks chain tracks.
 #  Written by Bob Kuhn - augmented by Ann Zweig
 #  Slow processes are in chain2.csh
 # 
 ###############################################
 
 # on interrupt, jump to the cleanup label at the end of the script
 onintr cleanup
 
 # defaults; db and trackname are filled in from the command line below
 set db=""
 set split=""
 set chrom=""
 set trackname=""
 
 # exactly two arguments (database, trackname) are required.  anything else
 # prints the usage message; a single argument must not fall through to
 # $argv[2], which would abort with a subscript error.
 if ( $#argv != 2 ) then
   echo
   echo "  runs test suite on chain track (on both regular and Link tables)"
   echo "  expects trackname in chrN_chainOrg format"
   echo "  though it now works for chainOrg format assemblies"
   echo "  slow processes are in chain2.csh"
   echo
   echo "    usage:  database trackname"
   echo "    e.g. chain.csh mm7     chrN_chainXenTro1 > & mm7.chain.xenTro1 &"
   echo "      or chain.csh anoCar1 chainXenTro1      > & anoCar1.chain.xenTro1 &"
   echo
   exit
 else
   set db=$argv[1]
   set trackname=$argv[2]
 endif
 
 # bare track name: trackname with any "chrN_" prefix removed
 set track = `echo $trackname | sed 's/chrN_//'`
 # organism part: track name with "chain" removed
 set Org = `echo $track | sed 's/chain//'`
 # same as Org but with the first letter lower-cased
 set otherDb = `echo $Org | perl -wpe '$_ = lcfirst($_)'`
 # result is compared against "unsplit" throughout the rest of the script
 set split = `getSplit.csh $db chain$Org hgwdev`
 
 # report what was derived from the command line
 echo "using database $db "
 echo "trackname: $trackname"
 echo "track: $track"
 echo "Org: $Org"
 echo
 
 # ------------------------------------------------
 # check for priority values for all chains on this assembly:
 #
 # A pattern file is built from the net tables that actually exist (each
 # name once as chainXxx and once as netXxx, plus the table-header lines)
 # and used to filter the trackDb listing.  The pattern file is a scratch
 # file: it carries the process id so concurrent runs do not collide, and
 # it is removed as soon as the listing has been printed.
 
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "all chains/nets for this assembly:"
 echo
 
 # make a list of chain and nets to match actual tables
 set chainlist=`hgsql -N -e 'SHOW TABLES LIKE "net%"' $db` 
 hgsql -N -e 'SHOW TABLES LIKE "net%"' $db \
   | sed -e "s/net/chain/g" > $db.chainlist$$
 echo $chainlist | sed -e "s/ /\n/g" >> $db.chainlist$$
 # keep the header and separator rows of the hgsql -t table output
 echo "priority" >> $db.chainlist$$
 echo "----" >> $db.chainlist$$
 
 hgsql -t -e "SELECT tableName, priority FROM trackDb \
   WHERE tableName LIKE 'chain%' OR tableName LIKE 'net%' \
   ORDER BY priority" $db \
   | grep -f $db.chainlist$$
 
 # scratch pattern file is no longer needed
 rm -f $db.chainlist$$
 
 # ------------------------------------------------
 # check level for html and trackDb entry:
 #
 # the level-finding script is run with the database and the chain table
 # name (chain$Org).  it is invoked as "findLevel", with no .csh extension.
 
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "check level for html and trackDb entry:"
 echo
-findLevel.csh $db chain$Org
+findLevel $db chain$Org
 
 # -------------------------------------------------
 # get chroms from chromInfo:
 #
 # Writes the chrom list to $db.chromlist$$ and builds two table lists:
 #   $db.$Org.pushlist      chain tables and their Link tables
 #   $db.$Org.pushlistLink  Link tables only
 # Both files are named in the bigPush instructions printed at the end of
 # the script, so both have to exist.  pushlist keeps its full contents
 # because it is also what gets passed to updateTimes.csh for split tables.
 
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo
 
 getChromlist.csh $db > $db.chromlist$$
 # start from empty lists; everything below appends
 rm -f $db.$Org.pushlist
 rm -f $db.$Org.pushlistLink
 if ( $split == "unsplit" ) then
   echo "unsplit chain track.  echo of long chromlist suppressed"
   echo $track >> $db.$Org.pushlist
   echo ${track}Link >> $db.$Org.pushlist
   echo ${track}Link >> $db.$Org.pushlistLink
   echo
 else
   # make push list for split tables
   foreach chrom (`cat $db.chromlist$$`)
     echo $chrom
     echo ${chrom}_$track >> $db.$Org.pushlist
     echo ${chrom}_${track}Link >> $db.$Org.pushlist
     echo ${chrom}_${track}Link >> $db.$Org.pushlistLink
   end
 endif
 
 # ------------------------------------------------
 # check updateTimes for each table:
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo
 echo "check updateTimes for each table:"
 echo "first: hgwdev"
 echo "second: hgwbeta"
 
 # split tracks hand over the push list file, unsplit tracks the table name
 if ( $split != "unsplit" ) then
   set timesArg=$db.$Org.pushlist
 else
   set timesArg=chain$Org
 endif
 # lines containing ERROR are filtered out of the report
 updateTimes.csh $db $timesArg | grep -v ERROR
 
 # ------------------------------------------------
 # make sure that the tName column matches the table name:
 #
 # For split tracks, each chrN_chainOrg table and each chrN_chainOrgLink
 # table is checked on its own: it must contain exactly one distinct tName,
 # and that tName must equal the chrom the table is named after.
 # An empty table, or one with several tNames, is reported instead.
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking to see if tName matches table name:"
 
 if ( $split == "unsplit" ) then
   echo "can't actually do this comparison if table is not split."
   echo
 else
   echo "if there is no output here, then it passes."
   foreach chrom (`cat $db.chromlist$$`)
     # identical test for the chain table and its Link companion; the
     # tName is always read from the table that is being reported on
     foreach table (${chrom}_chain$Org ${chrom}_chain${Org}Link)
       set numTNames=`hgsql -N -e "SELECT COUNT(DISTINCT(tName)) FROM $table" $db`
       if ($numTNames != 1) then
         if ($numTNames == 0) then
           echo "$table is empty."
         else
           echo "There are $numTNames tNames in $table"
           echo "Should be only one"
           echo "(you should check this table by hand)."
         endif
       else
         set tName=`hgsql -N -e "SELECT tName FROM $table LIMIT 1" $db`
         if ( $tName != $chrom ) then
           echo "tName does not match in ${table}!"
           echo
         endif
       endif
     end
   end
 endif
 
 # -------------------------------------------------
 # check the min and max score values
 #  (later: get the size of the largest chrom and set the column width to that)
 
 # column widths for the per-chrom reports (split tracks only):
 #   length     = longest chrom name + 1
 #   longlength = length + 12
 if ( $split != "unsplit" ) then
   set length=0
   foreach chrom (`cat $db.chromlist$$`)
     set nameLen=`echo $chrom | awk '{print length($1)}'`
     if ( $nameLen > $length ) set length=$nameLen
   end
   @ length = $length + 1
   @ longlength = $length + 12
 endif
 
 # report the smallest and largest score: one row for an unsplit track,
 # one row per chrom for a split track
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking min and max score values"
 echo
 
 if ( $split == "unsplit" ) then
   set min = `hgsql -N -e "SELECT MIN(score) FROM chain${Org}" $db`
   set max = `hgsql -N -e "SELECT MAX(score) FROM chain${Org}" $db`  
   echo "chrom		min	max"
   echo "-----		---	---"
   # NOTE(review): $chrom is initialised to "" near the top of the script and
   # the chrom loops seen so far only run for split tracks, so this column
   # looks like it prints blank here -- confirm that is intended
   echo "$chrom		$min	$max"
 else
   echo "look through this list for outliers."
   # header, separator and data rows share one printf layout; ${length} is
   # spliced into the gawk program as the width of the first column
   echo "chrom" "min" "max" \
     | gawk '{ printf("%-'${length}'s %8s %12s \n", $1, $2, $3) }'
   echo "-----" "---" "---" \
     | gawk '{ printf("%-'${length}'s %8s %12s \n", $1, $2, $3) }'
   foreach chrom (`cat $db.chromlist$$`)
     set min = `hgsql -N -e "SELECT MIN(score) FROM ${chrom}_chain${Org}" $db`
     set max = `hgsql -N -e "SELECT MAX(score) FROM ${chrom}_chain${Org}" $db`  
     echo $chrom	$min $max \
       | gawk '{ printf("%-'${length}'s %8s %12s \n", $1, $2, $3) }'
   end #foreach
 endif
 echo
 
 # -------------------------------------------------
 # check for rowcounts in each table:
 #
 # unsplit tracks get one count for the chain table and one for the Link
 # table; split tracks get one count per chrom for each of the two tables.
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "rowcounts"
 echo
 
 if ( $split == "unsplit" ) then
   echo $trackname
   hgsql -t -e "SELECT COUNT(*) AS rows FROM chain${Org}" $db
   echo ${trackname}Link
   hgsql -t -e "SELECT COUNT(*) AS rows FROM chain${Org}Link" $db
   echo "too many chroms to do a count per chrom"
 else
   echo "check for rowcounts in each table:"
   echo "rowcounts are listed - pay attention to counts of 0"
   echo
   echo "for chrN_chain${Org}:"
   foreach chrom (`cat $db.chromlist$$`)
     set var1=`hgsql -N -e "SELECT COUNT(*) FROM ${chrom}_chain${Org}" $db`
     echo ${chrom}_chain${Org} $var1 \
       | gawk '{ printf("%-'${longlength}'s %6s \n", $1, $2) }'
   end
   echo
   echo "for chrN_chain${Org}Link:"
   # Link table names are longer, so the name column is 5 wider.  the width
   # does not depend on the chrom, so it is computed once before the loop.
   set longer=`echo $longlength | awk '{print $1+5}'`
   foreach chrom (`cat $db.chromlist$$`)
     set var1=`hgsql -N -e "SELECT COUNT(*) FROM ${chrom}_chain${Org}Link" $db`
     echo ${chrom}_chain${Org}Link $var1 \
       | gawk '{ printf("%-'${longer}'s %8s \n", $1, $2) }'
   end
   echo
 endif
 echo
 
 # -------------------------------------------------
 # check that qStrand has a valid value
 #
 # counts rows whose qStrand is neither "+" nor "-", then prints the number
 # of "+" and "-" rows: once for an unsplit track, per chrom for a split one
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "count + and - strand alignments"
 echo "watch for zeroes"
 
 echo
 
 if ( $split == "unsplit" ) then
   # the query is single-quoted so that the SQL can use double quotes;
   # $Org is spliced in between the quoted pieces
   set badStrands=`hgsql -N -e 'SELECT COUNT(*) FROM chain'$Org' \
     WHERE qStrand != "-" AND qStrand != "+"' $db`
   if ( $badStrands > 0 ) then
     echo 'some qStrands are neither "+" nor "-"'
   else
     echo 'all qStrands are either "+" or "-"'
     echo
   endif
   # header
   echo "posStrand negStrand" \
     | gawk '{ printf("%8s %8s \n", $1, $2) }'
   echo "--------- ---------" \
     | gawk '{ printf("%8s %8s \n", $1, $2) }'
   set posStrands = `hgsql -N -e "SELECT COUNT(*) \
     FROM chain${Org} WHERE qStrand LIKE '+'" $db`
   set negStrands = `hgsql -N -e "SELECT COUNT(*) \
     FROM chain${Org} WHERE qStrand LIKE '-'" $db`
   echo $posStrands $negStrands \
     | gawk '{ printf("%8s %8s \n", $1, $2) }'
 else
   # header and separator rows, formatted like the data rows below
   echo "chrom posStrand negStrand" \
       | gawk '{ printf("%-'${length}'s %8s %8s \n", $1, $2, $3) }'
   echo "------  ---------  ---------" \
       | gawk '{ printf("%-'${length}'s %8s %8s \n", $1, $2, $3) }'
   # the file "badStrands" in the current directory collects the chroms
   # that have invalid strand values; its existence is tested after the loop
   rm -f badStrands
   foreach chrom (`cat $db.chromlist$$`)
     set badStrands=`hgsql -N -e 'SELECT COUNT(*) FROM '$chrom'_chain'$Org' \
       WHERE qStrand != "-" AND qStrand != "+"' $db`
     # echo $badStrands
     if ( $badStrands > 0 ) then
       echo $chrom >> badStrands
     endif
     set posStrands = `hgsql -N -e "SELECT COUNT(*) \
       FROM ${chrom}_chain${Org} WHERE qStrand LIKE '+'" $db`
     set negStrands = `hgsql -N -e "SELECT COUNT(*) \
       FROM ${chrom}_chain${Org} WHERE qStrand LIKE '-'" $db`
     echo $chrom $posStrands $negStrands \
       | gawk '{ printf("%-'${length}'s %8s %8s \n", $1, $2, $3) }'
   end #foreach
   echo
   # the file only exists if at least one chrom was appended above
   if ( -e badStrands ) then
     echo 'these chroms have some qStrands that are neither "+" nor "-"'
     cat badStrands
   else
     echo 'all qStrands are "+" or "-"'
   endif
   rm -f badStrands
 endif
 
 echo
 
 # -------------------------------------------------
 # check that qStrand is displayed properly:
 #
 # prints three sample rows (tStart > 10000000) for a manual comparison
 # against the browser display.  for split tracks the rows come from one
 # randomly chosen chrom larger than 10000000 bases; if the assembly has no
 # such chrom the sample is skipped instead of querying a malformed table name.
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "use these three rows to check (manually) that qStrand is \
    displayed properly in the $db browser:"
 echo
 
 if ( $split == "unsplit" ) then
   hgsql -t -e "SELECT tName, tStart, tEnd, qName, qStrand \
       FROM $track WHERE tStart > 10000000 LIMIT 3" $db
   echo
 else
   # pick a random chrom > 10 million and pull out three records
   set rand=''
   set rand=`hgsql -N -e "SELECT chrom FROM chromInfo \
      WHERE size > 10000000 ORDER BY RAND() \
      LIMIT 1" $db`
   if ( "$rand" == "" ) then
     echo "no chrom larger than 10000000 bases found in chromInfo for $db"
     echo "nothing to display."
   else
     hgsql -t -e "SELECT tName, tStart, tEnd, qName, qStrand \
         FROM ${rand}_$track WHERE tStart > 10000000 LIMIT 3" $db
   endif
   echo
 endif
 
 # -------------------------------------------------
 # check that tables are sorted by tStart:
 #
 # for each split table the tStart column is dumped in table order and
 # compared with a numerically sorted copy; any line that only appears in
 # the unsorted dump means the table is out of order.  the two scratch
 # files carry the process id so that concurrent runs do not overwrite
 # each other, and they are removed with -f so that an empty chrom list
 # (no files written) does not produce an error.
 
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo  "check that tables are sorted by tStart:"
 echo
 
 if ( $split == "unsplit" ) then
   echo "can't check chrom ordering on unsplit chroms right now"
 else
   echo  "tStart:"
   foreach chrom (`cat $db.chromlist$$`)
     # echo $chrom
     hgsql -N -e "SELECT tStart FROM ${chrom}_${track}" $db \
       > $db.$track.tStart$$
     sort -n $db.$track.tStart$$ > $db.$track.tStart.sort$$
     set sortCheck=`comm -23 $db.$track.tStart$$ $db.$track.tStart.sort$$ | wc -l`
     # echo $sortCheck
     if ($sortCheck != 0) then
       echo "${chrom}_${track} is not sorted by tStart"
     endif
   end
   rm -f $db.$track.tStart$$ $db.$track.tStart.sort$$
   echo "only prints if there is a problem"
   echo
   echo
 endif
 
 # -------------------------------------------------
 # find the correct parameters for the trackDb variables:
 #
 # both helper scripts are run with this database and the other organism's
 # database name (Org with its first letter lower-cased)
 
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo  "Find the correct parameters for the 3 trackDb variables"
 echo  "which appears in the chain-OtherOrg download file."
 echo  "Compare this to the chain description page."
 echo
 
 getMatrixLines.csh $db $otherDb
 getChainLines.csh $db $otherDb
 
 # -------------------------------------------------
 # to push to beta:
 #
 # only prints the two bigPush.csh commands for the operator to run by hand;
 # nothing is pushed by this script
 
 echo
 echo "to push to beta:"
 echo
 echo  "-------------------------------------------------"
 echo  "     bigPush.csh $db $db.$Org.pushlistLink       "
 echo  "     bigPush.csh $db $db.$Org.pushlist           "
 echo  "-------------------------------------------------"
 echo
 
 echo "the end."
 # reached by falling through at the end of a normal run, and by the
 # "onintr cleanup" handler on interrupt: removes the per-process chrom list
 cleanup:
 rm -f $db.chromlist$$