src/utils/qa/net.csh 1.20

1.20 2009/07/09 21:19:54 kuhn
added sort check for chain and net tables
Index: src/utils/qa/net.csh
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/utils/qa/net.csh,v
retrieving revision 1.19
retrieving revision 1.20
diff -b -B -U 1000000 -r1.19 -r1.20
--- src/utils/qa/net.csh	21 May 2009 01:28:08 -0000	1.19
+++ src/utils/qa/net.csh	9 Jul 2009 21:19:54 -0000	1.20
@@ -1,515 +1,542 @@
 #!/bin/tcsh
 source `which qaConfig.csh`
 
 
 ###############################################
 # 
 #  03-28-04 & 10-27-2005
 #  Checks net tracks.
 #  Written by Bob Kuhn - augmented by Ann Zweig
 # 
 ###############################################
 
 onintr cleanup
 
 set db=""
+set chromNum=""
 set trackname=""
 set currDir=$cwd
 
 if ($2 == "") then
   # no command line args
   echo
   echo "  runs test suite on net track."
   echo "  expects trackname in netOrg format"
   echo "  e.g. net.csh mm7 netXenTro1 > & mm7.net.xenTro1 & "
   echo
   echo "    usage:  database trackname"
   echo
   exit
 else
   set db=$1
   set trackname=$2
 endif
 
 set Org=`echo $trackname | sed -e "s/net//"`
 set track=$trackname
 
 echo "using database $db "
 echo "track: $track"
 echo "trackname: $trackname"
 echo "Org: $Org"
 
 # ------------------------------------------------
 # check level for html and trackDb entry:
 
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "check level for html and trackDb entry:"
 echo
 findLevel.csh $db net$Org
 
 # -------------------------------------------------
 # check updateTimes:
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "Update Times (hgwdev vs. hgwbeta):"
 echo
 updateTimes.csh $db $trackname
 
 
 # -------------------------------------------------
 # rowcounts:
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "rowcounts:"
 hgsql -t -e "SELECT COUNT(*) AS rows FROM $trackname" $db
 
 
 
 # ------------------------------------------------
 # featureBits
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "run featureBits"
 echo
 
 runBits.csh $db $track
 
 # -------------------------------------------------
 # get two records:
 
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "view these two randomly-chosen records in the browser:"
 echo
 
 hgsql -t -e "SELECT * FROM $trackname LIMIT 2" $db
 echo
 
 
 # -------------------------------------------------
 # get chroms from chromInfo:
 
 getChromlist.csh $db > $db.chromlist$$
 
 # -------------------------------------------------
 # check for each chrom having data:
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "check that each chrom has data:"
 echo "if there is no output, then it passes."
 echo 'if this list is long (as in scaffold assemblies), grep for "Look" \
       to get past the list'
 set var=""
 
 foreach chrom (`cat $db.chromlist$$`)
   set var=` hgsql -N -e 'SELECT COUNT(*) from 'net$Org' \
      WHERE tName = "'$chrom'"' $db`
   if ($var == 0) then
     echo "$chrom is empty"
   else
     # echo "$chrom is ok"   # debug
   endif
 end
 
-
+if ( `egrep . chromlist$$` < 100 ) then
+  set chromNum="small"
+endif
 
 # -------------------------------------------------
 # check ends for off-end coords:
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 
 checkOffend.csh $db net$Org
 
+# -------------------------------------------------
+# check sort
+
+echo
+echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
+echo "check sort:"
+
+positionalTblCheck -verbose=0 $db net$Org
+if ( ! $status ) then
+  echo "sort is ok"
+endif
+echo
+
+# -------------------------------------------------
+# check countPerChrom
+
+echo "check countPerChrom"
+if ( $chromNum == "small" ) then
+  countPerChrom.csh $db net$Org
+else
+  echo "too many chroms to do a count per chrom"
+endif
+echo
+
 
 # -------------------------------------------------
 # check that all levels fall between 1-12 inclusive:
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "Here is a list of the levels in the $track file:"
 echo "Expect 1-12 inclusive:"
 echo
 hgsql -N -e "SELECT DISTINCT(level) FROM  $trackname" $db
 echo
 
 
 # -------------------------------------------------
 # check that all of the types are of (top, gap, inv, syn, or nonSyn):
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "Here is a list of the types in the $track file:"
 echo "Expect these types: top, gap, inv, syn, nonSyn:"
 echo
 hgsql -N -e "SELECT DISTINCT(type) FROM $trackname" $db
 echo
 
 
 # -------------------------------------------------
 # check to ensure that if level=1 then type=top (and vice versa):
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking to make sure that if level=1 then type=top and vice versa:"
 echo "if there is no output, then it passes."
 echo
 
 #  set var=`hgsql -N -e 'SELECT COUNT(*) FROM '$chrom'_chain'$Org'Link \
 #     WHERE tName != "'$chrom'"' $db`
 
 set var1=`hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE type = "top" AND level != 1' $db`
 set var2=`hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE level = 1 AND type != "top"' $db`
 
 if ($var1 != 0) then
   echo "there is at least one instance where type = top and level != 1 (check this by hand in the database table)."
 endif
 
 if ($var2 != 0) then
   echo "there is at least one instance where level = 1 and type != top (check this by hand in the database table)."
 endif
 
 
 
 # -------------------------------------------------
 # check to ensure that if type=gap then level is an even number (and vice versa):
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking to make sure that if type=gap, then level is an even number:"
 echo "expect to see 2, 4, 6, 8, 10, 12 in the following list:"
 echo 
 
 hgsql -N -e 'SELECT DISTINCT(level) FROM '$trackname' WHERE type = "gap" ORDER BY level' $db
 
 
 
 # -------------------------------------------------
 # check to ensure that types are all on the correct levels:
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking to make sure that types are on the correct levels:"
 echo "expect to see types of inv, syn and nonSyn on levels 3, 5, 7, 9, 11, 13"
 echo "(these won't necessarily go all the way to 13):"
 echo
 echo "type = inv:"
 hgsql -N -e 'SELECT DISTINCT(level) FROM '$trackname' WHERE type = "inv" ORDER BY level' $db
 
 echo
 echo "type = syn:"
 hgsql -N -e 'SELECT DISTINCT(level) FROM '$trackname' WHERE type = "syn" ORDER BY level' $db
 
 echo
 echo "type = nonSyn:"
 hgsql -N -e 'SELECT DISTINCT(level) FROM '$trackname' WHERE type = "nonSyn" ORDER BY level' $db
 echo
 
 
 # -------------------------------------------------
 # generate counts by type:
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "here's a list of counts by type:"
 echo
 echo "type	count"
 echo "____	_____"
 hgsql -N -e 'SELECT DISTINCT(type) AS types, COUNT(*) AS number \
    FROM '$trackname' GROUP BY types ORDER BY number DESC' $db
 
 
 # -------------------------------------------------
 # generate counts by level:
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "here's a list of counts by level:"
 echo
 echo "level	count"
 echo "_____	_____"
 hgsql -N -e 'SELECT DISTINCT(level) AS levels, COUNT(*) AS number \
     FROM '$trackname' GROUP BY levels ORDER BY level' $db
 
 
 
 # -------------------------------------------------
 # check that strand has a valid value and is displayed correctly:
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "use these three rows to check (manually) that qStrand is displayed properly in the browser:"
 echo
 
 hgsql -t -e "SELECT tName, tStart, tEnd, level, type, qName, strand FROM $trackname WHERE tStart > 10000000 LIMIT 3" $db
 echo
 
 echo "not gap"
 hgsql -t -e "SELECT tName, tStart, tEnd, level, type, qName, strand FROM $trackname WHERE tStart > 10000000 AND type != 'gap' LIMIT 3" $db
 echo
 
 
 
 # -------------------------------------------------
 # check that strand has a valid value
 
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "check that strand has valid values"
 echo "expect '+ -'"
 echo
 
 hgsql -N -e "SELECT DISTINCT(strand) FROM ${trackname}" $db
 
 set numBlank = `hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE strand != "+" AND strand != "-"' $db`
 
 if ($numBlank > 0) then
   echo "In addition to the above strand values, there are $numBlank blank strand values.  These should be checked by hand."
 endif
 echo
 
 
 # -------------------------------------------------
 # check that chainId, ali and score are 0 for gaps
 # and >0 for all other types
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "check that chainId, ali and score have valid values"
 echo
 
 echo "chainId value(s) for type = gap (expect 0):"
 hgsql -N -e 'SELECT DISTINCT(chainId) FROM '$trackname' WHERE type = "gap"' $db
 echo
 
 echo "ali value(s) for type = gap (expect 0):"
 hgsql -N -e 'SELECT DISTINCT(ali) FROM '$trackname' WHERE type = "gap"' $db
 echo
 
 echo "score value(s) for type = gap (expect 0):"
 hgsql -N -e 'SELECT DISTINCT(score) FROM '$trackname' WHERE type = "gap"' $db
 echo
 
 echo "count of chainId values that are '0' for all other types (expect 0):"
 hgsql -N -e 'SELECT COUNT(chainId) FROM '$trackname' WHERE type != "gap" AND chainId = 0' $db
 echo
 
 echo "count of ali values that are '0' for all other types (expect 0):"
 hgsql -N -e 'SELECT COUNT(ali) FROM '$trackname' WHERE type != "gap" AND ali = 0' $db
 echo
 
 echo "count of score values that are '0' for all other types (expect 0):"
 hgsql -N -e 'SELECT COUNT(score) FROM '$trackname' WHERE type != "gap" AND score = 0' $db
 echo
 
 
 
 # -------------------------------------------------
 # check min and max values for ali and score (for nonGap types)
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking min and max values for ali and score (for type != gap):"
 echo
 
 set types=`hgsql -N -e 'SELECT DISTINCT(type) FROM '$trackname' WHERE type != "gap"' $db`;
 
 foreach control('ali' 'score')
 
 foreach type ($types)
 
   echo type=$type
   echo column=$control
 
   hgsql -e 'SELECT MIN('$control') as minimum, MAX('$control') as maximum FROM '$trackname' WHERE type LIKE "'$type'"' $db
 
 echo
 end #foreach
 end #foreach
 
 
 
 # -------------------------------------------------
 # check max values for score by type (for nonGap types)
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking max values for score by type (for type != gap):"
 echo
 
   hgsql -e 'SELECT MAX(score) AS scores, type FROM '$trackname' GROUP BY type ORDER BY type DESC' $db
 
 
 
 # -------------------------------------------------
 # check that chrN_chainOrg.id  is uniq:
 # superceded by joinerCheck
 # check that chainIds are all found in chainOrg.id:
 # superceded by joinerCheck
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "Note: joinerCheck is run in chain2.csh - it covers the net track too."
 echo
 
 
 
 # -------------------------------------------------
 # qOver, qFar, and qDup should be -1 for type=gap
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking that qOver, qFar, and qDup = -1 for type=gap:"
 echo
 
 set count=''
 foreach variable ('qOver' 'qFar' 'qDup')
   set count=`hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE type LIKE "'gap'" AND '$variable' != -1' $db`
 
   if ($count != 0) then
     echo "$variable has $count rows where value != -1 and type = gap"
     echo "you should investigate this by hand."
   else
     echo "$variable passes (has $count rows where value != 1 and type = gap)"
   endif
 
 set count=0
 end #foreach
 
 
 
 # -------------------------------------------------
 # qDup should be 0 or greater for nonGaps
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking that qDup >= 0 for type != gap:"
 echo
 
 #set types=`hgsql -N -e 'SELECT DISTINCT(type) FROM '$trackname' WHERE type != "gap"' $db`;
 
 foreach type ($types)
 
   echo type=$type
 
   set count=`hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE qDup < 0 AND type LIKE "'$type'"' $db`
 
   if ($count != 0) then
     ech  "There are $count rows where type = $type and qDup < 0"
     echo "You should investigate these by hand."
     echo
   else
     echo "passed."
     echo
   endif
 end #foreach
 echo
 
 
 # -------------------------------------------------
 # for nonGaps, if qOver is -1, then qFar must be -1 and vice versa
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking that if qOver = -1 then qFar = -1 (and vice versa):"
 echo
 
 set pass1='F'
 set pass2='F'
 
 set var1=`hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE qOver = -1 AND qFar != -1' $db`
 set var2=`hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE qOver != -1 AND qFar = -1' $db`
 
 if ($var1 != 0) then
   echo "there is at least one instance where qOver = -1 and qFar != 1 (check this by hand in the database table)."
   set pass1='F'
 else
   set pass1='T'
 endif
 
 if ($var2 != 0) then
   echo "there is at least one instance where qOver != 1 and qFar = 1 (check this by hand in the database table)."
   set pass2='F'
 else
   set pass2='T'
 endif
 
 if ($pass1 == 'T' && $pass2 == 'T') then
     echo passed both tests.
   endif
 endif
 echo
 
 
 # -------------------------------------------------
 # for nonGaps, if qOver > 0, then qFar = 0
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking that for type != gap, if qOver > 0, then qFar = 0:"
 echo
 
 
 #set types=`hgsql -N -e 'SELECT DISTINCT(type) FROM '$trackname' WHERE type != "gap"' $db`;
 
 foreach type ($types)
 
   echo type=$type
 
   set count=`hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE qOver > 0 AND qFar != 0' $db`
 
   if ($count != 0) then
     echo "There are $count rows where qOver > 0 and qFar != 0"
     echo "You should investigate these by hand."
     echo
   else
     echo "passed."
     echo
   endif
 end #foreach
 echo
 
 
 
 # -------------------------------------------------
 # note MIN and MAX values for chainId (for nonGaps)
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking MIN and MAX values for chainId (for type != gap):"
 echo
 
 
 #set types=`hgsql -N -e 'SELECT DISTINCT(type) FROM '$trackname' WHERE type != "gap"' $db`;
 
 foreach type ($types)
 
   echo type=$type
 
   hgsql -e 'SELECT MIN(chainId) as minimum, MAX(chainId) as maximum FROM '$trackname' WHERE type LIKE "'$type'"' $db
 
 echo
 end #foreach
 echo 
 
 # -------------------------------------------------
 # add track to list of files to push and find size of entire push:
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 
 echo $trackname >> $db.$Org.pushlist
 sort -u $db.$Org.pushlist > pushlist2
 
 getTableSize.csh $db pushlist2 hgwdev
 rm -f pushlist2
 
 echo "the end."
 cleanup:
 rm -f $db.chromlist$$