src/utils/qa/net.csh 1.24

1.24 2010/03/09 22:52:02 ann
only for vertebrate assemblies.
Index: src/utils/qa/net.csh
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/utils/qa/net.csh,v
retrieving revision 1.23
retrieving revision 1.24
diff -b -B -U 1000000 -r1.23 -r1.24
--- src/utils/qa/net.csh	9 Mar 2010 21:53:54 -0000	1.23
+++ src/utils/qa/net.csh	9 Mar 2010 22:52:02 -0000	1.24
@@ -1,547 +1,547 @@
 #!/bin/tcsh
 source `which qaConfig.csh`
 
 # (qaConfig.csh is found on the PATH and sourced; its contents are not visible here)
 ###############################################
 # 
 #  03-28-04 & 10-27-2005
 #  Checks net tracks.
 #  Written by Bob Kuhn - augmented by Ann Zweig
 #  usage:  net.csh database trackname   (e.g. net.csh mm7 netXenTro1)
 ###############################################
 # on interrupt, jump to the "cleanup" label at the end of the script
 onintr cleanup
 # defaults; maxNumChroms is the chrom-count ceiling for running countPerChrom
 set db=""
 set chromNum=""
 set trackname=""
 set currDir=$cwd
 set maxNumChroms=100
 
 if ($2 == "") then
   # second argument missing: print the usage message and exit
   echo
   echo "  runs test suite on net track."
   echo "  expects trackname in netOrg format"
   echo "  e.g. net.csh mm7 netXenTro1 > & mm7.net.xenTro1 & "
   echo
   echo "    usage:  database trackname"
   echo
   exit
 else
   set db=$1
   set trackname=$2
 endif
 # Org is trackname with its first "net" removed (netXenTro1 -> XenTro1)
 set Org=`echo $trackname | sed -e "s/net//"`
 set track=$trackname
 
 echo "using database $db "
 echo "track: $track"
 echo "trackname: $trackname"
 echo "Org: $Org"
 
 # ------------------------------------------------
 # check level for html and trackDb entry:
 # (findLevel is an external helper; its output is not interpreted by this script)
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "check level for html and trackDb entry:"
-echo "(note that all net (and chain) tracks should now be using the one"
+echo "NOTE: all vertebrate net (and chain) tracks should now be using the one"
 echo "chainNet.html at the top level. So, you can disregard the fact that"
 echo "this test shows no net$Org.html file. However the trackDb location"
 echo "is still relevant."
 echo
 findLevel $db net$Org
 
 # -------------------------------------------------
 # check updateTimes (delegated to updateTimes.csh, an external helper):
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "Update Times (hgwdev vs. hgwbeta):"
 echo
 updateTimes.csh $db $trackname
 
 
 # -------------------------------------------------
 # rowcounts: total number of rows in the net table
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "rowcounts:"
 hgsql -t -e "SELECT COUNT(*) AS rows FROM $trackname" $db
 
 
 
 # ------------------------------------------------
 # featureBits (delegated to runBits.csh; $track holds the same value as $trackname)
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "run featureBits"
 echo
 
 runBits.csh $db $track
 
 # -------------------------------------------------
 # get two records:
 # (LIMIT 2 with no ORDER BY returns whichever two rows the server yields first,
 #  not a random sample)
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "view these two randomly-chosen records in the browser:"
 echo
 
 hgsql -t -e "SELECT * FROM $trackname LIMIT 2" $db
 echo
 
 
 # -------------------------------------------------
 # get chroms from chromInfo:
 # (the list goes to a per-process temp file, $db.chromlist$$, removed at the cleanup label)
 getChromlist.csh $db > $db.chromlist$$
 
 # -------------------------------------------------
 # check for each chrom having data:
 # (one COUNT(*) query per chrom; net$Org names the same table as $trackname when it begins with "net")
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "check that each chrom has data:"
 echo "if there is no output, then it passes."
 echo 'if this list is long (as in scaffold assemblies), grep for "Look" \
       to get past the list'
 set var=""
 
 foreach chrom (`cat $db.chromlist$$`)
   set var=` hgsql -N -e 'SELECT COUNT(*) from 'net$Org' \
      WHERE tName = "'$chrom'"' $db`
   if ($var == 0) then
     echo "$chrom is empty"
   else
     # echo "$chrom is ok"   # debug
   endif
 end
 # mark assemblies with fewer than $maxNumChroms chroms so countPerChrom is run later
 if ( `cat $db.chromlist$$ | wc -l` < $maxNumChroms ) then
   set chromNum="small"
 endif
 
 # -------------------------------------------------
 # check ends for off-end coords:
 # (delegated to checkOffend.csh, an external helper)
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 
 checkOffend.csh $db net$Org
 
 # -------------------------------------------------
 # check sort
 # ($status is the exit code of positionalTblCheck; 0 is reported as a pass)
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "check sort:"
 
 positionalTblCheck -verbose=0 $db net$Org
 if ( ! $status ) then
   echo "sort is ok"
 endif
 echo
 
 # -------------------------------------------------
 # check countPerChrom
 # (run only when chromNum was set to "small", i.e. fewer than $maxNumChroms chroms)
 echo "check countPerChrom"
 if ( $chromNum == "small" ) then
   countPerChrom.csh $db net$Org
 else
   echo "too many chroms to do a count per chrom"
 endif
 echo
 
 
 # -------------------------------------------------
 # check that all levels fall between 1-12 inclusive:
 # (the distinct values are only listed; comparing them to the range is left to the reader)
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "Here is a list of the levels in the $track file:"
 echo "Expect 1-12 inclusive:"
 echo
 hgsql -N -e "SELECT DISTINCT(level) FROM  $trackname" $db
 echo
 
 
 # -------------------------------------------------
 # check that all of the types are of (top, gap, inv, syn, or nonSyn):
 # (again a listing only; no automatic comparison against the expected set)
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "Here is a list of the types in the $track file:"
 echo "Expect these types: top, gap, inv, syn, nonSyn:"
 echo
 hgsql -N -e "SELECT DISTINCT(type) FROM $trackname" $db
 echo
 
 
 # -------------------------------------------------
 # check to ensure that if level=1 then type=top (and vice versa):
 # (var1/var2 hold the violation counts for each direction; nothing is printed when both are 0)
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking to make sure that if level=1 then type=top and vice versa:"
 echo "if there is no output, then it passes."
 echo
 # the two commented lines below are disabled leftover code
 #  set var=`hgsql -N -e 'SELECT COUNT(*) FROM '$chrom'_chain'$Org'Link \
 #     WHERE tName != "'$chrom'"' $db`
 
 set var1=`hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE type = "top" AND level != 1' $db`
 set var2=`hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE level = 1 AND type != "top"' $db`
 
 if ($var1 != 0) then
   echo "there is at least one instance where type = top and level != 1 (check this by hand in the database table)."
 endif
 
 if ($var2 != 0) then
   echo "there is at least one instance where level = 1 and type != top (check this by hand in the database table)."
 endif
 
 
 
 # -------------------------------------------------
 # list the levels used by type=gap rows (expected to be even numbers):
 # (only the gap -> level direction is queried; the result is inspected by eye)
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking to make sure that if type=gap, then level is an even number:"
 echo "expect to see 2, 4, 6, 8, 10, 12 in the following list:"
 echo 
 
 hgsql -N -e 'SELECT DISTINCT(level) FROM '$trackname' WHERE type = "gap" ORDER BY level' $db
 
 
 
 # -------------------------------------------------
 # check to ensure that types are all on the correct levels:
 # (one DISTINCT(level) listing each for inv, syn and nonSyn; inspected by eye)
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking to make sure that types are on the correct levels:"
 echo "expect to see types of inv, syn and nonSyn on levels 3, 5, 7, 9, 11, 13"
 echo "(these won't necessarily go all the way to 13):"
 echo
 echo "type = inv:"
 hgsql -N -e 'SELECT DISTINCT(level) FROM '$trackname' WHERE type = "inv" ORDER BY level' $db
 
 echo
 echo "type = syn:"
 hgsql -N -e 'SELECT DISTINCT(level) FROM '$trackname' WHERE type = "syn" ORDER BY level' $db
 
 echo
 echo "type = nonSyn:"
 hgsql -N -e 'SELECT DISTINCT(level) FROM '$trackname' WHERE type = "nonSyn" ORDER BY level' $db
 echo
 
 
 # -------------------------------------------------
 # generate counts by type:
 # (rows per type, largest count first)
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "here's a list of counts by type:"
 echo
 echo "type	count"
 echo "____	_____"
 hgsql -N -e 'SELECT DISTINCT(type) AS types, COUNT(*) AS number \
    FROM '$trackname' GROUP BY types ORDER BY number DESC' $db
 
 
 # -------------------------------------------------
 # generate counts by level:
 # (rows per level, ordered by level)
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "here's a list of counts by level:"
 echo
 echo "level	count"
 echo "_____	_____"
 hgsql -N -e 'SELECT DISTINCT(level) AS levels, COUNT(*) AS number \
     FROM '$trackname' GROUP BY levels ORDER BY level' $db
 
 
 
 # -------------------------------------------------
 # check that strand has a valid value and is displayed correctly:
 # (sample rows start past tStart 10000000; the first query allows any type, the second excludes gaps)
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "use these three rows to check (manually) that qStrand is displayed properly in the browser:"
 echo
 
 hgsql -t -e "SELECT tName, tStart, tEnd, level, type, qName, strand FROM $trackname WHERE tStart > 10000000 LIMIT 3" $db
 echo
 
 echo "not gap"
 hgsql -t -e "SELECT tName, tStart, tEnd, level, type, qName, strand FROM $trackname WHERE tStart > 10000000 AND type != 'gap' LIMIT 3" $db
 echo
 
 
 
 # -------------------------------------------------
 # check that strand has a valid value
 
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "check that strand has valid values"
 echo "expect '+ -'"
 echo
 
 hgsql -N -e "SELECT DISTINCT(strand) FROM ${trackname}" $db
 # numBlank counts every strand value other than + or -, not only empty strings
 set numBlank = `hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE strand != "+" AND strand != "-"' $db`
 
 if ($numBlank > 0) then
   echo "In addition to the above strand values, there are $numBlank blank strand values.  These should be checked by hand."
 endif
 echo
 
 
 # -------------------------------------------------
 # check that chainId, ali and score are 0 for gaps
 # and >0 for all other types
 # (first three queries list distinct values on gap rows; last three count zeros on non-gap rows)
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "check that chainId, ali and score have valid values"
 echo
 
 echo "chainId value(s) for type = gap (expect 0):"
 hgsql -N -e 'SELECT DISTINCT(chainId) FROM '$trackname' WHERE type = "gap"' $db
 echo
 
 echo "ali value(s) for type = gap (expect 0):"
 hgsql -N -e 'SELECT DISTINCT(ali) FROM '$trackname' WHERE type = "gap"' $db
 echo
 
 echo "score value(s) for type = gap (expect 0):"
 hgsql -N -e 'SELECT DISTINCT(score) FROM '$trackname' WHERE type = "gap"' $db
 echo
 
 echo "count of chainId values that are '0' for all other types (expect 0):"
 hgsql -N -e 'SELECT COUNT(chainId) FROM '$trackname' WHERE type != "gap" AND chainId = 0' $db
 echo
 
 echo "count of ali values that are '0' for all other types (expect 0):"
 hgsql -N -e 'SELECT COUNT(ali) FROM '$trackname' WHERE type != "gap" AND ali = 0' $db
 echo
 
 echo "count of score values that are '0' for all other types (expect 0):"
 hgsql -N -e 'SELECT COUNT(score) FROM '$trackname' WHERE type != "gap" AND score = 0' $db
 echo
 
 
 
 # -------------------------------------------------
 # check min and max values for ali and score (for nonGap types)
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking min and max values for ali and score (for type != gap):"
 echo
 # $types (the distinct non-gap types) is reused by later sections of this script
 set types=`hgsql -N -e 'SELECT DISTINCT(type) FROM '$trackname' WHERE type != "gap"' $db`;
 # outer loop: column to summarize; inner loop: one MIN/MAX query per type
 foreach control('ali' 'score')
 
 foreach type ($types)
 
   echo type=$type
   echo column=$control
 
   hgsql -e 'SELECT MIN('$control') as minimum, MAX('$control') as maximum FROM '$trackname' WHERE type LIKE "'$type'"' $db
 
 echo
 end #foreach
 end #foreach
 
 
 
 # -------------------------------------------------
 # check max values for score by type (for nonGap types)
 # NOTE(review): the query below has no WHERE clause, so gap rows are included despite the heading -- confirm intent
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking max values for score by type (for type != gap):"
 echo
 
   hgsql -e 'SELECT MAX(score) AS scores, type FROM '$trackname' GROUP BY type ORDER BY type DESC' $db
 
 
 
 # -------------------------------------------------
 # check that chrN_chainOrg.id  is unique:
 # superseded by joinerCheck
 # check that chainIds are all found in chainOrg.id:
 # superseded by joinerCheck
 # (no check is run in this section; it only prints a pointer to chain2.csh)
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "Note: joinerCheck is run in chain2.csh - it covers the net track too."
 echo
 
 
 
 # -------------------------------------------------
 # qOver, qFar, and qDup should be -1 for type=gap
 # (one COUNT(*) query per column; a nonzero count is reported as a failure)
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking that qOver, qFar, and qDup = -1 for type=gap:"
 echo
 
 set count=''
 foreach variable ('qOver' 'qFar' 'qDup')
   set count=`hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE type LIKE "gap" AND '$variable' != -1' $db`
 
   if ($count != 0) then
     echo "$variable has $count rows where value != -1 and type = gap"
     echo "you should investigate this by hand."
   else
     # the pass message names the same sentinel (-1) that the query tests
     echo "$variable passes (has $count rows where value != -1 and type = gap)"
   endif
 set count=0
 end #foreach
 
 
 
 # -------------------------------------------------
 # qDup should be 0 or greater for nonGaps
 # (one COUNT(*) query per non-gap type; any negative qDup is a failure)
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking that qDup >= 0 for type != gap:"
 echo
 
 # $types (distinct non-gap types) was set in the ali/score min-max section
 
 foreach type ($types)
 
   echo type=$type
 
   set count=`hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE qDup < 0 AND type LIKE "'$type'"' $db`
 
   if ($count != 0) then
     echo "There are $count rows where type = $type and qDup < 0"
     echo "You should investigate these by hand."
     echo
   else
     echo "passed."
     echo
   endif
 end #foreach
 echo
 
 
 # -------------------------------------------------
 # for nonGaps, if qOver is -1, then qFar must be -1 and vice versa
 # NOTE(review): neither query filters on type, so gap rows are counted too -- confirm intent
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking that if qOver = -1 then qFar = -1 (and vice versa):"
 echo
 
 set pass1='F'
 set pass2='F'
 
 set var1=`hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE qOver = -1 AND qFar != -1' $db`
 set var2=`hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE qOver != -1 AND qFar = -1' $db`
 
 if ($var1 != 0) then
   echo "there is at least one instance where qOver = -1 and qFar != -1 (check this by hand in the database table)."
   set pass1='F'
 else
   set pass1='T'
 endif
 
 if ($var2 != 0) then
   echo "there is at least one instance where qOver != -1 and qFar = -1 (check this by hand in the database table)."
   set pass2='F'
 else
   set pass2='T'
 endif
 # report success only when both directions had zero violations
 if ($pass1 == 'T' && $pass2 == 'T') then
   echo "passed both tests."
 endif
 
 echo
 
 
 # -------------------------------------------------
 # for nonGaps, if qOver > 0, then qFar = 0
 # (one COUNT(*) query per non-gap type, restricted to that type)
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking that for type != gap, if qOver > 0, then qFar = 0:"
 echo
 
 
 # $types (distinct non-gap types) was set in the ali/score min-max section
 
 foreach type ($types)
 
   echo type=$type
   # filter on the loop's type so each iteration reports its own count
   set count=`hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE qOver > 0 AND qFar != 0 AND type LIKE "'$type'"' $db`
 
   if ($count != 0) then
     echo "There are $count rows where type = $type and qOver > 0 and qFar != 0"
     echo "You should investigate these by hand."
     echo
   else
     echo "passed."
     echo
   endif
 end #foreach
 echo
 
 
 
 # -------------------------------------------------
 # note MIN and MAX values for chainId (for nonGaps)
 
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 echo "checking MIN and MAX values for chainId (for type != gap):"
 echo
 
 # $types still holds the distinct non-gap types from the earlier query; the next line is a disabled re-query
 #set types=`hgsql -N -e 'SELECT DISTINCT(type) FROM '$trackname' WHERE type != "gap"' $db`;
 
 foreach type ($types)
 
   echo type=$type
 
   hgsql -e 'SELECT MIN(chainId) as minimum, MAX(chainId) as maximum FROM '$trackname' WHERE type LIKE "'$type'"' $db
 
 echo
 end #foreach
 echo 
 
 # -------------------------------------------------
 # add track to list of files to push and find size of entire push:
 # ($db.$Org.pushlist is appended to on every run; pushlist2 is its de-duplicated copy in the current directory)
 echo
 echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
 
 echo $trackname >> $db.$Org.pushlist
 sort -u $db.$Org.pushlist > pushlist2
 # NOTE(review): pushlist2 has a fixed name and is not removed by the cleanup label -- confirm this is acceptable
 getTableSize.csh $db pushlist2 hgwdev
 rm -f pushlist2
 # normal completion falls through into cleanup:, which is also the onintr target
 echo "the end."
 cleanup:
 rm -f $db.chromlist$$