src/utils/qa/net.csh 1.24
1.24 2010/03/09 22:52:02 ann
only for vertebrate assemblies.
Index: src/utils/qa/net.csh
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/utils/qa/net.csh,v
retrieving revision 1.23
retrieving revision 1.24
diff -b -B -U 1000000 -r1.23 -r1.24
--- src/utils/qa/net.csh 9 Mar 2010 21:53:54 -0000 1.23
+++ src/utils/qa/net.csh 9 Mar 2010 22:52:02 -0000 1.24
@@ -1,547 +1,547 @@
#!/bin/tcsh
source `which qaConfig.csh`

###############################################
#
# 03-28-04 & 10-27-2005
# Checks net tracks.
# Written by Bob Kuhn - augmented by Ann Zweig
#
# usage: net.csh database trackname
#   database  - assembly database handed to hgsql and the QA helpers
#   trackname - net table name, expected in netOrg form
#
###############################################

# An interrupt jumps to the cleanup label at the bottom of the script.
onintr cleanup

# Defaults for the script-wide variables.
set maxNumChroms=100
set currDir=$cwd
set chromNum=""
set db=""
set trackname=""

# Guard clause: without a second argument, print the usage text and stop.
if ($2 == "") then
    echo
    echo " runs test suite on net track."
    echo " expects trackname in netOrg format"
    echo " e.g. net.csh mm7 netXenTro1 > & mm7.net.xenTro1 & "
    echo
    echo " usage: database trackname"
    echo
    exit
endif

set db=$1
set trackname=$2
set track=$trackname
# Org is the track name with the first occurrence of net removed.
set Org=`echo $trackname | sed -e "s/net//"`

# Echo the resolved settings so they appear at the top of the report.
echo "using database $db "
echo "track: $track"
echo "trackname: $trackname"
echo "Org: $Org"
# ------------------------------------------------
# check level for html and trackDb entry:
# Prints an explanatory note, then runs findLevel on the database and on
# the table name rebuilt as net followed by $Org.
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "check level for html and trackDb entry:"
-echo "(note that all net (and chain) tracks should now be using the one"
+echo "NOTE: all vertebrate net (and chain) tracks should now be using the one"
echo "chainNet.html at the top level. So, you can disregard the fact that"
echo "this test shows no net$Org.html file. However the trackDb location"
echo "is still relevant."
echo
# NOTE(review): findLevel is an external helper; presumably it reports where
# the html and trackDb entries for the track live -- confirm against that tool.
findLevel $db net$Org
# -------------------------------------------------
# check updateTimes:
# Delegates to updateTimes.csh with the database and the table name.
# NOTE(review): the hgwdev vs. hgwbeta comparison is what the banner text
# promises; the helper itself is not visible here -- confirm.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "Update Times (hgwdev vs. hgwbeta):"
echo
updateTimes.csh $db $trackname
# -------------------------------------------------
# rowcounts:
# Total number of rows in the net table, printed as a table (-t).
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "rowcounts:"
hgsql -t -e "SELECT COUNT(*) AS rows FROM $trackname" $db
# ------------------------------------------------
# featureBits
# Delegates to runBits.csh with the database and the track name.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "run featureBits"
echo
runBits.csh $db $track
# -------------------------------------------------
# get two records:
# The query is a plain LIMIT 2 with no ORDER BY, so these are simply the
# first two rows the server returns for the table.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "view these two randomly-chosen records in the browser:"
echo
hgsql -t -e "SELECT * FROM $trackname LIMIT 2" $db
echo
# -------------------------------------------------
# get chroms from chromInfo:
# Output of getChromlist.csh is saved to a per-process file ($$ is the
# shell pid); the file is removed at the cleanup label at the end of the
# script.
getChromlist.csh $db > $db.chromlist$$
# -------------------------------------------------
# check for each chrom having data:
# Issues one COUNT(*) query per name in the saved chrom list and reports
# only the names that have no rows in the net table.  Afterwards the size
# of the chrom list decides whether the per-chrom count is run later.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "check that each chrom has data:"
echo "if there is no output, then it passes."
echo 'if this list is long (as in scaffold assemblies), grep for "Look" \
to get past the list'

set rowCount=""
foreach chr (`cat $db.chromlist$$`)
    set rowCount=`hgsql -N -e 'SELECT COUNT(*) from 'net$Org' WHERE tName = "'$chr'"' $db`
    if ($rowCount == 0) then
        echo "$chr is empty"
    endif
end

# Flag assemblies with fewer than maxNumChroms sequences as small.
set numChroms=`cat $db.chromlist$$ | wc -l`
if ( $numChroms < $maxNumChroms ) then
    set chromNum="small"
endif
# -------------------------------------------------
# check ends for off-end coords:
# Delegates to checkOffend.csh with the database and the net table name.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
checkOffend.csh $db net$Org

# -------------------------------------------------
# check sort
# positionalTblCheck reports through its exit status; a zero status is
# announced as a pass.  Nothing may run between the tool and the test.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "check sort:"
positionalTblCheck -verbose=0 $db net$Org
if ( $status == 0 ) then
    echo "sort is ok"
endif
echo

# -------------------------------------------------
# check countPerChrom
# Only run when the chrom list was flagged as small earlier in the script.
echo "check countPerChrom"
if ( $chromNum == "small" ) then
    countPerChrom.csh $db net$Org
else
    echo "too many chroms to do a count per chrom"
endif
echo
# -------------------------------------------------
# check that all levels fall between 1-12 inclusive:
# Prints every distinct value of the level column, without a column
# header (-N), for visual comparison against the expected range.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "Here is a list of the levels in the $track file:"
echo "Expect 1-12 inclusive:"
echo
hgsql -N -e "SELECT DISTINCT(level) FROM $trackname" $db
echo
# -------------------------------------------------
# check that all of the types are of (top, gap, inv, syn, or nonSyn):
# Prints every distinct value of the type column; the comparison against
# the expected list is left to the reader of the report.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "Here is a list of the types in the $track file:"
echo "Expect these types: top, gap, inv, syn, nonSyn:"
echo
hgsql -N -e "SELECT DISTINCT(type) FROM $trackname" $db
echo
# -------------------------------------------------
# check to ensure that if level=1 then type=top (and vice versa):
# Two counts are taken, one per direction of the rule; a message is
# printed only for a direction that has offending rows.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "checking to make sure that if level=1 then type=top and vice versa:"
echo "if there is no output, then it passes."
echo

# rows typed top that are not on level 1
set topOffLevel=`hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE type = "top" AND level != 1' $db`
# rows on level 1 that are not typed top
set levelOffTop=`hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE level = 1 AND type != "top"' $db`

if ($topOffLevel != 0) then
    echo "there is at least one instance where type = top and level != 1 (check this by hand in the database table)."
endif
if ($levelOffTop != 0) then
    echo "there is at least one instance where level = 1 and type != top (check this by hand in the database table)."
endif
# -------------------------------------------------
# check to ensure that if type=gap then level is an even number (and vice versa):
# Lists the distinct levels on which gap rows occur, in ascending order.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "checking to make sure that if type=gap, then level is an even number:"
echo "expect to see 2, 4, 6, 8, 10, 12 in the following list:"
echo
hgsql -N -e 'SELECT DISTINCT(level) FROM '$trackname' WHERE type = "gap" ORDER BY level' $db

# -------------------------------------------------
# check to ensure that types are all on the correct levels:
# Same listing, repeated for each of the three fill types in turn.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "checking to make sure that types are on the correct levels:"
echo "expect to see types of inv, syn and nonSyn on levels 3, 5, 7, 9, 11, 13"
echo "(these won't necessarily go all the way to 13):"
echo
foreach netType (inv syn nonSyn)
    echo "type = ${netType}:"
    hgsql -N -e 'SELECT DISTINCT(level) FROM '$trackname' WHERE type = "'$netType'" ORDER BY level' $db
    echo
end
# -------------------------------------------------
# generate counts by type:
# One output row per type with its row count, largest count first.
# The two echo lines below stand in for a column header because hgsql is
# run with -N, which suppresses its own header.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "here's a list of counts by type:"
echo
echo "type count"
echo "____ _____"
hgsql -N -e 'SELECT DISTINCT(type) AS types, COUNT(*) AS number \
FROM '$trackname' GROUP BY types ORDER BY number DESC' $db
# -------------------------------------------------
# generate counts by level:
# One output row per level with its row count, ordered by level.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "here's a list of counts by level:"
echo
echo "level count"
echo "_____ _____"
hgsql -N -e 'SELECT DISTINCT(level) AS levels, COUNT(*) AS number \
FROM '$trackname' GROUP BY levels ORDER BY level' $db
# -------------------------------------------------
# check that strand has a valid value and is displayed correctly:
# Prints up to three rows of any type, then up to three non-gap rows,
# all with tStart above 10000000, for a manual look in the browser.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "use these three rows to check (manually) that qStrand is displayed properly in the browser:"
echo
hgsql -t -e "SELECT tName, tStart, tEnd, level, type, qName, strand FROM $trackname WHERE tStart > 10000000 LIMIT 3" $db
echo
echo "not gap"
hgsql -t -e "SELECT tName, tStart, tEnd, level, type, qName, strand FROM $trackname WHERE tStart > 10000000 AND type != 'gap' LIMIT 3" $db
echo
# -------------------------------------------------
# check that strand has a valid value
# Lists the distinct strand values, then counts rows whose strand is
# neither + nor -.
# NOTE(review): the message below calls those rows blank, but the query
# counts any value other than + or - -- confirm that only empty strings
# occur in practice.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "check that strand has valid values"
echo "expect '+ -'"
echo
hgsql -N -e "SELECT DISTINCT(strand) FROM ${trackname}" $db
set numBlank = `hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE strand != "+" AND strand != "-"' $db`
if ($numBlank > 0) then
echo "In addition to the above strand values, there are $numBlank blank strand values. These should be checked by hand."
endif
echo
# -------------------------------------------------
# check that chainId, ali and score are 0 for gaps
# and >0 for all other types
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "check that chainId, ali and score have valid values"
echo

# Pass 1: distinct values each column takes on gap rows.
foreach col (chainId ali score)
    echo "$col value(s) for type = gap (expect 0):"
    hgsql -N -e 'SELECT DISTINCT('$col') FROM '$trackname' WHERE type = "gap"' $db
    echo
end

# Pass 2: number of non-gap rows where the column is zero.
foreach col (chainId ali score)
    echo "count of $col values that are '0' for all other types (expect 0):"
    hgsql -N -e 'SELECT COUNT('$col') FROM '$trackname' WHERE type != "gap" AND '$col' = 0' $db
    echo
end
# -------------------------------------------------
# check min and max values for ali and score (for nonGap types)
# The list of non-gap types is fetched once into $types; later sections
# of the script reuse that variable.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "checking min and max values for ali and score (for type != gap):"
echo
set types=`hgsql -N -e 'SELECT DISTINCT(type) FROM '$trackname' WHERE type != "gap"' $db`

# Outer loop: column to summarise.  Inner loop: one report per type.
foreach statCol (ali score)
    foreach netType ($types)
        echo type=$netType
        echo column=$statCol
        hgsql -e 'SELECT MIN('$statCol') as minimum, MAX('$statCol') as maximum FROM '$trackname' WHERE type LIKE "'$netType'"' $db
        echo
    end
end
# -------------------------------------------------
# check max values for score by type (for nonGap types)
# Gap rows are excluded in the query itself so that the output matches
# the title printed below; previously the gap row was listed as well.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "checking max values for score by type (for type != gap):"
echo
hgsql -e 'SELECT MAX(score) AS scores, type FROM '$trackname' WHERE type != "gap" GROUP BY type ORDER BY type DESC' $db
# -------------------------------------------------
# check that chrN_chainOrg.id is uniq:
# superseded by joinerCheck
# check that chainIds are all found in chainOrg.id:
# superseded by joinerCheck
# No query is run in this section; it only prints a pointer to chain2.csh.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "Note: joinerCheck is run in chain2.csh - it covers the net track too."
echo
# -------------------------------------------------
# qOver, qFar, and qDup should be -1 for type=gap
# For each of the three columns, count the gap rows whose value differs
# from -1 and report either the failure or the pass.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "checking that qOver, qFar, and qDup = -1 for type=gap:"
echo
foreach variable (qOver qFar qDup)
    set count=`hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE type LIKE "gap" AND '$variable' != -1' $db`
    if ($count != 0) then
        echo "$variable has $count rows where value != -1 and type = gap"
        echo "you should investigate this by hand."
    else
        # The pass message now names the value actually tested (-1).
        echo "$variable passes (has $count rows where value != -1 and type = gap)"
    endif
end
# -------------------------------------------------
# qDup should be 0 or greater for nonGaps
# Relies on $types, the list of non-gap types fetched earlier in the
# script by the ali/score min and max section.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "checking that qDup >= 0 for type != gap:"
echo
foreach type ($types)
    echo type=$type
    set count=`hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE qDup < 0 AND type LIKE "'$type'"' $db`
    if ($count != 0) then
        # Was misspelled as ech, so the failure text was never printed.
        echo "There are $count rows where type = $type and qDup < 0"
        echo "You should investigate these by hand."
        echo
    else
        echo "passed."
        echo
    endif
end
echo
# -------------------------------------------------
# for nonGaps, if qOver is -1, then qFar must be -1 and vice versa
# Two counts are taken, one per direction of the rule.  A message is
# printed for each direction that has offending rows; if neither has any,
# a single pass line is printed.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "checking that if qOver = -1 then qFar = -1 (and vice versa):"
echo
set var1=`hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE qOver = -1 AND qFar != -1' $db`
set var2=`hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE qOver != -1 AND qFar = -1' $db`
# The messages now quote -1, the value the queries actually test.
if ($var1 != 0) then
    echo "there is at least one instance where qOver = -1 and qFar != -1 (check this by hand in the database table)."
endif
if ($var2 != 0) then
    echo "there is at least one instance where qOver != -1 and qFar = -1 (check this by hand in the database table)."
endif
if ($var1 == 0 && $var2 == 0) then
    echo passed both tests.
endif
echo
# -------------------------------------------------
# for nonGaps, if qOver > 0, then qFar = 0
# Relies on $types, the list of non-gap types fetched earlier in the
# script.  The query is restricted to the type being reported, so each
# pass of the loop gives a per-type result instead of repeating the same
# table-wide count.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "checking that for type != gap, if qOver > 0, then qFar = 0:"
echo
foreach type ($types)
    echo type=$type
    set count=`hgsql -N -e 'SELECT COUNT(*) FROM '$trackname' WHERE qOver > 0 AND qFar != 0 AND type LIKE "'$type'"' $db`
    if ($count != 0) then
        echo "There are $count rows where type = $type and qOver > 0 and qFar != 0"
        echo "You should investigate these by hand."
        echo
    else
        echo "passed."
        echo
    endif
end
echo
# -------------------------------------------------
# note MIN and MAX values for chainId (for nonGaps)
# Prints one minimum/maximum pair per non-gap type; the output is for
# reading only and is not compared against anything here.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "checking MIN and MAX values for chainId (for type != gap):"
echo
# $types is not refetched here; it still holds the non-gap type list
# fetched earlier by the ali/score min and max section.
foreach type ($types)
echo type=$type
hgsql -e 'SELECT MIN(chainId) as minimum, MAX(chainId) as maximum FROM '$trackname' WHERE type LIKE "'$type'"' $db
echo
end #foreach
echo
# -------------------------------------------------
# add track to list of files to push and find size of entire push:
# The table name is appended to $db.$Org.pushlist, so that file grows
# across runs; a de-duplicated copy is written to pushlist2 in the
# current directory, passed to getTableSize.csh, and then deleted.
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo $trackname >> $db.$Org.pushlist
sort -u $db.$Org.pushlist > pushlist2
getTableSize.csh $db pushlist2 hgwdev
rm -f pushlist2
echo "the end."
# Target of the onintr handler set at the top of the script; normal
# execution also falls through to here.  Removes the per-process chrom
# list file.
cleanup:
rm -f $db.chromlist$$