src/utils/qa/chain.csh 1.32
1.32 2010/04/29 01:23:50 mary
adjusting message about checking the matrix, chainMinScore and chainLinearGap
Index: src/utils/qa/chain.csh
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/utils/qa/chain.csh,v
retrieving revision 1.31
retrieving revision 1.32
diff -b -B -U 1000000 -r1.31 -r1.32
--- src/utils/qa/chain.csh 26 Apr 2010 16:31:49 -0000 1.31
+++ src/utils/qa/chain.csh 29 Apr 2010 01:23:50 -0000 1.32
@@ -1,411 +1,404 @@
#!/bin/tcsh
source `which qaConfig.csh`
###############################################
#
# 03-28-04 & 10-26-05
# Checks chain tracks.
# Written by Bob Kuhn - augmented by Ann Zweig
# Slow processes are in chain2.csh
#
###############################################
onintr cleanup
set db=""
set split=""
set chrom=""
set trackname=""
if ( $#argv == 0 || $#argv > 2) then
# no command line args
echo
echo " runs test suite on chain track (on both regular and Link tables)"
echo " expects trackname in chrN_chainOrg format"
echo " though it now works for chainOrg format assemblies"
echo " slow processes are in chain2.csh"
echo
echo " usage: database trackname"
echo " e.g. chain.csh mm7 chrN_chainXenTro1 > & mm7.chain.xenTro1 &"
echo " or chain.csh anoCar1 chainXenTro1 > & anoCar1.chain.xenTro1 &"
echo
exit
else
set db=$argv[1]
set trackname=$argv[2]
endif
set track=`echo $trackname | sed -e "s/chrN_//"`
set Org=`echo $track | sed -e "s/chain//"`
set otherDb=`echo $Org | perl -wpe '$_ = lcfirst($_)'`
set split=`getSplit.csh $db chain$Org hgwdev`
echo "using database $db "
echo "trackname: $trackname"
echo "track: $track"
echo "Org: $Org"
echo
# ------------------------------------------------
# check for priority values for all chains on this assembly:
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "all chains/nets for this assembly:"
echo
# make a list of chain and nets to match actual tables
set chainlist=`hgsql -N -e 'SHOW TABLES LIKE "net%"' $db`
hgsql -N -e 'SHOW TABLES LIKE "net%"' $db \
| sed -e "s/net/chain/g" > chainlist
echo $chainlist | sed -e "s/ /\n/g" >> chainlist
echo "priority" >> chainlist
echo "----" >> chainlist
hgsql -t -e "SELECT tableName, priority FROM trackDb \
WHERE tableName LIKE 'chain%' OR tableName LIKE 'net%' \
ORDER BY priority" $db \
| grep -f chainlist
# ------------------------------------------------
# check level for html and trackDb entry:
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "check level for html and trackDb entry:"
echo "NOTE: all vertebrate chain (and net) tracks should now be using the one"
echo "chainNet.html at the top level. So, you can disregard the fact that"
echo "this test shows no chain$Org.html file. However the trackDb location"
echo "is still relevant."
echo
findLevel $db chain$Org
# -------------------------------------------------
# get chroms from chromInfo:
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo
getChromlist.csh $db > $db.chromlist$$
rm -f $db.$Org.pushlist
rm -f $db.$Org.pushlistLink
if ( $split == "unsplit" ) then
echo "unsplit chain track. echo of long chromlist suppressed"
echo $track >> $db.$Org.pushlist
echo ${track}Link >> $db.$Org.pushlist
echo
else
# make push list for split tables
foreach chrom (`cat $db.chromlist$$`)
echo $chrom
echo ${chrom}_$track >> $db.$Org.pushlist
echo ${chrom}_${track}Link >> $db.$Org.pushlist
end
endif
# ------------------------------------------------
# check updateTimes for each table:
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo
echo "check updateTimes for each table:"
echo "first: hgwdev"
echo "second: hgwbeta"
if ( $split == "unsplit" ) then
updateTimes.csh $db chain$Org | grep -v ERROR
else
updateTimes.csh $db $db.$Org.pushlist | grep -v ERROR
endif
# ------------------------------------------------
# make sure that the tName column matches the table name:
#
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "checking to see if tName matches table name:"
if ( $split == "unsplit" ) then
echo "can't actually do this comparison if table is not split."
echo
else
echo "if there is no output here, then it passes."
foreach chrom (`cat $db.chromlist$$`)
set numTNames=`hgsql -N -e "SELECT COUNT(DISTINCT(tName)) \
FROM ${chrom}_chain$Org" $db`
if ($numTNames != 1) then
if ($numTNames == 0) then
echo "${chrom}_chain$Org is empty."
else
echo "There are $numTNames tNames in ${chrom}_chain$Org"
echo "Should be only one"
echo "(you should check this table by hand)."
endif
else
set tName=`hgsql -N -e "SELECT tName FROM ${chrom}_chain$Org\
LIMIT 1" $db`
if ( $tName != $chrom ) then
echo "tName does not match in $chrom_chain${Org}!"
echo
endif
endif
set numTNames=`hgsql -N -e "SELECT COUNT(DISTINCT(tName)) \
FROM ${chrom}_chain${Org}Link" $db`
if ($numTNames != 1) then
if ($numTNames == 0) then
echo "${chrom}_chain${Org}Link is empty."
else
echo "There are $numTNames tNames in ${chrom}_chain${Org}Link"
echo "Should be only one"
echo "(you should check this table by hand)."
endif
else
set tName=`hgsql -N -e "SELECT tName FROM ${chrom}_chain${Org} \
LIMIT 1" $db`
if ( $tName != $chrom ) then
echo "tName does not match in $chrom_chain${Org}Link!"
echo
endif
endif
end
endif
# -------------------------------------------------
# check the min and max score values
# (later: get the size of the largest chrom and set the column width to that)
# find size of longest chrom name for format purposes
if ( $split != "unsplit" ) then
set length=0
foreach chrom (`cat $db.chromlist$$`)
set len=`echo $chrom | awk '{print length($1)}'`
if ( $len > $length ) then
set length=$len
endif
end
set length=`echo $length | awk '{print $1+1}'`
set longlength=`echo $length | awk '{print $1+12}'`
endif
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "checking min and max score values"
echo
if ( $split == "unsplit" ) then
set min = `hgsql -N -e "SELECT MIN(score) FROM chain${Org}" $db`
set max = `hgsql -N -e "SELECT MAX(score) FROM chain${Org}" $db`
echo "chrom min max"
echo "----- --- ---"
echo "$chrom $min $max"
else
echo "look through this list for outliers."
echo "chrom" "min" "max" \
| gawk '{ printf("%-'${length}'s %8s %12s \n", $1, $2, $3) }'
echo "-----" "---" "---" \
| gawk '{ printf("%-'${length}'s %8s %12s \n", $1, $2, $3) }'
foreach chrom (`cat $db.chromlist$$`)
set min = `hgsql -N -e "SELECT MIN(score) FROM ${chrom}_chain${Org}" $db`
set max = `hgsql -N -e "SELECT MAX(score) FROM ${chrom}_chain${Org}" $db`
echo $chrom $min $max \
| gawk '{ printf("%-'${length}'s %8s %12s \n", $1, $2, $3) }'
end #foreach
endif
echo
# -------------------------------------------------
# check for rowcounts in each table:
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "rowcounts"
echo
if ( $split == "unsplit" ) then
echo $trackname
hgsql -t -e "SELECT COUNT(*) AS rows FROM chain${Org}" $db
echo ${trackname}Link
hgsql -t -e "SELECT COUNT(*) AS rows FROM chain${Org}Link" $db
echo "too many chroms to do a count per chrom"
else
echo "check for rowcounts in each table:"
echo "rowcounts are listed - pay attention to counts of 0"
echo
echo "for chrN_chain${Org}:"
foreach chrom (`cat $db.chromlist$$`)
set var1=`hgsql -N -e "SELECT COUNT(*) FROM ${chrom}_chain${Org}" $db`
echo ${chrom}_chain${Org} $var1 \
| gawk '{ printf("%-'${longlength}'s %6s \n", $1, $2) }'
end
echo
echo "for chrN_chain${Org}Link:"
foreach chrom (`cat $db.chromlist$$`)
set var1=`hgsql -N -e "SELECT COUNT(*) FROM ${chrom}_chain${Org}Link" $db`
set longer=`echo $longlength | awk '{print $1+5}'`
echo ${chrom}_chain${Org}Link $var1 \
| gawk '{ printf("%-'${longer}'s %8s \n", $1, $2) }'
end
echo
endif
echo
# -------------------------------------------------
# check that qStrand has a valid value
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "count + and - strand alignments"
echo "watch for zeroes"
echo
if ( $split == "unsplit" ) then
set badStrands=`hgsql -N -e 'SELECT COUNT(*) FROM chain'$Org' \
WHERE qStrand != "-" AND qStrand != "+"' $db`
if ( $badStrands > 0 ) then
echo 'some qStrands are neither "+" nor "-"'
else
echo 'all qStrands are either "+" or "-"'
echo
endif
# header
echo "posStrand negStrand" \
| gawk '{ printf("%8s %8s \n", $1, $2) }'
echo "--------- ---------" \
| gawk '{ printf("%8s %8s \n", $1, $2) }'
set posStrands = `hgsql -N -e "SELECT COUNT(*) \
FROM chain${Org} WHERE qStrand LIKE '+'" $db`
set negStrands = `hgsql -N -e "SELECT COUNT(*) \
FROM chain${Org} WHERE qStrand LIKE '-'" $db`
echo $posStrands $negStrands \
| gawk '{ printf("%8s %8s \n", $1, $2) }'
else
echo "chrom posStrand negStrand" \
| gawk '{ printf("%-'${length}'s %8s %8s \n", $1, $2, $3) }'
echo "------ --------- ---------" \
| gawk '{ printf("%-'${length}'s %8s %8s \n", $1, $2, $3) }'
rm -f badStrands
foreach chrom (`cat $db.chromlist$$`)
set badStrands=`hgsql -N -e 'SELECT COUNT(*) FROM '$chrom'_chain'$Org' \
WHERE qStrand != "-" AND qStrand != "+"' $db`
# echo $badStrands
if ( $badStrands > 0 ) then
echo $chrom >> badStrands
endif
set posStrands = `hgsql -N -e "SELECT COUNT(*) \
FROM ${chrom}_chain${Org} WHERE qStrand LIKE '+'" $db`
set negStrands = `hgsql -N -e "SELECT COUNT(*) \
FROM ${chrom}_chain${Org} WHERE qStrand LIKE '-'" $db`
echo $chrom $posStrands $negStrands \
| gawk '{ printf("%-'${length}'s %8s %8s \n", $1, $2, $3) }'
end #foreach
echo
if ( -e badStrands ) then
echo 'these chroms have some qStrands that are neither "+" nor "-"'
cat badStrands
else
echo 'all qStrands are "+" or "-"'
endif
rm -f badStrands
endif
echo
# -------------------------------------------------
# check that qStrand is displayed properly:
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "use these three rows to check (manually) that qStrand is \
displayed properly in the $db browser:"
echo
if ( $split == "unsplit" ) then
hgsql -t -e "SELECT tName, tStart, tEnd, qName, qStrand \
FROM $track WHERE tStart > 10000000 LIMIT 3" $db
echo
else
# pick a random chrom > 10 million and pull out three records
set rand=''
set rand=`hgsql -N -e "SELECT chrom FROM chromInfo \
WHERE size > 10000000 ORDER BY RAND() \
LIMIT 1" $db`
hgsql -t -e "SELECT tName, tStart, tEnd, qName, qStrand \
FROM ${rand}_$track WHERE tStart > 10000000 LIMIT 3" $db
echo
endif
# -------------------------------------------------
# check that tables are sorted by tStart:
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
echo "check that tables are sorted by tStart:"
echo
if ( $split == "unsplit" ) then
echo "can't check chrom ordering on unsplit chroms right now"
else
echo "tStart:"
foreach chrom (`cat $db.chromlist$$`)
# echo $chrom
hgsql -N -e "SELECT tStart FROM ${chrom}_${track}" $db \
> $db.$track.tStart
sort -n $db.$track.tStart > $db.$track.tStart.sort
set sortCheck=`comm -23 $db.$track.tStart $db.$track.tStart.sort | wc -l`
# echo $sortCheck
if ($sortCheck != 0) then
echo "${chrom}_${track} is not sorted by tStart"
endif
end
rm $db.$track.tStart $db.$track.tStart.sort
echo "only prints if there is a problem"
echo
echo
endif
# -------------------------------------------------
# find the correct paramaters for the trackDb variables:
echo
echo "*~*~*~*~*~*~*~*~*~*~*~*~*~*"
-#echo "Find the correct parameters for the 3 trackDb variables"
-#echo "which appears in the chain-OtherOrg download file."
-#echo "Compare this to the chain description page."
-echo "Go to the chain/net description page for this track"
-echo "and compare the output which is taken from the chain-OtherOrg download file:"
+echo "Go to the chain/net description page for this track, and make"
+echo "sure that the matrix, the chainMinScore, and the chainLinearGap"
+echo "are displayed correctly The correct values for the 3 variables"
+echo "are given below. They were taken from the README page in the"
+echo "in the downloads directory.:"
echo
getMatrixLines.csh $db $otherDb
getChainLines.csh $db $otherDb
-
-set OrgName=`hgsql -Ne "SELECT organism FROM dbDb WHERE NAME LIKE '$db%'" hgcentraltest`
-
-set OrgName=`echo $OrgName | tr '[A-Z]' '[a-z]'`
-
-
echo
-echo "If the output doesn't match, put overrides in this file with the above output: "
-echo "/cluster/home/$USER/kent/src/hg/makeDb/trackDb/$OrgName/trackDb.chainNet.ra"
-echo "*Note use the path name to guide you. It may not work perfectly in all cases."
+echo "If any of the values are displayed incorrectly, adjust them in"
+echo "trackDb.ra."
# -------------------------------------------------
# to push to beta:
echo
echo "to push to beta:"
echo
echo "-------------------------------------------------"
echo " bigPush.csh $db $db.$Org.pushlistLink "
echo " bigPush.csh $db $db.$Org.pushlist "
echo "-------------------------------------------------"
echo
echo "the end."
cleanup:
rm -f $db.chromlist$$