src/utils/qa/qaEncodeTracks.csh 1.5
1.5 2010/01/22 19:50:44 ann
Edited so that, in general, each check only prints output when it finds a problem. Removed
duplicated separator, blank-line and per-table echo lines. Added two named limits (maxShortLabel, maxLongLabel). General clean-up.
Index: src/utils/qa/qaEncodeTracks.csh
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/utils/qa/qaEncodeTracks.csh,v
retrieving revision 1.4
retrieving revision 1.5
diff -b -B -U 1000000 -r1.4 -r1.5
--- src/utils/qa/qaEncodeTracks.csh 20 Jan 2010 18:36:11 -0000 1.4
+++ src/utils/qa/qaEncodeTracks.csh 22 Jan 2010 19:50:44 -0000 1.5
@@ -1,139 +1,153 @@
#!/bin/tcsh
source `which qaConfig.csh`
###############################################
#
# 03-05-2009
# Ann Zweig
#
# Runs through the usual checks for ENCODE
# tables.
#
###############################################
set db=''
set tableList=''
+set maxShortLabel='16'
+set maxLongLabel='80'
if ($#argv != 2 ) then
echo
- echo " runs test suite for ENCODE tracks"
+ echo " Runs test suite for ENCODE tracks"
echo " (it's best to direct output and errors to a file: '>&')"
+ echo " In general, this script only prints output if there are problems"
echo
echo " usage: db tableList"
echo
exit 1
else
set db=$argv[1]
set tableList=$argv[2]
endif
# run only from hgwdev
if ( "$HOST" != "hgwdev" ) then
echo "\nERROR: you must run this script on hgwdev!\n"
exit 1
endif
# check to see if it is a single tableName or a tableList
file $tableList | egrep "ASCII text" > /dev/null
if (! $status) then
set tables=`cat $tableList`
else
set tables=$tableList
endif
-
+# Takes too long to run (commented out for now):
# featureBits for all tables
#echo "\n\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-#echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
#echo "*** featureBits for all tables ***"
#foreach table ( $tables )
# echo ""
# echo "featureBits -countGaps $db $table"
# nice featureBits -countGaps $db $table
# echo "featureBits -countGaps $db $table gap"
# nice featureBits -countGaps $db $table gap
#end
# check for table descriptions for all tables
echo "\n\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
echo "*** check for tableDescription entry for all tables ***"
-echo "Is there an entry in the tableDescriptions table for each"
-echo "table? (1 == yes, 0 == no)"
+echo "(Only prints tablename if there is NO entry in tableDescription table)"
foreach table ( $tables )
- echo ""
- echo "Table: $table"
- hgsql -Ne "select count(*) from tableDescriptions where tableName = '$table'" $db
+ set num=`hgsql -Ne "select count(*) from tableDescriptions where tableName = '$table'" $db`
+ if ( 0 == $num ) then
+ echo "\nERROR: no description for $table"
+ endif
end
# no underscores in table names
echo "\n\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
echo "*** make sure there are no underscores in table names ***"
-echo "If there's output here, you have one or more tables with "_":"
+echo "(If there's output here, you have one or more tables with "_")"
echo $tables | grep "_"
-echo ""
# check that positional tables are sorted for all tables
echo "\n\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
echo "*** check that positional tables are sorted ***"
-echo "(No output means table is sorted)"
+echo "(Only prints if the table is NOT sorted)"
foreach table ( $tables )
- echo ""
- echo "positionalTblCheck $db $table"
positionalTblCheck $db $table
end
# check table index for each table
echo "\n\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
echo "*** Check table INDEX ***"
+echo "(Only prints the INDEX if there are less than two indicies."
+echo "Presumably, these two will include bin and something else)."
foreach table ( $tables )
- echo ""
- echo ""
- hgsql -e "SHOW INDEX FROM $table" $db
+ set num=`hgsql -N -e "SHOW INDEX FROM $table" $db | wc -l`
+ if ( $num < 2 ) then
+ echo "ERROR:"
+ hgsql -N -e "SHOW INDEX FROM $table" $db
+ endif
end
# checkTableCoords for each table (instead of checkOffend.csh)
echo "\n\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
echo "*** checkTableCoords for each table ***"
+echo "(Only prints if there are coords off the end of a chrom)"
foreach table ( $tables )
checkTableCoords $db $table
end
-echo "\n(Nothing will be printed if all are okay.)"
# check the length of the shortLabel for each track
echo "\n\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
echo "*** MAX length of shortLabel should be 16 ***"
+echo "(Only prints if shortLabel is greater than 16 characters, or if"
+echo "it can't find a shortLabel at all)"
foreach table ( $tables )
- echo "Table: $table"
- cat ~/trackDb/human/hg18/trackDb.wgEncode.ra | grep -A10 "track $table" | grep -m 1 shortLabel \
- | sed -e 's/shortLabel //' | sed -e 's/^ *//' | sed -e 's/.$//' | wc -m
+ set num=`cat ~/trackDb/human/hg18/trackDb.wgEncode.ra | grep -A10 "track $table" \
+ | grep -m 1 shortLabel \
+ | sed -e 's/shortLabel //' | sed -e 's/^ *//' | sed -e 's/.$//' | wc -m`
+ if ( $maxShortLabel < $num || 0 == $num ) then
+ if ( 0 == $num ) then
+ echo "ERROR: can't find a shortLabel for $table"
+ else
+ echo "ERROR: $table shortLabel is $num characters"
+ endif
+ endif
end
# check the length of the longLabel for each track
echo "\n\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
echo "*** MAX length of longLabel should be 80 ***"
+echo "(Only prints if longLabel is greater than 80 characters, or if"
+echo "it can't find a longLabel at all)"
foreach table ( $tables )
- echo "Table: $table"
- cat ~/trackDb/human/hg18/trackDb.wgEncode.ra | grep -A10 "track $table" \
+ set num=`cat ~/trackDb/human/hg18/trackDb.wgEncode.ra | grep -A10 "track $table" \
| grep -m 1 longLabel \
- | sed -e 's/longLabel //' | sed -e 's/^ *//' | sed -e 's/.$//' | wc -m
+ | sed -e 's/longLabel //' | sed -e 's/^ *//' | sed -e 's/.$//' | wc -m`
+ if ( $maxLongLabel < $num || 0 == $num ) then
+ if ( 0 == $num ) then
+ echo "ERROR: can't find a longLabel for $table"
+ else
+ echo "ERROR: $table longLabel is $num characters"
+ endif
+ endif
end
# countPerChrom for all tables
echo "\n\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
echo "*** countPerChrom for all tables ***"
+echo "(prints all counts here -- a useful way to look at this output is to"
+echo "grep for all 'chr4', for example)"
foreach table ( $tables )
echo ""
echo "countPerChrom.csh $db $table"
countPerChrom.csh $db $table
end
echo "\nthe end.\n"
exit 0