src/utils/qa/knownGene.csh 1.13

1.13 2009/10/11 04:46:17 kuhn
updated to work with new KG. still needs work
Index: src/utils/qa/knownGene.csh
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/utils/qa/knownGene.csh,v
retrieving revision 1.12
retrieving revision 1.13
diff -b -B -U 4 -r1.12 -r1.13
--- src/utils/qa/knownGene.csh	11 Apr 2009 00:37:04 -0000	1.12
+++ src/utils/qa/knownGene.csh	11 Oct 2009 04:46:17 -0000	1.13
@@ -3,8 +3,26 @@
 
 # to do:  check that trackDb.hgGene is "on"
 # to do:  see that cgapBiocDesc has unique entries
 
+# --------------------------------------------
+#  run KGGeneCheck.java on dev -- currently disabled, only the separator echo is live:
+
+#### echo
+#### echo " --------------------------------------------"
+####
+#### echo "server hgwdev.cse.ucsc.edu /
+#### machine hgwdev.cse.ucsc.edu /
+#### quick false /
+#### dbSpec $db /
+#### table all /
+#### zoomCount 4" > $db.props
+###
+#### echo "run KGGeneCheck: \
+####   nohup nice HGGeneCheck $db.props > & $db.KGrobot.out & "
+#### echo
+echo " --------------------------------------------"
+
 ###############################################
 # 
 #  03-09-04
 #  Robert Kuhn
@@ -26,11 +44,11 @@
 if ($#argv == 0 || $#argv > 2) then
   # no command line args
   echo
   echo "  runs test suite on Known Genes track."
-  echo "  expects files, {kg,fb,pb}Tables, up one directory."
+  echo "  suggest KG java run in Trackchecking first."
   echo
-  echo "    usage:  database, [oldDb] (if fresh assembly for existing organism)"
+  echo "    usage:  database [oldDb] (if fresh assembly for existing organism)"
   echo
   exit
 else
   set db=$argv[1]
@@ -46,50 +64,31 @@
 echo "using $db "
 echo "using $oldDb for beta db "
 set curr_dir=$cwd
 
+#  get sets of KG, GS, PB tables from master list:
+findKgTableSet.csh $db 
 
-# --------------------------------------------
-#  get sets of KG, FB(GS), PB tables:
-# also prints list and update times.
-
-#####  /cluster/home/kuhn/bin/findKgTableSet.csh $db 
-
-echo
-echo "the following tables are in the overall KG list, but not in this assembly:"
-comm -23 kgTables kgTables$db
-echo  
-
-echo
-echo "the following tables are in the overall GS list, but not in this assembly:"
-comm -23 fbTables fbTables$db
-echo  
-
-echo
-echo "the following tables are in the overall PB list, but not in this assembly:"
-comm -23 pbTables pbTables$db
+# count items in list
+echo "found the following KG tables:"
+wc -l $db.{kg,gs,pb}Tables
+cat $db.{kg,gs,pb}Tables | sort -u > $db.allTables
+echo "will have some overlap. uniq tables = `cat $db.allTables | sort -u | wc -l`"
+
+# find new tables: in pushQ but NOT in a kgTables list -- egrep . drops empty lines
+echo
+echo "find new tables: in pushQ but NOT in a kgTables list"
+hgsql -h mysqlbeta -Ne "SELECT tbls FROM pushQ WHERE tbls LIKE '%knownGene%' \
+  AND dbs = '"$db"'" qapushq | sed "s/\\n/\n/g" | egrep . > $db.pushqKgList
+dos2unix $db.pushqKgList >& /dev/null
+commTrio.csh $db.allTables $db.pushqKgList 
+
+if ( `wc -l $db.allTables.Only | awk '{print $1}'` != 0 ) then
+  echo "these tables may be in separate tracks? ($db.allTables.Only) :"
+  cat $db.allTables.Only
+endif 
 echo  
 
-
-# --------------------------------------------
-#  run KGGeneCheck.java on dev:
-
-#### echo
-#### echo " --------------------------------------------"
-####
-#### echo "server hgwdev.cse.ucsc.edu /
-#### machine hgwdev.cse.ucsc.edu /
-#### quick false /
-#### dbSpec $db /
-#### table all /
-#### zoomCount 4" > $db.props
-###
-#### echo "run KGGeneCheck: \
-####   nohup nice HGGeneCheck $db.props > & $db.KGrobot.out & "
-#### echo
-echo " --------------------------------------------"
-
-
 # --------------------------------------------
 
 echo
 echo "compare new genes to old set (uniq):"
@@ -101,18 +100,24 @@
 hgsql -N -e "SELECT name from knownGene" $db | sort | uniq > $db.KG.name.dev
 hgsql -N -h $sqlbeta -e "SELECT name from knownGene" $oldDb | sort |uniq > $oldDb.KG.name.beta.uniq
 comm -23 $db.KG.name.dev $oldDb.KG.name.beta.uniq > $db.KG.name.devOnly
 comm -13 $db.KG.name.dev $oldDb.KG.name.beta.uniq > $oldDb.KG.name.betaOnly
-comm -12 $db.KG.name.dev $oldDb.KG.name.beta.uniq > $db.KG.name.commonOnly
-wc -l *Only | grep -v total
+comm -12 $db.KG.name.dev $oldDb.KG.name.beta.uniq > $db.KG.name.common
+wc -l  $db.KG.name.devOnly $oldDb.KG.name.betaOnly $db.KG.name.common | grep -v total 
+
 echo
 echo "check some new ones by hand:"
 head -20 $db.KG.name.devOnly | tail -3
 echo
 echo "check some old ones by hand on beta:"
 head -20 $oldDb.KG.name.betaOnly | tail -3
 echo
 
+echo "strip version number and compare again:"
+awk -F. '{print $1}' $db.KG.name.devOnly     > $db.KG.name.devOnly.noVersion 
+awk -F. '{print $1}' $oldDb.KG.name.betaOnly > $oldDb.KG.name.betaOnly.noVersion
+commTrio.csh  $db.KG.name.devOnly.noVersion $oldDb.KG.name.betaOnly.noVersion
+
 # --------------------------------------------
 # check alignID for unique:
 
 echo
@@ -229,10 +234,11 @@
 echo
 checkCoverage.csh $db $table
 
 echo
-runBits.csh $db $knownGene
+runBits.csh $db knownGene
 echo
+runJoiner.csh $db knownGene
 
 # -------------------------------------------------
 # check exon sizes:
 #