src/utils/qa/doGenbankTests 1.1

1.1 2009/04/10 20:37:55 rhead
Initial file. Script to run genePredCheck/pslCheck, joinerCheck, gbSanity, and featureBits on GenBank tables.
Index: src/utils/qa/doGenbankTests
===================================================================
RCS file: src/utils/qa/doGenbankTests
diff -N src/utils/qa/doGenbankTests
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/utils/qa/doGenbankTests	10 Apr 2009 20:37:55 -0000	1.1
@@ -0,0 +1,159 @@
+#!/bin/bash -e
+###############################################################################
+#  doGenbankTests
+#
+#  June 2008 -- Brooke Rhead
+#
+#  Wrapper script for automated checking of GenBank tables.
+#  Note: this script assumes the kent source tree is checked out in ~/kent/src
+# 
+###############################################################################
+
+# tableExists db table
+#   Ask hgsql whether database db holds a table matching the SQL LIKE
+#   pattern 'table'.  Returns 0 if hgsql printed anything, 1 otherwise
+#   (so hgsql printing nothing at all is reported as "no such table").
+tableExists()
+{
+  local database=$1
+  local pattern=$2
+  local found=$(hgsql -Ne "show tables like '$pattern'" $database)
+  # The status of this test is the function's return value.
+  [ -n "$found" ]
+}
+
+allOk=yes
+
+# Pick the GenBank paths that belong to this host.  Anything other than
+# hgwdev or hgwbeta (including an unset $HOST) is refused with exit 1.
+case "$HOST" in
+  hgwdev)
+    genbankPath="/cluster/data/genbank/etc/genbank.tbls"
+    sanityPath="/cluster/data/genbank"
+    ;;
+  hgwbeta)
+    genbankPath="/genbank/etc/genbank.tbls"
+    sanityPath="/genbank"
+    ;;
+  *) echo "This script must be run from hgwdev or hgwbeta." >&2; exit 1 ;;
+esac
+
+# Usage check: exactly one or two arguments are accepted.
+if (( $# < 1 || $# > 2 )); then
+  echo "usage:  $(basename $0) db [outFile]
+where outFile is the name of a file in which to record results" >&2
+  exit 1
+fi
+
+# First argument: the database to check.
+db="$1"
+# Optional second argument: send stdout and stderr of everything below to that file.
+if (( $# == 2 )); then
+  exec > "$2" 2>&1
+fi
+
+# Report header: script name, database, and the current date.
+echo "$(basename $0) output on $db"
+date
+echo
+
+# List the GenBank tracks whose tables exist in $db.  Each entry below is
+# table:label; every table found is also appended to trackList.
+echo "The following GenBank tracks are present in this assembly:"
+for entry in 'refGene:RefSeq Genes' 'xenoRefGene:Other RefSeq' \
+    'all_mrna:$Org mRNAs' 'xenoMrna:Other mRNAs' 'all_est:$Org ESTs' \
+    '%intronEst:Spliced ESTs' 'xenoEst:Other ESTs' 'mgcGenes:MGC Genes' \
+    'orfeomeGenes:ORFeome Clones'
+do
+  if tableExists $db ${entry%%:*}; then
+    trackList="${trackList} ${entry%%:*}"
+    echo -e "\t${entry#*:}"
+  fi
+done
+echo
+
+echo -e "--> Running genePredCheck and pslCheck:\n"
+
+# genePredCheck every listed table that exists; any failure sets allOk=no
+for gpTable in refGene xenoRefGene mgcGenes orfeomeGenes
+do
+  if tableExists $db $gpTable; then
+    echo genePredCheck $gpTable
+    nice genePredCheck -db=$db $gpTable || allOk=no
+  fi
+done
+echo
+
+# pslCheck every listed table that exists; any failure sets allOk=no
+for pslTable in all_mrna xenoMrna all_est refSeqAli xenoRefSeqAli
+do
+  if tableExists $db $pslTable; then
+    echo pslCheck $pslTable
+    nice pslCheck -db=$db $pslTable || allOk=no
+  fi
+done
+echo
+
+# run joinerCheck on the keys of each GenBank table present
+joinerLoc=~/kent/src/hg/makeDb/schema/all.joiner
+echo -e "--> Checking appropriate keys in $(basename $joinerLoc):\n"
+
+# doJoinerCheck table ident -- if table is in $db, joinerCheck -keys ident (failure sets allOk=no)
+doJoinerCheck() {
+  local table=$1 ident=$2
+  if tableExists $db $table; then  # 'if', not '&&': a missing table must return 0 under bash -e
+    echo joinerCheck -keys -database=$db -identifier=$ident $(basename $joinerLoc)
+    nice joinerCheck -keys -database=$db -identifier=$ident $joinerLoc || allOk=no
+    echo
+  fi
+}
+
+# Run doJoinerCheck once per table:identifier pair, in this order.
+# The all_est pair takes care of intronEst, too.
+for pair in gbCdnaInfo:gbCdnas refGene:refSeqId xenoRefGene:xenoRefSeqId \
+    all_mrna:nativeMrnaAccession xenoMrna:xenoMrnaAccession \
+    all_est:nativeEstAccession xenoEst:xenoEstAccession \
+    mgcGenes:mgcAccession orfeomeGenes:orfeomeAccession
+do
+  doJoinerCheck ${pair%%:*} ${pair#*:}
+done
+echo
+
+# run gbSanity from $sanityPath, saving its stdout and stderr in a timestamped file under misc/
+timestamp=$(date +%m.%d.%Y-%R%p)
+echo -e "--> Running gbSanity, and putting output in:"
+echo -e "${HOST}:${sanityPath}/misc/${db}.sanity.$timestamp"
+oldPath=$(pwd)
+cd ${sanityPath}  # gbSanity and its output file are both given relative to this directory
+bin/x86_64/gbSanity $db >& misc/${db}.sanity.$timestamp || allOk=no
+cd $oldPath
+echo -e "Here is the last line of ${db}.sanity.$timestamp:\n"
+tail -1 "${sanityPath}/misc/${db}.sanity.$timestamp"  # name the file: a bare 'tail -1' reads stdin
+echo -e "If there are any errors here, consult Mark Diekhans.\n"
+
+# run featureBits on every listed table that exists: once alone, once with the gap table
+echo -e "--> Running featureBits.  Remember to paste this into the pushqueue:\n"
+for fbTable in refGene xenoRefGene mgcGenes all_mrna intronEst all_est xenoMrna mgcFullMrna orfeomeGenes
+do
+  if tableExists $db $fbTable; then
+    for extra in "" gap; do  # unquoted $extra vanishes when empty, leaving no extra argument
+      echo featureBits -countGaps $db $fbTable $extra
+      nice featureBits -countGaps $db $fbTable $extra || allOk=no
+    done
+    echo
+  fi
+done
+echo
+
+# run countPerChrom??
+
+# Final verdict: exit 0 if allOk is still yes, otherwise report and exit 2.
+case $allOk in
+  yes) echo No errors were encountered during $(basename $0).  Yay! ;;
+  *)
+    echo At least one ERROR occurred during $(basename $0).
+    exit 2
+    ;;
+esac
+exit 0
+# end.