4b666b1c585cd29776c86cac527f26037b671383
steve
  Thu Aug 8 10:51:58 2013 -0700
Made changes suggested in code review to limit loading of .hg.conf.beta to a subshell (redmine #11481)
diff --git src/utils/qa/qaEnsGenes.csh src/utils/qa/qaEnsGenes.csh
index 342cbe5..af8e644 100755
--- src/utils/qa/qaEnsGenes.csh
+++ src/utils/qa/qaEnsGenes.csh
@@ -1,256 +1,255 @@
 #!/bin/tcsh
 # NOTE(review): qaConfig.csh is located through PATH and sourced here; it
 # presumably supplies shared QA settings such as $sqlbeta, which is used
 # further down but never set in this script -- confirm.
 source `which qaConfig.csh`
 
 ###############################################
 # 
 #  03-18-08
 #  Ann Zweig
 #
 #  Runs through the usual checks for Ensembl
 #  Gene updates.
 # 
 ###############################################
 
 # Defaults for the script's working variables:
 #   runOn    - second argument: one database name, or 'all'
 #   ver      - first argument: the Ensembl version number
 #   dbs      - list of databases the checks will be run on
 #   db       - loop variable for the current database
 #   betaList - filtered output of "getAssemblies.csh ensGene"
 set runOn=''
 set ver=0
 set dbs=''
 set db=''
 set betaList=''
 
 # Exactly two arguments are required; with any other count, print the
 # usage text and exit with status 1.  Otherwise the first argument is
 # stored in $ver and the second in $runOn.
 if ($#argv != 2 ) then
   echo
   echo "  runs test suite for ensGene track update"
   echo "  run this script before you push the new tables to beta"
   echo "  (makes lots of files: run in junk directory)"
   echo "  (it's best to direct output and errors to a file: '>&')"
   echo
   echo "    usage: ensGeneVersionNumber (db | all)"
   echo "      ensGeneVersionNumber  = Ensembl's version, e.g. 49"
   echo "      choose one database (db) or all dbs with ensGenes tracks (all)"
   echo
   exit 1
 else
   set ver=$argv[1]
   set runOn=$argv[2]
 endif
 
 # run only from hgwdev
 # ($HOST must be exactly "hgwdev"; anything else exits with status 1)
 if ( "$HOST" != "hgwdev" ) then
   echo "\nERROR: you must run this script on hgwdev!\n"
   exit 1
 endif
 
 # check input
 # Only versions 46 through 99 are accepted.
 # NOTE(review): the upper bound rejects any three-digit Ensembl version --
 # confirm this limit is still wanted.
 if ( $ver <= 45 || $ver >= 100 ) then
   echo "\nERROR: you must enter a valid version number!\n"
   exit 1
 endif
 
 # get rid of files, if they are around
 # (the three scratch files are written in the current directory and carry
 # a $$ suffix in their names)
 rm -f xxDbList$$ xxNotActive$$ xxNotOnBeta$$
 
 # figure out which assemblies already have an ensGene track on beta
 # The output of getAssemblies.csh is kept except for lines containing
 # 'get' or 'ensGene'.
 # NOTE(review): those dropped lines are presumably header/banner text from
 # getAssemblies.csh, leaving only assembly names -- confirm.
 set betaList=`getAssemblies.csh ensGene | egrep -v 'get' | egrep -v 'ensGene'` 
 
 # figure out which databases we're running it on
 #
 # 'all' mode: take every database that hgFixed.trackVersion lists for
 # ensGene at version $ver, then sort each one into the scratch files:
 #   xxNotActive$$ - no active dbDb row in hgcentralbeta (left out of the run)
 #   xxNotOnBeta$$ - active, but not found in $betaList (still included)
 #   xxDbList$$    - every active database; becomes the final $dbs
 # single-database mode: $runOn is used as given, with no checks.
 if ( 'all' == $runOn ) then
  set dbs=`hgsql -Ne "SELECT db FROM trackVersion WHERE version = '$ver' \
  and name='ensGene' ORDER BY db" hgFixed | sort -u`
  echo "\nThe following databases were updated on hgwdev to ensGenes v$ver :"
  echo "$dbs\n"
 
  if ( "" == "$dbs" ) then
   echo "\nERROR: there is no update available for version number $ver\n"
   exit 1
  else # updates for this ensGene version exist
   foreach db ($dbs)
    # look for an active dbDb entry for this database on the $sqlbeta host
    set onBeta=`hgsql -h $sqlbeta -Ne "SELECT name FROM dbDb \
    WHERE name = '$db' AND active = 1" hgcentralbeta`
 
    if ( "" == "$onBeta" ) then
     echo $db >> xxNotActive$$
    else
     # egrep -wo prints $db only when it occurs in $betaList as a whole word
     set hasTrack=''
     set hasTrack=`echo $betaList | egrep -wo $db`
     # both branches add the database to xxDbList$$; only the first also
     # records it as lacking an ensGene track on beta
     if ( "$db" != "$hasTrack" ) then
      echo $db >> xxNotOnBeta$$
      echo $db >> xxDbList$$
     else
      echo $db >> xxDbList$$
     endif
    endif
   end
 
   # print out all results
   if ( -e xxNotActive$$ ) then
    echo "\nOf that list, the following databases are not active on beta"
    echo "so the tests in this script will not be run on them:"
    cat xxNotActive$$
   endif
 
   if ( -e xxNotOnBeta$$ ) then
    echo "\nOf that list, the following databases do not have an ensGenes track"
    echo "on hgwbeta (however, the tests in this script will still be run on them)."
    echo "You might consider releasing an ensGenes track for these databases:"
    cat xxNotOnBeta$$
   endif
 
   # NOTE(review): xxDbList$$ is only created when at least one database was
   # active on beta; if none were, this cat has no file to read -- confirm
   # that case is acceptable.
   set dbs=`cat xxDbList$$`
  endif
 else # running on one database only (don't check, just run)
  # NOTE(review): "Ensebml" in the message below looks like a typo for
  # "Ensembl".
  echo "\nRunning script for Ensebml Genes v$ver on this assembly:\n"
  set dbs=$runOn
  echo $dbs
 endif
 
 # a huge loop through all databases
 # Each pass prints a banner for $db and then runs the checks in sequence;
 # every check writes its own explanatory heading before its output.
 foreach db ($dbs)
  echo "\n\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
  echo "~~~~~~~~~ $db ~~~~~~~~~~~~"
  echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
 
  # find out if this is a new Ensembl Genes track (or an update)
  # ($ensTrack equals $db when $db occurs in $betaList as a whole word;
  # the beta-side checks below are gated on that comparison)
  set ensTrack=`echo $betaList | egrep -wo $db`
 
  echo "\n\n----------------------"
  echo "compare new (dev) and old (beta) ens* tables"
  echo "this shows the counts of rows unique to dev, unique to beta, and"
  echo "present on both.  you should be suspicious if there are big differences"
  compareWholeColumn.csh $db ensGene name
  compareWholeColumn.csh $db ensPep name
  compareWholeColumn.csh $db ensGtp transcript
  echo
 
  # NOTE(review): the $db.ensGene.name.devOnly / .betaOnly files read below
  # are presumably written to the current directory by the
  # compareWholeColumn.csh calls above -- confirm.
  echo "\n\n----------------------"
  echo "check a few of the new additions to the ensGene table"
  echo "(be sure to click all the way out to Ensembl website)"
  echo "\ncheck these in $db browser on hgwdev:"
  head -2 $db.ensGene.name.devOnly
  tail -2 $db.ensGene.name.devOnly
 
  # only do this test if the ensGene track already exists on beta
  if ( $db == $ensTrack ) then
   echo "\n\n----------------------"
   echo "check a few of the deleted items from the ensGene table"
   echo "(make sure they have also been dropped from Ensembl website)"
   echo "\ncheck these in $db browser on hgwbeta:"
   head -2 $db.ensGene.name.betaOnly
   tail -2 $db.ensGene.name.betaOnly
  endif
 
  # show two joined rows across ensGene, ensPep and ensGtp for spot-checking
  echo "\n\n----------------------"
  echo "these are full sets of corresponding rows from the three tables:"
  echo "ensGene <-> ensPep <-> ensGtp on hgwdev"
  echo "\ncheck these two genes (and their peptides) on hgwdev for '$db':"
  hgsql -e "SELECT * FROM ensGene, ensPep, ensGtp WHERE \
  ensGene.name = ensPep.name AND ensGene.name = ensGtp.transcript LIMIT 2\G" $db
 
  echo "\n\n----------------------"
  echo "run genePredCheck on the ensGene table. if there a failure here,"
  echo "then something is seriously wrong with the ensGene table."  
  echo "MarkD can help you figure out exactly what's wrong."
  echo "\ngenePredCheck results for $db.ensGene on hgwdev:" 
  genePredCheck -db=$db ensGene
 
  echo "\n\n----------------------"
  echo "find out which chroms the genes are on (for both dev and beta)."  
  echo "look for unusually small or large numbers here (or big differences)."
  # don't run this on scaffold assemblies
  # (an assembly with 100 or more chromInfo rows is treated as a scaffold
  # assembly and skipped)
  set numChroms=`hgsql -Ne "SELECT COUNT(*) FROM chromInfo" $db`
  if ( $numChroms < 100 ) then
   # NOTE(review): the extra "hgwbeta" argument is presumably what makes
   # countPerChrom.csh include beta counts -- confirm against that script.
   if ( $db == $ensTrack ) then
    countPerChrom.csh $db ensGene $db hgwbeta
   else
    countPerChrom.csh $db ensGene $db
   endif
  else
   echo "$db is a scaffold assembly: skipping countPerChrom"
  endif
  echo
 
  echo "\n\n----------------------"
  echo "featureBits for new (dev) and old (beta) tables"
  echo "\nfeatureBits $db ensGene (on hgwdev):"
  featureBits $db ensGene
  echo "featureBits $db -countGaps ensGene (on hgwdev):"
  featureBits $db -countGaps ensGene
  echo "featureBits $db -countGaps ensGene gap (on hgwdev):"
  featureBits $db -countGaps ensGene gap
 
  # Beta-side featureBits, only when the track already exists on beta.
  # Each call sets HGDB_CONF to $HOME/.hg.conf.beta inside its own
  # parenthesized subshell, so the setting does not persist in this
  # script's environment for the commands that follow.
  if ( $db == $ensTrack ) then
-  setenv HGDB_CONF $HOME/.hg.conf.beta
   echo "\nfeatureBits $db ensGene (on hgwbeta):"
-  featureBits $db ensGene
+  (setenv HGDB_CONF $HOME/.hg.conf.beta; featureBits $db ensGene)
   echo "featureBits $db -countGaps ensGene (on hgwbeta):"
-  featureBits $db -countGaps ensGene
+  (setenv HGDB_CONF $HOME/.hg.conf.beta; featureBits $db -countGaps ensGene)
   echo "featureBits $db -countGaps ensGene gap (on hgwbeta):"
-  featureBits $db -countGaps ensGene gap
+  (setenv HGDB_CONF $HOME/.hg.conf.beta; featureBits $db -countGaps ensGene gap)
  endif
  echo
 
  echo "\n\n----------------------"
  echo "check that the ensGene track is sorted by chrom:"
  echo "positionalTblCheck -verbose=2 $db ensGene\n"
  positionalTblCheck -verbose=2 $db ensGene
  echo
 
  # joinerCheck is run against the all.joiner file under ~/kent, so a kent
  # source checkout is expected in the user's home directory
  echo "\n\n----------------------"
  echo "run Joiner Check. look for errors in the following two lines only:"
  echo "ensPep.name and ensGtp.transcript"
  echo "\nrunning joinerCheck for $db on ensemblTranscriptId:"
  joinerCheck -keys -database=$db -identifier=ensemblTranscriptId ~/kent/src/hg/makeDb/schema/all.joiner
 
  # count ensGene names that lack the 'ENS' prefix; when the count is not
  # zero, print up to three of them as examples
  echo "\n\n----------------------"
  echo "ensGene names typically begin with 'ENS'. if there is a number other"
  echo "than 0, then there are ensGenes that do not begin with 'ENS'."
  echo "check them out on the Ensembl website."
  echo "\nnumber of ensGenes that do not begin with 'ENS' in '$db':"
  set num=`hgsql -Ne "SELECT COUNT(*) FROM ensGene WHERE name \
  NOT LIKE 'ENS%'" $db`
  echo $num
  if ( 0 != $num ) then
   echo "instead of 'ENS', the ensGenes in this table look like this:"
   hgsql -Ne "SELECT name FROM ensGene WHERE name NOT LIKE 'ENS%' LIMIT 3" $db
  endif
 
  # list which of the optional companion tables exist in this database
  # NOTE(review): the message below names the table "ensembleGeneScaffold",
  # while the query looks for 'ensemblGeneScaffold' -- confirm the intended
  # spelling.
  echo "\n\n----------------------"
  echo "A few tracks have another table or two associated with them.  For"
  echo "example, when Ensembl uses different scaffold names than we do, there"
  echo "should be a translation table called: ensembleGeneScaffold.  This"
  echo "table supports a separate track called: Ensembl Assembly."
  echo "Assemblies with a UCSC Gene track should also have a table called:"
  echo "knownToEnsembl. Here's what this assembly has:"
  echo
  hgsql -Ne "SHOW TABLES LIKE 'ensemblGeneScaffold'" $db
  hgsql -Ne "SHOW TABLES LIKE 'knownToEnsembl'" $db
 
 end # huge loop through each database
 
 # remember the hgFixed.trackVersion table
 # (this part only prints reminders; nothing is pushed by this script)
 echo "\n\n----------------------"
 echo "Don't forget to also push (to beta and then to the RR)"
 echo "the trackVersion table in the hgFixed database."
 echo "There are rows to allow the correct version number to be displayed in hgTrackUi."
 echo "Before you push the table, check the differences with compareWholeTable.csh hgFixed trackVersion"
 echo "See Wiki for more details: http://genomewiki.cse.ucsc.edu/genecats/index.php/Ensembl_QA"
 
 # make sure the date column has been updated
 # NOTE(review): $ver is unquoted in this query but quoted ('$ver') in the
 # earlier trackVersion query that builds the database list -- confirm both
 # forms match the column as intended.
 echo "\n\n----------------------"
 echo "the dateReference column in the hgFixed.trackVersion table"
 echo "should say 'current' for your database (or all):"
 hgsql -Ne "SELECT db, dateReference FROM trackVersion WHERE version = $ver AND name = 'ensGene' ORDER BY db" hgFixed
 
 # check that the corresponding upstream MAF files have been updated
 # (a printed reminder only; the files themselves are not inspected here)
 echo "\n\n----------------------"
 echo "In conjunction with an Ensembl Gene update, some upstream MAF files need"
 echo "to be rebuilt. Specifically those for: ornAna1, fr2, gasAcu1, oryLat2"
 echo "Check for them here (look for new dates) there should be 3 for each db:"
 echo "hgwdev:/data/apache/htdocs-hgdownload/goldenPath/<db>/multiz*way/maf/ensGene.upstream?000.maf.gz"
 echo
 
 # clean up
 # (removes the same three scratch files that were cleared at startup)
 rm -f xxDbList$$ xxNotActive$$ xxNotOnBeta$$
 
 echo "\nthe end.\n"
 
 exit 0