#!/bin/tcsh
source `which qaConfig.csh`

###############################################
#
#  03-18-08
#  Ann Zweig
#
#  Runs through the usual checks for Ensembl
#  Gene updates.
#
#  usage: qaEnsGenes.csh ensGeneVersionNumber (db | all)
#
#  Provenance: src/utils/qa/qaEnsGenes.csh as of commit
#  4b666b1c585cd29776c86cac527f26037b671383
#  (steve, Thu Aug 8 10:51:58 2013 -0700):
#  "Made changes suggested in code review to limit loading
#  of .hg.conf.beta to a subshell (redmine #11481)"
#
###############################################

set runOn=''
set ver=0
set dbs=''
set db=''
set betaList=''

# exactly two arguments are required: the Ensembl version and a db name (or 'all')
if ($#argv != 2 ) then
  echo
  echo " runs test suite for ensGene track update"
  echo " run this script before you push the new tables to beta"
  echo " (makes lots of files: run in junk directory)"
  echo " (it's best to direct output and errors to a file: '>&')"
  echo
  echo " usage: ensGeneVersionNumber (db | all)"
  echo " ensGeneVersionNumber = Ensembl's version, e.g. 49"
  echo " choose one database (db) or all dbs with ensGenes tracks (all)"
  echo
  exit 1
else
  set ver=$argv[1]
  set runOn=$argv[2]
endif

# run only from hgwdev
if ( "$HOST" != "hgwdev" ) then
  echo "\nERROR: you must run this script on hgwdev!\n"
  exit 1
endif

# check input (only versions 46 through 99 are accepted)
if ( $ver <= 45 || $ver >= 100 ) then
  echo "\nERROR: you must enter a valid version number!\n"
  exit 1
endif

# get rid of files, if they are around
rm -f xxDbList$$ xxNotActive$$ xxNotOnBeta$$

# figure out which assemblies already have an ensGene track on beta
# (lines of getAssemblies.csh output matching 'get' or 'ensGene' are dropped)
set betaList=`getAssemblies.csh ensGene | egrep -v 'get' | egrep -v 'ensGene'`

# figure out which databases we're running it on
if ( 'all' == "$runOn" ) then
  set dbs=`hgsql -Ne "SELECT db FROM trackVersion WHERE version = '$ver' \
    and name='ensGene' ORDER BY db" hgFixed | sort -u`
  echo "\nThe following databases were updated on hgwdev to ensGenes v$ver :"
  echo "$dbs\n"
  if ( "" == "$dbs" ) then
    echo "\nERROR: there is no update available for version number $ver\n"
    exit 1
  else
    # updates for this ensGene version exist
    foreach db ($dbs)
      # $sqlbeta is not set in this script (presumably by qaConfig.csh -- confirm)
      set onBeta=`hgsql -h $sqlbeta -Ne "SELECT name FROM dbDb \
        WHERE name = '$db' AND active = 1" hgcentralbeta`
      if ( "" == "$onBeta" ) then
        # not active on beta: report it, but do not test it
        echo $db >> xxNotActive$$
      else
        set hasTrack=`echo $betaList | egrep -wo $db`
        if ( "$db" != "$hasTrack" ) then
          # active on beta but no ensGene track there yet
          echo $db >> xxNotOnBeta$$
        endif
        # every db that is active on beta gets tested
        echo $db >> xxDbList$$
      endif
    end

    # print out all results
    if ( -e xxNotActive$$ ) then
      echo "\nOf that list, the following databases are not active on beta"
      echo "so the tests in this script will not be run on them:"
      cat xxNotActive$$
    endif
    if ( -e xxNotOnBeta$$ ) then
      echo "\nOf that list, the following databases do not have an ensGenes track"
      echo "on hgwbeta (however, the tests in this script will still be run on them)."
      echo "You might consider releasing an ensGenes track for these databases:"
      cat xxNotOnBeta$$
    endif

    # xxDbList$$ is only created when at least one db is active on beta;
    # without this guard cat fails on the missing file
    if ( -e xxDbList$$ ) then
      set dbs=`cat xxDbList$$`
    else
      echo "\nNone of those databases are active on beta: nothing to test."
      set dbs=()
    endif
  endif
else
  # running on one database only (don't check, just run)
  echo "\nRunning script for Ensembl Genes v$ver on this assembly:\n"
  set dbs=$runOn
  echo $dbs
endif

# a huge loop through all databases
foreach db ($dbs)
  echo "\n\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
  echo "~~~~~~~~~ $db ~~~~~~~~~~~~"
  echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"

  # find out if this is a new Ensembl Genes track (or an update):
  # ensTrack equals $db when the assembly is in the beta list and is
  # empty otherwise, so it must stay quoted in the comparisons below
  set ensTrack=`echo $betaList | egrep -wo $db`

  echo "\n\n----------------------"
  echo "compare new (dev) and old (beta) ens* tables"
  echo "this shows the counts of rows unique to dev, unique to beta, and"
  echo "present on both. you should be suspicious if there are big differences"
  compareWholeColumn.csh $db ensGene name
  compareWholeColumn.csh $db ensPep name
  compareWholeColumn.csh $db ensGtp transcript
  echo

  echo "\n\n----------------------"
  echo "check a few of the new additions to the ensGene table"
  echo "(be sure to click all the way out to Ensembl website)"
  echo "\ncheck these in $db browser on hgwdev:"
  # the *.devOnly / *.betaOnly files are presumably written by
  # compareWholeColumn.csh above -- confirm
  head -2 $db.ensGene.name.devOnly
  tail -2 $db.ensGene.name.devOnly

  # only do this test if the ensGene track already exists on beta
  if ( "$db" == "$ensTrack" ) then
    echo "\n\n----------------------"
    echo "check a few of the deleted items from the ensGene table"
    echo "(make sure they have also been dropped from Ensembl website)"
    echo "\ncheck these in $db browser on hgwbeta:"
    head -2 $db.ensGene.name.betaOnly
    tail -2 $db.ensGene.name.betaOnly
  endif

  echo "\n\n----------------------"
  echo "these are full sets of corresponding rows from the three tables:"
  echo "ensGene <-> ensPep <-> ensGtp on hgwdev"
  echo "\ncheck these two genes (and their peptides) on hgwdev for '$db':"
  hgsql -e "SELECT * FROM ensGene, ensPep, ensGtp WHERE \
    ensGene.name = ensPep.name AND ensGene.name = ensGtp.transcript LIMIT 2\G" $db

  echo "\n\n----------------------"
  echo "run genePredCheck on the ensGene table. if there is a failure here,"
  echo "then something is seriously wrong with the ensGene table."
  echo "MarkD can help you figure out exactly what's wrong."
  echo "\ngenePredCheck results for $db.ensGene on hgwdev:"
  genePredCheck -db=$db ensGene

  echo "\n\n----------------------"
  echo "find out which chroms the genes are on (for both dev and beta)."
  echo "look for unusually small or large numbers here (or big differences)."
  # don't run this on scaffold assemblies
  # (an assembly with 100 or more chromInfo rows is treated as one)
  set numChroms=`hgsql -Ne "SELECT COUNT(*) FROM chromInfo" $db`
  if ( $numChroms < 100 ) then
    if ( "$db" == "$ensTrack" ) then
      countPerChrom.csh $db ensGene $db hgwbeta
    else
      countPerChrom.csh $db ensGene $db
    endif
  else
    echo "$db is a scaffold assembly: skipping countPerChrom"
  endif
  echo

  echo "\n\n----------------------"
  echo "featureBits for new (dev) and old (beta) tables"
  echo "\nfeatureBits $db ensGene (on hgwdev):"
  featureBits $db ensGene
  echo "featureBits $db -countGaps ensGene (on hgwdev):"
  featureBits $db -countGaps ensGene
  echo "featureBits $db -countGaps ensGene gap (on hgwdev):"
  featureBits $db -countGaps ensGene gap
  if ( "$db" == "$ensTrack" ) then
    # each beta call runs in its own subshell, so HGDB_CONF is set to
    # .hg.conf.beta only for that one featureBits invocation and never
    # leaks into the rest of this script
    echo "\nfeatureBits $db ensGene (on hgwbeta):"
    (setenv HGDB_CONF $HOME/.hg.conf.beta; featureBits $db ensGene)
    echo "featureBits $db -countGaps ensGene (on hgwbeta):"
    (setenv HGDB_CONF $HOME/.hg.conf.beta; featureBits $db -countGaps ensGene)
    echo "featureBits $db -countGaps ensGene gap (on hgwbeta):"
    (setenv HGDB_CONF $HOME/.hg.conf.beta; featureBits $db -countGaps ensGene gap)
  endif
  echo

  echo "\n\n----------------------"
  echo "check that the ensGene track is sorted by chrom:"
  echo "positionalTblCheck -verbose=2 $db ensGene\n"
  positionalTblCheck -verbose=2 $db ensGene
  echo

  echo "\n\n----------------------"
  echo "run Joiner Check. look for errors in the following two lines only:"
  echo "ensPep.name and ensGtp.transcript"
  echo "\nrunning joinerCheck for $db on ensemblTranscriptId:"
  joinerCheck -keys -database=$db -identifier=ensemblTranscriptId ~/kent/src/hg/makeDb/schema/all.joiner

  echo "\n\n----------------------"
  echo "ensGene names typically begin with 'ENS'. if there is a number other"
  echo "than 0, then there are ensGenes that do not begin with 'ENS'."
  echo "check them out on the Ensembl website."
  echo "\nnumber of ensGenes that do not begin with 'ENS' in '$db':"
  set num=`hgsql -Ne "SELECT COUNT(*) FROM ensGene WHERE name \
    NOT LIKE 'ENS%'" $db`
  echo $num
  if ( 0 != $num ) then
    echo "instead of 'ENS', the ensGenes in this table look like this:"
    hgsql -Ne "SELECT name FROM ensGene WHERE name NOT LIKE 'ENS%' LIMIT 3" $db
  endif

  echo "\n\n----------------------"
  echo "A few tracks have another table or two associated with them. For"
  echo "example, when Ensembl uses different scaffold names than we do, there"
  echo "should be a translation table called: ensemblGeneScaffold. This"
  echo "table supports a separate track called: Ensembl Assembly."
  echo "Assemblies with a UCSC Gene track should also have a table called:"
  echo "knownToEnsembl. Here's what this assembly has:"
  echo
  hgsql -Ne "SHOW TABLES LIKE 'ensemblGeneScaffold'" $db
  hgsql -Ne "SHOW TABLES LIKE 'knownToEnsembl'" $db
end # huge loop through each database

# remember the hgFixed.trackVersion table
echo "\n\n----------------------"
echo "Don't forget to also push (to beta and then to the RR)"
echo "the trackVersion table in the hgFixed database."
echo "There are rows to allow the correct version number to be displayed in hgTrackUi."
echo "Before you push the table, check the differences with compareWholeTable.csh hgFixed trackVersion"
echo "See Wiki for more details: http://genomewiki.cse.ucsc.edu/genecats/index.php/Ensembl_QA"

# make sure the date column has been updated
# (version is quoted here exactly as in the trackVersion query near the top)
echo "\n\n----------------------"
echo "the dateReference column in the hgFixed.trackVersion table"
echo "should say 'current' for your database (or all):"
hgsql -Ne "SELECT db, dateReference FROM trackVersion WHERE version = '$ver' \
  AND name = 'ensGene' ORDER BY db" hgFixed

# check that the corresponding upstream MAF files have been updated
echo "\n\n----------------------"
echo "In conjunction with an Ensembl Gene update, some upstream MAF files need"
echo "to be rebuilt. Specifically those for: ornAna1, fr2, gasAcu1, oryLat2"
echo "Check for them here (look for new dates) there should be 3 for each db:"
echo "hgwdev:/data/apache/htdocs-hgdownload/goldenPath/<db>/multiz*way/maf/ensGene.upstream?000.maf.gz"
echo

# clean up
rm -f xxDbList$$ xxNotActive$$ xxNotOnBeta$$

echo "\nthe end.\n"
exit 0