735c3b225873336cbe95a3ad08aec5448f2b974c max Mon Jan 11 07:49:43 2016 -0800 major change to gbib update script, refs #16616

diff --git src/browserbox/root/updateBrowser.sh src/browserbox/root/updateBrowser.sh
index 3de485a..1bf3466 100755
--- src/browserbox/root/updateBrowser.sh
+++ src/browserbox/root/updateBrowser.sh
@@ -1,254 +1,365 @@
#!/bin/bash
-# update script on the box
+# update script on the GBiB virtual machine
# - updates itself and then runs itself
# - updates cgis, html and gbdb via rsync
-# - updates hg.conf via wget
# - patches the menu
-# - hides conservation+retro
-# - removes some searches if latency to UCSC is > 90msecs
+# - calls hgMirror to hide some slow default tracks, e.g. conservation+retro

# will not run if:
# - not run as root
# - a script named updateBrowser.sh is already running
# - any hgMirror jobs are running
# - hgdownload is offline
# - the flagFile on hgdownload is not more recent than a local flag file
+# - the VirtualBox guest property "gbibAutoUpdateOff" is set. To set it, run this on the host:
+#   "VBoxManage guestproperty set browserbox gbibAutoUpdateOff"
+
+# To find out why the script did not run, use the command echo $? to show the
+# exit code of the script:
+# 1 - not root
+# 2 - already running
+# 3 - no internet connection
+# 4 - an hgMirror job is running
+# 5 - not enough arguments
+# 6 - the VirtualBox guest property gbibAutoUpdateOff is set

# parameters:
# - parameter "hgwdev": does not update itself, copies only the beta/alpha CGIs/htdocs from hgwdev
# - parameter "notSelf": does not update itself and does not check flagfile

+# this script does not use the bash options pipefail or errexit.
+# If something goes wrong, it keeps going; this is intentional, to
+# avoid a virtual machine that can no longer update itself
+
# rsync options:
# l = preserve symlinks
# t = preserve time
# r = recurse
# z = compress
# v = verbose
# h = human readable
# u = skip if file is newer on receiver
RSYNCOPTS="-ltrzvh"
# rsync server for CGIs and html files
RSYNCSRC="rsync://hgdownload.cse.ucsc.edu"
RSYNCCGIBIN=cgi-bin
RSYNCHTDOCS=htdocs
UPDATEFLAG=http://hgdownload.cse.ucsc.edu/gbib/lastUpdate
+LOGFILE=/var/log/gbibUpdates.log
+
+# make sure that apt-get never opens an interactive dialog
+export DEBIAN_FRONTEND=noninteractive

# help
if [ "$1" == "-h" ] ; then
    echo "Without any options, this script checks if it can reach hgdownload and if "
    echo "new data has been added since the last run. It updates itself and runs the new copy."
    echo "The new copy rsyncs the CGIs/MysqlDbs/htdocs from hgdownload."
    echo "It rsyncs the gbib/push directory into / to update other files."
    echo "It finally repairs mysql tables, makes some trackDb changes, adds symlinks,"
    echo "and chmods the directories."
    echo "Parameters:"
    echo "updateBrowser.sh notSelf - do not update the script itself. Do not check if data has been "
    echo " added to hgdownload since the last run"
    echo "updateBrowser.sh hgwdev - more info on how to update to alpha versions (used only by UCSC)"
    exit 0
fi

# check if running as root
if [ "$(id -u)" != "0" ] ; then
    echo "This script must be run as root" 1>&2
    exit 1
fi

# check if running already, 3 = the main script + its update + the subshell where this command is running
RUNNING=`ps --no-headers -CupdateBrowser.sh | wc -l`
if [ ${RUNNING} -gt 3 ] ; then
    #echo update already running
    exit 2
fi

-# check if auto-updates were deactivated from the Vbox host via a property
-if VBoxControl guestproperty get gbibAutoUpdateOff | grep -xq "Value: yes" ; then
+# check if the VirtualBox Guest Additions kernel module works and, if it does, whether auto-updates
+# were deactivated from the VBox host via a property
+if modprobe vboxguest 2> /dev/null > /dev/null; then
+    if VBoxControl guestproperty get gbibAutoUpdateOff | grep -xq "Value: yes" 2> /dev/null ; then
-exit 6
+        # exit only if no argument was specified on the command line = run from a cronjob
+        if [ "$#" -eq 0 ] ; then
+            exit 6
+        fi
+    fi
+else
+    # only show this notice when run with a command line argument (= not from cron)
+    if [ "$#" -ne 0 ] ; then
+        echo - Info: GBiB not running under VirtualBox or VirtualBox Guest Utils are not working
+    fi
fi

-# check if we have internet
+# check if we have internet, stop if not
wget -q --tries=1 --timeout=10 --spider http://hgdownload.soe.ucsc.edu -O /dev/null
if [ $? -ne 0 ] ; then
    exit 3
fi

-# check flag if run with no parameter (=from cron)
+# check flag file if run with no parameter (=from cron)
if [ "$#" -eq 0 ] ; then
    # check a flag file to see if anything on hgdownload actually changed
    if /root/urlIsNotNewerThanFile $UPDATEFLAG /root/lastUpdateTime.flag
    then
        exit 0
    fi
fi

+# keep a log of all output of this script and the date
+echo --------------------------------- >> $LOGFILE
+date >> $LOGFILE
+echo --------------------------------- >> $LOGFILE
+exec >> >(tee -a $LOGFILE) 2>&1
+
# unless already calling self, update self and call self, unless doing only cgis
+# self-updates are skipped when suppressed with notSelf, and also in hgwdev mode, to allow testing of local updateBrowser.sh changes
+# Internal sidenote: if you want hgwdev CGIs and also the current hgwdev update
+# script, do a gbibCoreUpdateBeta + updateBrowser hgwdev
+
+# gbibCoreUpdateBeta ends with -Beta because it is used during beta time, to
+# test the current dev update script. The update script itself has only a
+# two-stage release process, beta and final, as the alpha version of the script
+# lives on the GBiB of the developer itself.
+# the file /root/gbibSkipNextUpdate allows skipping a single auto-update
+if [[ ( "$BASH_ARGV" != "notSelf" && "$1" != "hgwdev" ) && ( ! -e /root/gbibSkipNextUpdate ) ]] ; then
    echo getting new update script
    # we got three VMs where updateBrowser.sh was 0 bytes, so doing download/move now
    wget http://hgdownload.soe.ucsc.edu/gbib/updateBrowser.sh -O /root/updateBrowser.sh.new -q && mv /root/updateBrowser.sh.new /root/updateBrowser.sh
    chmod a+x /root/updateBrowser.sh
    /root/updateBrowser.sh $1 notSelf
    exit 0
fi

+rm -f /root/gbibSkipNextUpdate
+
# check if any hgMirror jobs are running right now
# check if the group id file exists and also if any processes exist with this group id
# note that the .pid actually contains a group id, not a process id
if [ -f /tmp/lastJob.pid ] && [ "$(ps x -o pgid | grep $(cat /tmp/lastJob.pid) | wc -l)" != "0" ] ; then
    echo a hgMirror job is running right now, not updating
    exit 4
fi

+# activate the apt repos 'main' and 'universe' so we can install external software
+if ! apt-cache policy r-base | grep "Unable to locate" > /dev/null; then
+    if ! grep '^deb http://us.archive.ubuntu.com/ubuntu trusty main universe multiverse$' /etc/apt/sources.list > /dev/null; then
+        echo - Activating the main Ubuntu package repository
+        echo 'deb http://us.archive.ubuntu.com/ubuntu trusty main universe multiverse' >> /etc/apt/sources.list
+    fi
+fi
+
+# activate daily automated security updates with automated reboots
+# automated reboots are strange but probably better than risking exploits
+# see https://help.ubuntu.com/community/AutomaticSecurityUpdates
+if apt-cache policy unattended-upgrades | grep "Installed: .none." > /dev/null; then
+    echo - Activating automated daily Ubuntu security updates
+    # from http://askubuntu.com/questions/203337/enabling-unattended-upgrades-from-a-shell-script
+    apt-get update
+    apt-get install -y unattended-upgrades update-notifier-common
+    # update package lists every day
+    echo 'APT::Periodic::Update-Package-Lists "1";' > /etc/apt/apt.conf.d/20auto-upgrades
+    # do the upgrade every day
+    echo 'APT::Periodic::Unattended-Upgrade "1";' >> /etc/apt/apt.conf.d/20auto-upgrades
+    # reboot if needed
+    echo 'Unattended-Upgrade::Automatic-Reboot "true";' >> /etc/apt/apt.conf.d/20auto-upgrades
+    # remove packages that are not used anymore
+    echo 'Unattended-Upgrade::Remove-Unused-Dependencies "true";' >> /etc/apt/apt.conf.d/20auto-upgrades
+    # and remove the downloaded tarballs at the end
+    echo 'APT::Periodic::AutocleanInterval "1";' >> /etc/apt/apt.conf.d/20auto-upgrades
+    /etc/init.d/unattended-upgrades restart
+fi
+
+# unattended security upgrades take a while to start, better to force one right now and show the progress
+# this may lead to an auto-reboot, so skip the auto-update of this script for this reboot
+touch /root/gbibSkipNextUpdate
+echo - Applying Ubuntu security updates
+unattended-upgrade -v
+rm -f /root/gbibSkipNextUpdate
+
+# The original GBiB image did not use the Ubuntu VirtualBox guest utilities,
+# fix this now and switch to them
+if apt-cache policy virtualbox-guest-dkms | grep "Installed: .none." > /dev/null; then
+    apt-get install -y linux-headers-generic
+    apt-get install -y virtualbox-guest-dkms
+    apt-get -y autoremove
+    /etc/init.d/vboxadd start
+    # during 2015, a directory /home/browser/bin got created in the official image and filled with a copy of the user tools
+    # remove this directory now to avoid confusion
+    shred -fzu -n1 /home/browser/bin/blat/*
+    rm -rf /home/browser/bin/blat
+    shred -fzu -n1 /home/browser/bin/*
+    rm -rf /home/browser/bin
+fi
+
+# install R for the GTEx tracks
+if apt-cache policy r-base | grep "Installed: .none." > /dev/null; then
+    echo - Installing R
+    apt-get update
+    apt-get install -y r-base
+    apt-get -y autoremove
+fi
+
echo
-echo Updating the genome browser software via rsync:
+echo - Updating the genome browser software via rsync:

# CGI-BIN and HTDOCS:
# the parameter "hgwdev" copies over only the beta/alpha CGIs from hgwdev
if [ "$1" == "hgwdev" ] ; then
    # note the missing -u option to RSYNC: in hgwdev mode, we want to overwrite everything.
    # On a development machine, the developer might have touched a file
    # for testing. We want to make sure that all local files are overwritten by the
    # files on hgwdev
    RSYNCOPTS="-ltrzvh"
    user=$2
    dirExt=$3

    if [ "$user" == "" ]; then
        echo arguments: updateBrowser hgwdev hgwdevUsername cgiDirectoryExtension
        echo in alpha/beta mode you need to provide a username for the hgwdev login
        echo and a directory extension, the part after /usr/local/apache/cgi-bin-XXX
        echo The extension '"alpha"' is translated to '"no extension"'
        echo example '"updateBrowser hgwdev kent alpha"'
        echo example '"updateBrowser hgwdev hiram beta"'
        exit 5
    fi

    if [ "$dirExt" == "alpha" ] ; then
        cgiExt=""
        htmlExt=""
    else
        cgiExt="-"$dirExt
        htmlExt="-"$dirExt
    fi

    RSYNCAPACHE="$RSYNCOPTS --delete"

    # remove a lot of clutter that accumulated in hgwdev's alpha cgi-bin dir
    if [ "$dirExt" == "alpha" ] ; then
        RSYNCAPACHE="$RSYNCAPACHE --exclude ENCODE/**.pdf --exclude *.gz --exclude *.bw --exclude *.bb --exclude *.bam --exclude goldenPath/**.pdf --exclude admin/** --exclude goldenPath/customTracks/** --exclude pubs/** --exclude ancestors/** --exclude training/** --exclude trash --exclude style-public/** --exclude js-public/** --exclude **/edw* --exclude images/mammalPsg/** --exclude **/encodeTestHub* --exclude favicon.ico --exclude folders --exclude ENCODE/** --exclude ENCODE/** --exclude Neandertal/** --exclude gbib/** --exclude generator/** --exclude js-*/** --exclude js/*/* --exclude .\* --exclude x86_64/* --exclude .xauth --exclude .hg.conf --exclude hgHeatmap* --exclude hg.conf --exclude 'hgt/**' --exclude admin/** --exclude images --exclude trash --exclude edw* --exclude visiGeneData/** --exclude crom_dir/ --exclude testp/ --exclude *.exe --exclude *.old --exclude *.tmp --exclude *.bak --exclude test* --exclude hg.conf* --exclude **/hgHeatmap* --exclude ~* --exclude Intronerator** --exclude hgText --exclude hgSubj --exclude gisaid* --exclude nt4.dir --exclude qaPush* --exclude docIdView --exclude ct/ --exclude *.bak --exclude hg.conf* --exclude gsid*/ --exclude *.private --exclude useCount --exclude ~* --exclude lssnp/ --exclude hg.conf.local"
    fi

    # remove things that are on hgwdev beta directories but not necessary on the gbib
    if [ "$dirExt" == "beta" ] ; then
        RSYNCAPACHE="$RSYNCAPACHE --exclude favicon*.ico --exclude hg.conf* --exclude ENCODE --exclude *.gz --exclude *.bw --exclude *.bb --exclude *.bam --exclude goldenPath/**.pdf --exclude admin --exclude goldenPath/customTracks --exclude pubs --exclude ancestors --exclude training --exclude trash --exclude .htaccess --exclude htdocs --exclude Neandertal --exclude RNA-img --exclude ebola --exclude encodeDCC --exclude evoFold --exclude geneExtra --exclude js-public --exclude style-public --exclude hgNearData --exclude visiGeneData --exclude visiGene"
    fi

    rsync $RSYNCAPACHE $user@hgwdev.soe.ucsc.edu:/usr/local/apache/htdocs${htmlExt}/ /usr/local/apache/htdocs/
    rsync $RSYNCAPACHE $user@hgwdev.soe.ucsc.edu:/usr/local/apache/cgi-bin${cgiExt}/ /usr/local/apache/cgi-bin/

    PUSHLOC=$user@hgwdev.soe.ucsc.edu:/usr/local/apache/htdocs/gbib/push/
# normal public updates from hgdownload are easier, not many excludes necessary
else
    # update CGIs
-    echo updating CGIs...
-    rsync --delete $RSYNCOPTS $RSYNCSRC/$RSYNCCGIBIN /usr/local/apache/cgi-bin/ --exclude=hg.conf --exclude=hg.conf.local --exclude edw* --exclude *private --exclude hgNearData --exclude visiGeneData --exclude Neandertal
+    echo - Updating CGIs...
+    rsync --delete -u $RSYNCOPTS $RSYNCSRC/$RSYNCCGIBIN /usr/local/apache/cgi-bin/ --exclude=hg.conf --exclude=hg.conf.local --exclude edw* --exclude *private --exclude hgNearData --exclude visiGeneData --exclude Neandertal

-    echo updating HTML files...
-    rsync --delete $RSYNCOPTS $RSYNCSRC/$RSYNCHTDOCS/ /usr/local/apache/htdocs/ --include **/customTracks/*.html --exclude ENCODE/ --exclude *.bam --exclude *.bb --exclude */*.bw --exclude */*.gz --exclude favicon.ico --exclude folders --exclude ancestors --exclude admin --exclude goldenPath/customTracks --exclude images/mammalPsg --exclude style/gbib.css --exclude images/title.jpg --exclude images/homeIconSprite.png --exclude goldenPath/**.pdf --exclude training
+    echo - Updating HTML files...
+    rsync --delete -u $RSYNCOPTS $RSYNCSRC/$RSYNCHTDOCS/ /usr/local/apache/htdocs/ --include **/customTracks/*.html --exclude ENCODE/ --exclude *.bam --exclude *.bb --exclude */*.bw --exclude */*.gz --exclude favicon.ico --exclude folders --exclude ancestors --exclude admin --exclude goldenPath/customTracks --exclude images/mammalPsg --exclude style/gbib.css --exclude images/title.jpg --exclude images/homeIconSprite.png --exclude goldenPath/**.pdf --exclude training

    PUSHLOC=hgdownload.cse.ucsc.edu::gbib/push/
fi

chown -R www-data.www-data /usr/local/apache/cgi-bin/*
chown -R www-data.www-data /usr/local/apache/htdocs/
chmod -R a+r /usr/local/apache/htdocs

if [ "$1" != "hgwdev" ] ; then
-    echo updating GBDB files...
+    echo - Updating GBDB files...
    rsync $RSYNCOPTS --existing rsync://hgdownload.cse.ucsc.edu/gbdb/ /data/gbdb/
    chown -R www-data.www-data /data/gbdb/
fi

-echo pulling other files
+echo - Pulling other files
# make sure we never overwrite the hg.conf.local file
rsync $RSYNCOPTS $PUSHLOC / --exclude=hg.conf.local

if [ "$1" != "hgwdev" ] ; then
    echo updating MYSQL files - browser will not work during the MYSQL update
    # inspired by http://forums.mysql.com/read.php?35,45577,47063#msg-47063
    # it doesn't work if I use two mysql invocations, as 'flush tables with read lock'
    # is only valid as long as the session is open
    # so I use the SYSTEM command
    echo "FLUSH TABLES WITH READ LOCK; SYSTEM rsync $RSYNCOPTS --existing rsync://hgdownload.cse.ucsc.edu/mysql/ /data/mysql/; SYSTEM chown -R mysql.mysql /data/mysql/; UNLOCK TABLES;" | mysql

    echo updating hgcentral database, make sure to always overwrite
    echo "FLUSH TABLES WITH READ LOCK; SYSTEM rsync -vrz --existing rsync://hgdownload.cse.ucsc.edu/mysql/hgcentral/ /data/mysql/hgcentral/; SYSTEM chown -R mysql.mysql /data/mysql/hgcentral; UNLOCK TABLES;" | mysql

    # update blat servers
    mysql hgcentral -e 'UPDATE blatServers SET host=CONCAT(host,".cse.ucsc.edu") WHERE host not like "%ucsc.edu"'
    # the box does not officially support the HAL right now, remove the ecoli hubs
    mysql hgcentral -e 'delete from hubPublic where hubUrl like "%nknguyen%"'
fi

-echo patching menu
+echo - Adapting the menu
cp /usr/local/apache/htdocs/inc/globalNavBar.inc /tmp/navbar.inc
# remove mirrors and downloads menu
sed -i '/<li class="menuparent" id="mirrors">/,/^<\/li>$/d' /tmp/navbar.inc
sed -i '/<li class="menuparent" id="downloads">/,/^<\/li>$/d' /tmp/navbar.inc
-cat /tmp/navbar.inc | grep -v hgNear | grep -v hgVisiGene | sed -e '/hgLiftOver/a <li><a href="../cgi-bin/hgMirror">Mirror tracks</a></li>' | sed '/genomewiki/a <li><a href="../goldenPath/help/gbib.html">Help on GBiB</a></li>' | uniq > /usr/local/apache/htdocs/inc/globalNavBar.inc
+# add a link to the mirror tracks tool
+sed -i '/hgLiftOver/a <li><a href="../cgi-bin/hgMirror">Mirror tracks</a></li>' /tmp/navbar.inc
+# add a link to the GBiB shared data folder
+sed -i '/Track Hubs/a <li><a href="http:\/\/127.0.0.1:1234\/folders\/">GBiB Shared Data Folder<\/a><\/li>' /tmp/navbar.inc
+# add a link to the GBiB help pages
+sed -i '/genomewiki/a <li><a href="../goldenPath/help/gbib.html">Help on GBiB</a></li>' /tmp/navbar.inc
+cat /tmp/navbar.inc | grep -v hgNear | grep -v hgVisiGene | uniq > /usr/local/apache/htdocs/inc/globalNavBar.inc
rm /tmp/navbar.inc

# patch left side menu:
# remove encode, neandertal, galaxy, visiGene, Downloads, cancer browser, microbial, mirrors, jobs
for i in hgNear ENCODE Neandertal galaxy VisiGene hgdownload genome-cancer microbes mirror jobs; do
    sed -i '/<A CLASS="leftbar" .*'$i'.*$/,/<HR>$/d' /usr/local/apache/htdocs/index.html
done

# patch main pages
sed -i 's/About the UCSC Genome Bioinformatics Site/About the UCSC Genome Browser in a Box/g' /usr/local/apache/htdocs/indexIntro.html
perl -0777 -pi -e 's/It also.+ provides.+> project.//s' /usr/local/apache/htdocs/indexIntro.html
perl -0777 -pi -e 's/Program-driven.+ per day.//s' /usr/local/apache/htdocs/indexInfo.html

# patch contacts page
sed -i 's/......cgi-bin\/hgUserSuggestion/http:\/\/genome.ucsc.edu\/cgi-bin\/hgUserSuggestion/' /usr/local/apache/htdocs/contacts.html

# remove visigene from top menu
sed -i '/hgVisiGene/d' /usr/local/apache/htdocs/inc/home.topbar.html

# maybe not needed, but running anyway
chown www-data.www-data /usr/local/apache/htdocs/inc/globalNavBar.inc

# make sure we have the right symlink to /media
sudo ln -sfT /media /usr/local/apache/htdocs/folders
sudo ln -sfT /data/trash /usr/local/apache/htdocs/trash

# make sure this tableList is not there, it can break the box
# hgcentral on hgdownload has tables missing: those with users and passwords
mysql hgcentral -e 'drop table if exists tableList'

# hide the really big tracks
mysql hg19 -e 'update trackDb set visibility=0 where tableName like "cons%way"'
mysql hg19 -e 'update trackDb set visibility=0 where tableName like "ucscRetroAli%"'

# temporary fix for hgdownload problem, Oct 2014
ls /data/mysql/eboVir3 > /dev/null 2> /dev/null && mysql eboVir3 -e 'drop table if exists history'

# rsync tables on hgdownload are sometimes in a crashed state
echo checking mysql tables
#sudo myisamchk --force --silent --fast /data/mysql/hg19/*.MYI /data/mysql/hgcentral/*.MYI /data/mysql/hgFixed/*.MYI 2> /dev/null
mysqlcheck --all-databases --auto-repair --quick --fast --silent

#LATENCY=`ping genome.ucsc.edu -n -c1 -q | grep rtt | cut -d' ' -f4 | cut -d/ -f2 | cut -d. -f1`
#if [ "$LATENCY" -gt "90" ]; then
#echo making low-latency changes
/usr/local/apache/cgi-bin/hgMirror postRsyncUpdates

# the local-only hg.conf settings file has to exist as it is included from hg.conf
# In case it got deleted due to some error, recreate it
if [ ! -f /usr/local/apache/cgi-bin/hg.conf.local ] ; then
    echo Creating /usr/local/apache/cgi-bin/hg.conf.local
    echo allowHgMirror=true > /usr/local/apache/cgi-bin/hg.conf.local
fi

touch /root/lastUpdateTime.flag

-echo update done.
+echo - GBiB update done
cat /etc/issue | tr -s '\n'
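
Note on the flag-file test near the top of the script: it calls a small helper, /root/urlIsNotNewerThanFile, that ships on the GBiB image and is not part of this diff. The sketch below is only an illustration of the idea, assuming the helper compares the Last-Modified header of the URL with the mtime of the local flag file and exits 0 when the URL is not newer (so the caller can skip the update); it also assumes curl is available. It is not the actual GBiB implementation.

#!/bin/bash
# hypothetical sketch of a urlIsNotNewerThanFile-style helper (illustration only)
url=$1
localFile=$2

# ask the web server for the Last-Modified header of the flag URL
remoteTime=$(curl -sI "$url" | grep -i '^Last-Modified:' | cut -d' ' -f2- | tr -d '\r')

# if the header is missing or the local file does not exist, treat the URL as newer
[ -z "$remoteTime" ] && exit 1
[ ! -e "$localFile" ] && exit 1

remoteEpoch=$(date -d "$remoteTime" +%s)
localEpoch=$(stat -c %Y "$localFile")

if [ "$remoteEpoch" -le "$localEpoch" ] ; then
    exit 0   # URL is not newer: nothing changed on hgdownload, the caller can stop early
else
    exit 1   # URL is newer: new data is available, the caller should run the update
fi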