735c3b225873336cbe95a3ad08aec5448f2b974c
max
  Mon Jan 11 07:49:43 2016 -0800
major change to gbib update script, refs #16616

diff --git src/browserbox/root/updateBrowser.sh src/browserbox/root/updateBrowser.sh
index 3de485a..1bf3466 100755
--- src/browserbox/root/updateBrowser.sh
+++ src/browserbox/root/updateBrowser.sh
@@ -1,119 +1,224 @@
 #!/bin/bash
 
-# update script on the box
+# update script on the GBiB virtual machine
 # - updates itself and then run itself
 # - updates cgis, html and gbdb via rsync 
-# - updates hg.conf via wget
 # - patches the menu
-# - hides conservation+retro 
-# - removes some searches if latency to UCSC is > 90msecs
+# - calls hgMirror to hide some slow default tracks, e.g. conservation+retro 
 
 # will not run if:
 # - not run as root
 # - if a script named updateBrowser.sh already is running
 # - any hgMirror jobs are running
 # - hgdownload is offline
 # - if a flagFile on hgDownload is not more recent than a local flag file
+# - if the VirtualBox guest property "gbibAutoUpdateOff" is set to "yes". To set it, run this on the host:
+#   "VBoxManage guestproperty set browserbox gbibAutoUpdateOff yes"
+
+# To find out why the script did not run, use the command echo $? to show the
+# exit code of the script:
+# 1 - not root
+# 2 - already running
+# 3 - no internet connection
+# 4 - hgMirror job is running
+# 5 - not enough arguments
+# 6 - virtualbox guest property is set
 
 # parameters:
 # - parameter "hgwdev": does not update itself, copies only the beta/alpha CGIs/htdocs from hgwdev
 # - parameter "notSelf": does not update itself and does not check flagfile
 
+# This script deliberately does not use the bash options pipefail or errexit.
+# If something goes wrong, it continues. This is intentional, to
+# avoid a virtual machine that cannot update itself anymore.
+
 # rsync options:
 # l = preserve symlinks
 # t = preserve time
 # r = recurse
 # z = compress
 # v = verbose
 # h = human readable
 # u = skip if file is newer on receiver
 RSYNCOPTS="-ltrzvh"
 # rsync server for CGIs and html files
 RSYNCSRC="rsync://hgdownload.cse.ucsc.edu"
 RSYNCCGIBIN=cgi-bin
 RSYNCHTDOCS=htdocs
 UPDATEFLAG=http://hgdownload.cse.ucsc.edu/gbib/lastUpdate
+LOGFILE=/var/log/gbibUpdates.log
+
+# make sure that apt-get never opens an interactive dialog
+export DEBIAN_FRONTEND=noninteractive 
 
 # help
 if [ "$1" == "-h" ] ; then
    echo "Without any options, this script checks if it can reach hgdownload and if "
    echo "new data has been added since the last run. It updates itself and runs the new copy."
    echo "The new copy rsyncs the CGIs/MysqlDbs/htdocs from hgdownload."
    echo "It rsyncs the gbib/push directory into / to update other files."
    echo "It finally repairs mysql tables, makes some trackDb changes, adds symlinks,"
    echo "and chmods the directories."
    echo "Parameters:"
    echo "updateBrowser.sh notSelf - do not update the script itself. Do not check if data has been "
    echo "                           added to hgdownload since the last run"
    echo "updateBrowser.sh hgwdev - more info on how to update to alpha versions (used only by UCSC)"
    exit 0
 fi
 
 # check if running as root
 if [ "$(id -u)" != "0" ] ; then
    echo "This script must be run as root" 1>&2
    exit 1
 fi
 
 # check if running already, 3 = the main script + its update + the subshell where this command is running
 RUNNING=`ps --no-headers -CupdateBrowser.sh | wc -l`
 if [ ${RUNNING} -gt 3 ] ; then
     #echo update already running
     exit 2
 fi
 	
-# check if auto-updates were deactivated from the Vbox host via a property
-if VBoxControl guestproperty get gbibAutoUpdateOff | grep -xq "Value: yes" ; then
+# check if the VirtualBox guest addition kernel modules work and if yes, if auto-updates were 
+# deactivated from the Vbox host via a property
+if modprobe vboxguest 2> /dev/null > /dev/null; then
+   if [ "$#" -eq 0 ] && VBoxControl guestproperty get gbibAutoUpdateOff 2> /dev/null | grep -xq "Value: yes" ; then
        exit 6
+   # The exit above is only reached when no argument was given on the command
+   # line (= run from cron): a manual run with any argument still updates, even
+   # if the host has set the gbibAutoUpdateOff guest property to "yes".
+   # 2> /dev/null is applied to VBoxControl, the command whose errors are hidden.
+   fi
+else
+   # only show this notice when run with a command line argument (=not from cron)
+   if [ "$#" -ne 0 ] ; then
+      echo - Info: GBiB not running under VirtualBox or VirtualBox Guest Utils are not working
+   fi
 fi
 
-# check if we have internet
+# check if we have internet, stop if not
 wget -q --tries=1 --timeout=10 --spider http://hgdownload.soe.ucsc.edu -O /dev/null
 if [ $? -ne 0 ] ; then
     exit 3
 fi
 
-# check flag if run with no parameter (=from cron)
+# check flag file if run with no parameter (=from cron)
 if [ "$#" -eq 0 ] ; then
    # check a flag file to see if anything on hgdownload actually changed
    if /root/urlIsNotNewerThanFile $UPDATEFLAG /root/lastUpdateTime.flag
    then
        exit 0
    fi
 fi
 
+# keep a log of all output of this script and the date
+echo --------------------------------- >> $LOGFILE
+date  >> $LOGFILE
+echo --------------------------------- >> $LOGFILE
+exec >> >(tee -a $LOGFILE) 2>&1
+
 # unless already calling self, update self and call self unless doing only cgis
-if [ "$BASH_ARGV" != "notSelf" -a "$1" != "hgwdev" ] ; then
+# self-updates are not done when suppressed with notSelf and also not in hgwdev-mode to allow testing of local updateBrowser.sh changes
+# Internal sidenote: if you want hgwdev CGIs and also the current hgwdev update
+# script, do a gbibCoreUpdateBeta+updateBrowser hgwdev
+
+# gbibCoreUpdateBeta ends with -Beta because it is used during beta time, to
+# test the current dev update script. The update script itself has only a
+# two-stage release process, beta and final, as the alpha version of the script
+# is on the developer's own GBiB.
+# If the file /root/gbibSkipNextUpdate exists, one single self-update of this script is skipped
+if [[ ( "$BASH_ARGV" != "notSelf" && "$1" != "hgwdev" ) && ( ! -e /root/gbibSkipNextUpdate ) ]] ; then
     echo getting new update script
     # we got three VMs where updateBrowser.sh was 0 bytes, so doing download/move now
     wget http://hgdownload.soe.ucsc.edu/gbib/updateBrowser.sh -O /root/updateBrowser.sh.new -q && mv /root/updateBrowser.sh.new /root/updateBrowser.sh
     chmod a+x /root/updateBrowser.sh
     /root/updateBrowser.sh $1 notSelf
     exit 0
 fi
 
+rm -f /root/gbibSkipNextUpdate
+
 # check if any hgMirror jobs are running right now
 # check if the group id file exists and also if any processes exist with this group id
 # note that the .pid actually contains a group id, not a process id
 if [ -f /tmp/lastJob.pid ] && [ "$(ps x -o pgid | grep $(cat /tmp/lastJob.pid) | wc -l)" != "0" ] ; then
     echo a hgMirror job is running right now, not updating
     exit 4
 fi
 	
+# activate the apt repo 'main' and 'universe' so we can install external software
+if ! apt-cache policy r-base | grep "Unable to locate" > /dev/null; then
+   if ! grep '^deb http://us.archive.ubuntu.com/ubuntu trusty main universe multiverse$' /etc/apt/sources.list > /dev/null; then
+       echo - Activating the main Ubuntu package repository
+       echo 'deb http://us.archive.ubuntu.com/ubuntu trusty main universe multiverse' >> /etc/apt/sources.list
+   fi
+fi
+
+# activate daily automated security updates with automated reboots
+# automated reboots are strange but probably better than to risk exploits
+# see https://help.ubuntu.com/community/AutomaticSecurityUpdates
+if apt-cache policy unattended-upgrades | grep "Installed: .none." > /dev/null; then
+   echo - Activating automated daily Ubuntu security updates
+   # from http://askubuntu.com/questions/203337/enabling-unattended-upgrades-from-a-shell-script
+   apt-get update
+   apt-get install -y unattended-upgrades update-notifier-common
+   # update package lists every day
+   echo 'APT::Periodic::Update-Package-Lists "1";' > /etc/apt/apt.conf.d/20auto-upgrades
+   # do the upgrade every day
+   echo 'APT::Periodic::Unattended-Upgrade "1";' >> /etc/apt/apt.conf.d/20auto-upgrades
+   # reboot if needed
+   echo 'Unattended-Upgrade::Automatic-Reboot "true";' >> /etc/apt/apt.conf.d/20auto-upgrades
+   # remove packages not used anymore
+   echo 'Unattended-Upgrade::Remove-Unused-Dependencies "true";' >> /etc/apt/apt.conf.d/20auto-upgrades
+   # and remove the downloaded tarballs at the end
+   echo 'APT::Periodic::AutocleanInterval "1";' >> /etc/apt/apt.conf.d/20auto-upgrades
+   /etc/init.d/unattended-upgrades restart
+fi
+   
+# unattended security upgrades take a while to start, better to force one right now and show the progress
+# this may lead to an auto-reboot, so let's skip the auto-update of this script for this reboot
+touch /root/gbibSkipNextUpdate
+echo - Applying Ubuntu security updates
+unattended-upgrade -v
+rm -f /root/gbibSkipNextUpdate
+
+# The original GBiB image did not use the Ubuntu Virtualbox guest utils
+# fix this now and switch to these
+if apt-cache policy virtualbox-guest-dkms | grep "Installed: .none." > /dev/null; then
+    apt-get install -y linux-headers-generic 
+    apt-get install -y virtualbox-guest-dkms
+    apt-get -y autoremove
+    /etc/init.d/vboxadd start
+    # during 2015, a directory /home/browser/bin got created in the official image and filled with a copy of the user tools
+    # remove this directory now to avoid confusion
+    shred -fzu -n1 /home/browser/bin/blat/*
+    rm -rf /home/browser/bin/blat
+    shred -fzu -n1 /home/browser/bin/*
+    rm -rf /home/browser/bin
+fi
+
+# install R for the gtex tracks
+if apt-cache policy r-base | grep "Installed: .none." > /dev/null; then
+   echo - Installing R
+   apt-get update
+   apt-get install -y r-base
+   apt-get -y autoremove
+fi
+
 echo
-echo Updating the genome browser software via rsync:
+echo - Updating the genome browser software via rsync:
 
 # CGI-BIN and HTDOCS:
 # the parameter "hgwdev" copies over only the beta/alpha CGIs from hgwdev
 if [ "$1" == "hgwdev" ] ; then
     # note the missing -u option to RSYNC: in hgwdev mode, we want to overwrite everything.
     # On a development machine, the developer might have touched a file
     # for testing. We want to make sure that all local files are overwritten by the 
     # files on hgwdev
     RSYNCOPTS="-ltrzvh"
     user=$2
     dirExt=$3
 
     if [ "$user" == "" ]; then
         echo arguments: updateBrowser hgwdev hgwdevUsername cgiDirectoryExtension 
         echo in alpha/beta mode you need to provide a username for the hgwdev login
@@ -140,75 +245,81 @@
     fi
   
     # remove things that are on hgwdev beta directories but not necessary on the gbib
     if [ "$dirExt" == "beta" ] ; then
       RSYNCAPACHE="$RSYNCAPACHE --exclude favicon*.ico --exclude hg.conf* --exclude ENCODE --exclude *.gz --exclude *.bw --exclude *.bb --exclude *.bam --exclude goldenPath/**.pdf --exclude admin --exclude goldenPath/customTracks --exclude pubs --exclude ancestors --exclude training --exclude trash --exclude .htaccess --exclude htdocs --exclude Neandertal --exclude RNA-img --exclude ebola --exclude encodeDCC --exclude evoFold --exclude geneExtra --exclude js-public --exclude style-public --exclude hgNearData --exclude visiGeneData --exclude visiGene"
     fi
 
     rsync $RSYNCAPACHE $user@hgwdev.soe.ucsc.edu:/usr/local/apache/htdocs${htmlExt}/ /usr/local/apache/htdocs/
     rsync $RSYNCAPACHE $user@hgwdev.soe.ucsc.edu:/usr/local/apache/cgi-bin${cgiExt}/ /usr/local/apache/cgi-bin/
 
     PUSHLOC=$user@hgwdev.soe.ucsc.edu:/usr/local/apache/htdocs/gbib/push/
 
 # normal public updates from hgdownload are easier, not many excludes necessary
 else
     # update CGIs
-    echo updating CGIs...
-    rsync --delete $RSYNCOPTS $RSYNCSRC/$RSYNCCGIBIN /usr/local/apache/cgi-bin/ --exclude=hg.conf --exclude=hg.conf.local --exclude edw* --exclude *private --exclude hgNearData --exclude visiGeneData --exclude Neandertal 
+    echo - Updating CGIs...
+    rsync --delete -u $RSYNCOPTS $RSYNCSRC/$RSYNCCGIBIN /usr/local/apache/cgi-bin/ --exclude=hg.conf --exclude=hg.conf.local --exclude edw* --exclude *private --exclude hgNearData --exclude visiGeneData --exclude Neandertal 
 
-    echo updating HTML files...
-    rsync --delete $RSYNCOPTS $RSYNCSRC/$RSYNCHTDOCS/ /usr/local/apache/htdocs/ --include **/customTracks/*.html --exclude ENCODE/ --exclude *.bam --exclude *.bb --exclude */*.bw --exclude */*.gz --exclude favicon.ico --exclude folders --exclude ancestors --exclude admin --exclude goldenPath/customTracks --exclude images/mammalPsg --exclude style/gbib.css --exclude images/title.jpg --exclude images/homeIconSprite.png --exclude goldenPath/**.pdf --exclude training
+    echo - Updating HTML files...
+    rsync --delete -u $RSYNCOPTS $RSYNCSRC/$RSYNCHTDOCS/ /usr/local/apache/htdocs/ --include **/customTracks/*.html --exclude ENCODE/ --exclude *.bam --exclude *.bb --exclude */*.bw --exclude */*.gz --exclude favicon.ico --exclude folders --exclude ancestors --exclude admin --exclude goldenPath/customTracks --exclude images/mammalPsg --exclude style/gbib.css --exclude images/title.jpg --exclude images/homeIconSprite.png --exclude goldenPath/**.pdf --exclude training
 
     PUSHLOC=hgdownload.cse.ucsc.edu::gbib/push/
 fi
 
 chown -R www-data.www-data /usr/local/apache/cgi-bin/*
 chown -R www-data.www-data /usr/local/apache/htdocs/
 chmod -R a+r /usr/local/apache/htdocs
 
 if [ "$1" != "hgwdev" ] ; then
-  echo updating GBDB files...
+  echo - Updating GBDB files...
   rsync $RSYNCOPTS --existing rsync://hgdownload.cse.ucsc.edu/gbdb/ /data/gbdb/
   chown -R www-data.www-data /data/gbdb/
 fi
 
-echo pulling other files
+echo - Pulling other files
 # make sure we never overwrite the hg.conf.local file
 rsync $RSYNCOPTS $PUSHLOC / --exclude=hg.conf.local
 
 if [ "$1" != "hgwdev" ] ; then
   echo updating MYSQL files - browser will not work during the MYSQL update
   # inspired by http://forums.mysql.com/read.php?35,45577,47063#msg-47063
   # it doesn't work if I use two mysql invocations, as 'flush tables with read lock'
   # is only valid as long as the session is open
   # so I use the SYSTEM command
   echo "FLUSH TABLES WITH READ LOCK; SYSTEM rsync $RSYNCOPTS --existing rsync://hgdownload.cse.ucsc.edu/mysql/ /data/mysql/; SYSTEM chown -R mysql.mysql /data/mysql/; UNLOCK TABLES;" | mysql
   
   echo updating hgcentral database, make sure to always overwrite
   echo "FLUSH TABLES WITH READ LOCK; SYSTEM rsync -vrz --existing rsync://hgdownload.cse.ucsc.edu/mysql/hgcentral/ /data/mysql/hgcentral/; SYSTEM chown -R mysql.mysql /data/mysql/hgcentral; UNLOCK TABLES;" | mysql
   # update blat servers
   mysql hgcentral -e 'UPDATE blatServers SET host=CONCAT(host,".cse.ucsc.edu") WHERE host not like "%ucsc.edu"'
   # the box does not officially support the HAL right now, remove the ecoli hubs
   mysql hgcentral -e 'delete from hubPublic where hubUrl like "%nknguyen%"'
 fi
 
-echo patching menu 
+echo - Adapting the menu 
 cp /usr/local/apache/htdocs/inc/globalNavBar.inc /tmp/navbar.inc
 # remove mirrors and downloads menu
 sed -i '/<li class="menuparent" id="mirrors">/,/^<\/li>$/d' /tmp/navbar.inc 
 sed -i '/<li class="menuparent" id="downloads">/,/^<\/li>$/d' /tmp/navbar.inc 
-cat /tmp/navbar.inc | grep -v hgNear | grep -v hgVisiGene | sed -e '/hgLiftOver/a <li><a href="../cgi-bin/hgMirror">Mirror tracks</a></li>' | sed '/genomewiki/a <li><a href="../goldenPath/help/gbib.html">Help on GBiB</a></li>' | uniq > /usr/local/apache/htdocs/inc/globalNavBar.inc 
+# adding the link to the mirror tracks tool
+sed -i '/hgLiftOver/a <li><a href="../cgi-bin/hgMirror">Mirror tracks</a></li>' /tmp/navbar.inc
+# add a link to the gbib shared data folder
+sed -i '/Track Hubs/a <li><a href="http:\/\/127.0.0.1:1234\/folders\/">GBiB Shared Data Folder<\/a><\/li>' /tmp/navbar.inc
+# adding a link to the GBIB help pages
+sed -i '/genomewiki/a <li><a href="../goldenPath/help/gbib.html">Help on GBiB</a></li>' /tmp/navbar.inc
+cat /tmp/navbar.inc | grep -v hgNear | grep -v hgVisiGene | uniq > /usr/local/apache/htdocs/inc/globalNavBar.inc
 rm /tmp/navbar.inc
 
 # patch left side menu:
 # remove encode, neandertal, galaxy, visiGene, Downloads, cancer browser, microbial, mirrors, jobs
 for i in hgNear ENCODE Neandertal galaxy VisiGene hgdownload genome-cancer microbes mirror jobs; do
     sed -i '/<A CLASS="leftbar" .*'$i'.*$/,/<HR>$/d' /usr/local/apache/htdocs/index.html
 done
 
 # patch main pages
 sed -i 's/About the UCSC Genome Bioinformatics Site/About the UCSC Genome Browser in a Box/g' /usr/local/apache/htdocs/indexIntro.html
 perl -0777 -pi -e 's/It also.+ provides.+> project. //s' /usr/local/apache/htdocs/indexIntro.html
 perl -0777 -pi -e 's/Program-driven.+ per day.//s' /usr/local/apache/htdocs/indexInfo.html
 # patch contacts page
 sed -i 's/......cgi-bin\/hgUserSuggestion/http:\/\/genome.ucsc.edu\/cgi-bin\/hgUserSuggestion/' /usr/local/apache/htdocs/contacts.html
 # remove visigene from top menu
@@ -238,17 +349,17 @@
 mysqlcheck --all-databases --auto-repair --quick --fast --silent
 
 #LATENCY=`ping genome.ucsc.edu -n -c1 -q | grep rtt | cut -d' ' -f4 | cut -d/ -f2 | cut -d. -f1`
 #if [ "$LATENCY" -gt "90" ]; then
 #echo making low-latency changes
 /usr/local/apache/cgi-bin/hgMirror postRsyncUpdates
 
 # the local-only hg.conf settings file has to exist as it is included from hg.conf
 # In case it got deleted due to some error, recreate it
 if [ ! -f /usr/local/apache/cgi-bin/hg.conf.local ] ; then
    echo Creating /usr/local/apache/cgi-bin/hg.conf.local
    echo allowHgMirror=true > /usr/local/apache/cgi-bin/hg.conf.local
 fi
 
 touch /root/lastUpdateTime.flag
-echo update done.
+echo - GBiB update done
 cat /etc/issue | tr -s '\n'