#!/bin/bash

# *** Do not edit this file outside of the source tree. ***
# This file is from source tree:
#   kent/src/hg/utils/otto/genArk/updateHgcentral.sh
# called by otto cron job in /hive/data/inside/GenArk/
# and using the helper script:
#   kent/src/hg/utils/otto/genArk/genArkListToSql.pl
#
# Provenance: commit c4c80a07b85c6aa44fa6f862536e297c8310a53a
#   hiram, Thu Feb 2 12:37:12 2023 -0800
#   "adding otto cron scripts to keep the hgcentral.genark table up to date"
#
# Purpose:
#   Fetch the current assembly hub list from hgdownload, compare it with
#   the previously processed list (previousList.txt) and, when it has
#   changed and has more rows than the hgcentraltest.genark table, reload
#   that table from the new list.  The outcome is reported by email.
#
# Usage:
#   ./updateHgcentral.sh makeItSo
#
# Exit status:
#   0   - list unchanged, or table reloaded successfully
#   255 - bad usage, or a row count check failed (an ALERT email is sent)
#   any other non-zero - a command failed (see 'set -e' below)

# exit on any error
set -beEu -o pipefail

# ${1:-} instead of $1: with 'set -u' a missing argument would otherwise
# abort with "unbound variable" before the usage message could be shown.
if [[ "${1:-}" != "makeItSo" ]]; then
  printf "usage: ./updateHgcentral.sh makeItSo\n" 1>&2
  printf "updates hgcentral.genark with the latest hub list from hgdownload\n" 1>&2
  exit 255
fi

cd /hive/data/inside/GenArk

# Assign first, export afterwards: 'export VAR=$(cmd)' would hide a
# failure of cmd from 'set -e'.
DS=$(date "+%F")
YYYY=$(date "+%Y")
export DS YYYY
# presumably selects the digit grouping used by the %'d formats below
export LC_NUMERIC=en_US
export msgTo="hclawson@ucsc.edu"
# export msgTo="hclawson@ucsc.edu,lrnassar@ucsc.edu"

# Unpredictable temporary file name for the outgoing email; the EXIT trap
# removes it on every exit path, including aborts caused by 'set -e'.
msgFile=$(mktemp /tmp/ottoGenArk.XXXXXX)
export msgFile
trap 'rm -f -- "${msgFile}"' EXIT

###########################################################################
### helper functions
###########################################################################

# Start a new message in ${msgFile}: truncates the file and writes the
# To: and From: header lines.
function msgToFrom() {
  printf "To: %s\n" "${msgTo}" > "${msgFile}"
  printf "From: hiram@soe.ucsc.edu\n" >> "${msgFile}"
}

# Hand the composed message in ${msgFile} to sendmail (recipients are taken
# from the message headers via -t), then remove the file.
function sendMsg() {
  /usr/sbin/sendmail -t -oi < "${msgFile}"
  rm -f -- "${msgFile}"
}

# Private helper shared by the three report functions below.
# Arguments: $1 - Subject: text
#            $2 - printf format for the body line, with two %'d fields
#            $3 - first count
#            $4 - second count
function _mailCounts() {
  local subject=$1
  local bodyFormat=$2
  local firstCount=$3
  local secondCount=$4
  msgToFrom
  {
    printf "Subject: %s\n" "${subject}"
    printf "\n"
    # shellcheck disable=SC2059 # format is one of the constants below
    printf "${bodyFormat}" "${firstCount}" "${secondCount}"
  } >> "${msgFile}"
  sendMsg
}

# ALERT: the downloaded list does not have more rows than the table.
# Arguments: $1 - row count in today's list, $2 - row count in MySQL table
function oddRowCounts() {
  local todayCount=$1
  local mysqlCount=$2
  _mailCounts "ALERT: hgcentral.genark update problem" \
    "# puzzling, count in file %'d is not larger than the MySQL table %'d\n" \
    "${todayCount}" "${mysqlCount}"
}

# ALERT: the table was reloaded but did not grow.
# Arguments: $1 - row count after reload, $2 - row count before reload
function updateIncorrect() {
  local todayCount=$1
  local mysqlCount=$2
  _mailCounts "ALERT: hgcentral.genark update problem" \
    "# ERROR: hgcentral.genark table has been reloaded, but row count %'d is not larger than previous MySQL table %'d\n" \
    "${todayCount}" "${mysqlCount}"
}

# NOTE: the table was reloaded and has grown.
# Arguments: $1 - row count after reload, $2 - row count before reload
function updateOK() {
  local todayCount=$1
  local mysqlCount=$2
  _mailCounts "NOTE: hgcentral.genark has been updated" \
    "# today MySQL rowCount %'d vs. previous MySQL rowCount %'d\n" \
    "${todayCount}" "${mysqlCount}"
}

###########################################################################
### begin processing
###########################################################################

# everything depends on this file from hgdownload
rsync -a qateam@hgdownload:/mirrordata/hubs/UCSC_GI.assemblyHubList.txt "./list.${DS}"

# see if it became newer than previous: compare checksums of the sorted
# non-comment lines, so ordering and comment changes do not count.
# NOTE(review): with 'set -e' and pipefail the script stops here when
# previousList.txt is missing or when either list has no non-comment lines.
newSum=$(grep -v "^#" "list.${DS}" | sort | md5sum | cut -d' ' -f1)
prevSum=$(grep -v "^#" previousList.txt | sort | md5sum | cut -d' ' -f1)

if [[ "${prevSum}" == "${newSum}" ]]; then
  # nothing new, discard today's download
  rm -f -- "list.${DS}"
  exit 0
fi

# save this new list in history
mkdir -p "history/${YYYY}"
cp -p "list.${DS}" "history/${YYYY}"
countToday=$(grep -c -v "^#" "list.${DS}")
# NOTE(review): the trailing '| cat' is kept from the original; its purpose
# is not evident from this script.
rowCount=$(hgsql -N hgcentraltest -e 'select count(*) from genark;' | cat)

# refuse to reload unless the new list is larger than the existing table
if [[ "${countToday}" -le "${rowCount}" ]]; then
  oddRowCounts "${countToday}" "${rowCount}"
  exit 255
fi

# rebuild the table from scratch: drop, recreate from the schema, load
./genArkListToSql.pl "list.${DS}" > genark.tsv
hgsql hgcentraltest -e 'drop table genark;'
hgsql hgcentraltest < ~/kent/src/hg/lib/genark.sql
hgsql hgcentraltest -e "LOAD DATA LOCAL INFILE 'genark.tsv' INTO TABLE genark;"
newCount=$(hgsql -N hgcentraltest -e 'select count(*) from genark;' | cat)

# verify the reloaded table actually grew
if [[ "${newCount}" -le "${rowCount}" ]]; then
  updateIncorrect "${newCount}" "${rowCount}"
  exit 255
fi

updateOK "${newCount}" "${rowCount}"
# today's list becomes the reference for the next run
rm -f previousList.txt
mv "list.${DS}" previousList.txt

exit 0