#!/bin/bash
#
# grcUpdate.sh - otto job step: fetch the current GRC incident GFF3 file for
# one assembly, convert it to a bigBed file and, when the result differs from
# the previously published bigBed, publish it and rebuild the name index.
#
# Provenance: commit 8a0a3fdc82e2a1362e24e91a2f5de0ee9c1931fe
#   hiram, Thu Oct 21 11:17:30 2021 -0700
#   "scripts for the GRC Incident otto job no redmine"
#   new file src/hg/utils/otto/grcIncident/grcUpdate.sh (mode 100755)
#
# usage: grcUpdate.sh <workDir> <db> <GRC_issue> <ftpDir>
#   workDir   - directory below ${TOP} in which the job runs
#   db        - UCSC database name (e.g. mm9); selects chrom.sizes and the
#               database queried with hgsql
#   GRC_issue - base name of the GFF3 file to fetch (without ".gff3")
#   ftpDir    - path below ftp://ftp.ncbi.nlm.nih.gov/pub/grc/ that holds
#               <GRC_issue>.gff3
#
# Exit status: 0 on success, 255 on a usage error, 245 when the lock file
# updateRunning.pid from an earlier run is still present; any failing command
# aborts the script with that command's status (set -e).
#
# Locking: updateRunning.pid is removed only at the end of a successful run,
# so a run aborted by 'set -e' leaves it behind and every later run sends the
# ERROR mail until the file is removed by hand.

# exit on any failure
set -beEu -o pipefail

TOP="/hive/data/outside/grc/incidentDb"

# ECHO is no longer used below; it stays exported so the environment handed
# to child processes is unchanged.
export ECHO="/bin/echo -e"
export bbiInfo="/cluster/bin/x86_64/bigBedInfo"
export failMail="hiram@soe.ucsc.edu"

#######################################
# Send a plain-text mail to ${failMail} through sendmail.
# Globals:   failMail (read)
# Arguments: $1 - subject line; $2.. - body lines, one per argument
# Returns:   status of the sendmail pipeline
#######################################
send_notice() {
  local subject=$1
  shift
  {
    # All data goes through %s so a '%' or '\' in it cannot alter the format.
    printf 'To: %s\nFrom: %s\nSubject: %s\n\n' \
      "${failMail}" "${failMail}" "${subject}"
    printf '%s\n' "$@"
  } | /usr/sbin/sendmail -t -oi
}

if [ $# -ne 4 ]; then
  printf 'usage: grcUpdate.sh <workDir> <db> <GRC_issue> <ftpDir>\n' 1>&2
  exit 255
fi

export debug=0
export workDir="$1"
export db="$2"
# This ^ trick upper cases the first letter of the string in variable db
export Db="${db^}"
export GRC_issue="$3"
export ftpPath="$4/${GRC_issue}.gff3"

cd "${TOP}/${workDir}"

# In debug mode a stale lock never blocks the run.
if (( debug != 0 )); then
  rm -f updateRunning.pid
fi

if [ -s updateRunning.pid ]; then
  send_notice "ERROR: GRC Incident update ${Db}" \
    "ERROR: ${db} GRC incident update, previous update did not finish"
  exit 245
fi

printf '%s\n' "$$" > updateRunning.pid

# Assign first, export afterwards: 'export v=$(cmd)' would hide a failure of
# cmd from 'set -e'.
YM=$(date "+%Y/%m")
DS=$(date "+%F")
export YM DS
mkdir -p "${YM}"

# Dated work files for this run, all below the year/month directory.
gffFile="${YM}/${GRC_issue}.${DS}.gff"
bed5File="${YM}/${db}.${DS}.bed5"
contigsBed5="${YM}/${db}.${DS}.contigs.bed5"
datedBb="${YM}/${DS}.${Db}.grcIncidentDb.bb"
# The currently published bigBed, kept in the work directory itself.
liveBb="${Db}.grcIncidentDb.bb"

# --timestamping was dropped: wget turns it off (with a warning) whenever -O
# is given, so it never had any effect here.
wget "ftp://ftp.ncbi.nlm.nih.gov/pub/grc/${ftpPath}" \
  -O "${gffFile}" > /dev/null 2>&1
gzip -f "${gffFile}"

if (( debug != 0 )); then
  printf '# fetched into %s/%s\n' "${workDir}" "${gffFile}" 1>&2
fi

# NOTE(review): the join separator is written as an explicit tab; the
# following 'cut -f2-' and the refSeq.chromNames.tab input indicate
# tab-separated fields - confirm against parseGff.pl output.
if [ "${db}" = "mm9" ]; then
  # mm9: keep only rows whose key is listed in validContigs, then lift the
  # contig coordinates with refSeq.lift.
  "${TOP}/parseGff.pl" "${gffFile}.gz" \
    | sort \
    | join -t $'\t' validContigs - > "${contigsBed5}"
  /cluster/bin/x86_64/liftUp -type=.bed "${bed5File}" refSeq.lift error \
    "${contigsBed5}"
  gzip -f "${contigsBed5}"
else
  # Other assemblies: join against refSeq.chromNames.tab, drop the join key
  # column and sort by name then numeric start.
  "${TOP}/parseGff.pl" "${gffFile}.gz" \
    | sort \
    | join -t $'\t' refSeq.chromNames.tab - \
    | cut -f2- \
    | sort -k1,1 -k2,2n > "${bed5File}"
fi

/cluster/bin/x86_64/bedToBigBed -type=bed4+1 \
  -as="${HOME}/kent/src/hg/lib/grcIncidentDb.as" \
  "${bed5File}" "/hive/data/genomes/${db}/chrom.sizes" \
  "${datedBb}" > /dev/null 2>&1

gzip -f "${bed5File}"
if (( debug != 0 )); then
  printf '# parsed into %s.gz\n' "${bed5File}" 1>&2
  ls -og "${bed5File}.gz" 1>&2
fi

# Compare checksums of the new bigBed and the published one; oldSum stays 0
# when nothing has been published yet (or the published file is empty).
newSum=$(md5sum -b "${datedBb}" | awk '{print $1}')
oldSum=0
if [ -s "${liveBb}" ]; then
  oldSum=$(md5sum -b "${liveBb}" | awk '{print $1}')
fi
export newSum oldSum

if (( debug != 0 )); then
  printf '# newSum %s %s/%s\n' "${newSum}" "${workDir}" "${datedBb}" 1>&2
  ls -og "${datedBb}" 1>&2
  printf '# oldSum %s %s/%s\n' "${oldSum}" "${workDir}" "${liveBb}" 1>&2
  # On a first run there is no published file; do not let ls abort the run.
  if [ -e "${liveBb}" ]; then
    ls -og "${liveBb}" 1>&2
  fi
fi

if [ "${oldSum}" = "${newSum}" ]; then
  # Nothing changed: discard this run's dated files. No mail is sent (the
  # former "no change" alert is disabled).
  rm -f "${datedBb}"
  rm -f "${bed5File}.gz"
  rm -f "${gffFile}.gz"
  rm -f "${contigsBed5}.gz"
else
  rm -f "${liveBb}"
  cp -p "${datedBb}" "${liveBb}"
  # bigBedInfo runs inside the argument list on purpose: as before, a failure
  # there yields an empty report instead of aborting the update.
  send_notice "ALERT: GRC Incident update ${Db}" \
    "=== Updated ${Db} ===" \
    "$("${bbiInfo}" -udcDir=./udcCache "${liveBb}")"
  rm -fr ./udcCache
  /cluster/bin/scripts/gwUploadFile "${liveBb}" "${liveBb}" > /dev/null 2>&1
  # The query result is handed to bigBedInfo as the file/URL to inspect;
  # presumably the table holds a single row with the bigBed URL - verify.
  url=$(/cluster/bin/x86_64/hgsql -N -e "select * from grcIncidentDb;" "${db}")
  rm -fr ./udcCache
  mkdir ./udcCache
  # NOTE(review): this report still goes through 'mail' while the other
  # notices use sendmail; left unchanged - confirm whether that is intended.
  "${bbiInfo}" -udcDir=./udcCache "${url}" 2>&1 \
    | mail -s "ALERT: GRC Incident update ${Db}" "${failMail}" \
      > /dev/null 2> /dev/null
  rm -fr ./udcCache
  # Name index: each distinct value of bed column 4, written twice per line,
  # as input for ixIxx.
  zcat "${bed5File}.gz" | cut -f4 | sort -u \
    | awk '{printf "%s\t%s\n", $1, $1}' > "${Db}.nameIndex.txt"
  /cluster/bin/x86_64/ixIxx "${Db}.nameIndex.txt" \
    "${Db}.grcIncidentDb.ix" "${Db}.grcIncidentDb.ixx"
fi
rm -f updateRunning.pid
printf 'SUCCESS\n'
exit 0