#!/bin/bash

###############################################################################
# runTrimLogs.sh
#
# Provenance:
#   commit 2aea926850613e4fcef435265d32564573fdb613
#   author lrnassar, Wed Aug 28 11:23:21 2024 -0700
#   file   src/utils/qa/runTrimLogs.sh (new file, mode 100755)
#
# Commit message:
#   Adding the runTrimLogs.sh script made by ChrisL to the tree, this has been
#   running for a few years now. It processes the raw error logs into a better
#   format that can be ingested by the various stats scripts such as the
#   monthly usage reports. Primarily it makes sure that any error log lines
#   belonging to a single cart do not span multiple lines. The second script
#   is a small addition to check if the error logs are no longer updating.
#   This has come up various times over the years, most recently both the asia
#   and euro logs had not updated since May, roughly 4.5 months old. No RM.
###############################################################################

############################
# meant to be run via cronjob
############################

set -beEu -o pipefail

WORKDIR="/hive/users/chmalee/logs/trimmedLogs"
# NOTE(review): EMAIL and GENSUB are not referenced anywhere in this script;
# makeJobList() below is used in place of gensub2.
EMAIL="browserqa-group@ucsc.edu"
GENSUB="/cluster/bin/x86_64/gensub2"

# name of today's work dir under WORKDIR (YYYY-MM-DD)
today=$(date +%F)

# which step of the script are we at: 0 means "run the trim step" (-t given)
trimStep=1

# which cluster to use, default to ku but can use hgwdev in a pinch
cluster="ku"

# force a re-run on all files, not just the new ones.
# Shell-style flag: 0 means "force", non-zero means incremental run.
force=1

# Print the command line help to stdout.
function usage()
{
    cat << EOF
Usage: $(basename "$0") [-htcf]

Optional Arguments:
-h    Show this help
-t    Trim error logs. Smart enough to only run on most recent additions.
-f    Force a re-run on all files, not just the newest ones
-c    Use hgwdev instead of ku for cluster run

This script is meant to be run via cronjob to check for new error logs and trim them
down via parasol (the -t option). Checks /hive/data/inside/wwwstats/RR/ for new error_log files
and trims them via the errorLogTrim script. Run that script with no args for more
information.
EOF
}

# Concatenate the per-log trimmed results into ${WORKDIR}/result/full.gz.
#   $1 - file (relative to today's work dir) listing the input log paths
# NOTE(review): nothing in this script calls this function, and the path it
# reads (result/hgwN/error_log.DATE.trimmed.gz) differs from the layout that
# makeJobList() asks the jobs to produce (result/hgwN/DATE.trimmed.gz).
# Behavior is left as it was; confirm before relying on it.
function combineTrimmed()
{
    # when the jobs are done combine the result files into one:
    local fileList=$1
    local f fName
    cd "${WORKDIR}/${today}"
    while IFS= read -r f
    do
        fName=$(echo "${f}" | grep -o "hgw.*")
        fName=${fName%.gz}
        cat "${WORKDIR}/result/${fName}.trimmed.gz" >> "${WORKDIR}/result/full.gz"
    done < "${fileList}"
}

# Echo the machine name encoded in an error log path.
#   $1 - path to an error_log.*.gz file
# Paths containing "euroNode" or "asiaNode" yield that name; anything else is
# assumed to look like .../hgwN/error_log.DATE.gz and yields "hgwN".
function getMachName()
{
    # the ## strips the longest match from beginning of string,
    # while the % strips the shortest match from the end, thus
    # fname=/hive/data/inside/wwwstats/RR/2020/hgw1/error_log.20200105.gz will
    # become: hgw1
    local fname=$1
    local mach=""
    if [[ "${fname}" == *"euroNode"* ]]
    then
        mach="euroNode"
    elif [[ "${fname}" == *"asiaNode"* ]]
    then
        mach="asiaNode"
    else
        # derive the name from our own argument, not from whatever the
        # caller's loop variable happens to be called
        mach=${fname##*hgw}
        mach="hgw${mach%/error_log*}"
    fi
    # functions can only return a status, so we echo and capture in the caller
    echo "${mach}"
}

# substitute for gensub2 because asia/euro node have different
# format than RR machine files.
#   $1 - path to an error_log.*.gz file
# Appends one parasol job line for that log to ./jobList.
function makeJobList()
{
    local fname=$1
    local machName root1
    machName=$(getMachName "${fname}")
    root1=${fname##*error_log.}
    root1=${root1%.gz}
    echo "../trimLogs.sh ${fname} {check out exists ../result/${machName}/${root1}.trimmed.gz}" >> jobList
}

# Build jobList from today's jobFileList (if there is one) and hand it to
# parasol, either over ssh on ku or locally with reduced resources.
function runPara()
{
    local f
    cd "${WORKDIR}/${today}"
    if [[ -e jobFileList ]]
    then
        while IFS= read -r f
        do
            makeJobList "${f}"
        done < jobFileList
        if [[ "${cluster}" == "ku" ]]
        then
            ssh ku "cd ${WORKDIR}/${today}; para create jobList; para push; exit"
        else
            para create -ram=10g jobList
            para push -maxJob=10
        fi
    fi
}

# Work out which error logs need (re)trimming and write them, one path per
# line, to ${WORKDIR}/${today}/jobFileList. Also writes allFileList (every log
# found) and jobFileList.prev (the previous run's job list, sorted).
function getFileList()
{
    local oldDir cutoff f machName r range toRemove

    cd "${WORKDIR}"
    # find the most recent dir, example %T+ output
    #2019-10-21+09:41:17.5243280000 ./2019-10-21
    oldDir=$(find . -maxdepth 1 -type d -name "20*" -printf "%T+\t%p\n" | sort -r | head -1 | cut -f2)
    mkdir -p "${WORKDIR}/${today}"
    cd "${WORKDIR}/${today}"

    # best effort: a missing previous list or an unmatched glob must not abort
    set +e
    sort "${WORKDIR}/${oldDir}/jobFileList" > jobFileList.prev
    find /hive/data/inside/wwwstats/RR/{2019,202*}/hgw{1,2,3,4,5,6}/error_log.*.gz -print 2>/dev/null | sort > rr.tmp
    find /hive/data/inside/wwwstats/{euroNode,asiaNode}/{2019,202*}/error_log.*.gz -print 2>/dev/null | sort > asiaEuro.tmp
    sort rr.tmp asiaEuro.tmp > allFileList
    rm rr.tmp asiaEuro.tmp
    set -e

    if [[ ${force} -ne 0 ]]
    then
        # incremental run
        cp jobFileList.prev jobFileList.tmp
        # the most recently checked logs plus the new ones:
        comm -13 "../${oldDir}/allFileList" allFileList >> jobFileList.tmp
        # every time this script is run, we need to force a re-run on the last
        # 21 days of logs, as they may have only been partially complete when
        # we last ran the cluster job:
        cutoff=$(date -d "21 days ago" +%s)
        while IFS= read -r f
        do
            machName=$(getMachName "${f}")
            r=${f##*error_log.}
            r=${r%.gz}
            range=$(date -d "${r}" +%s)
            toRemove="../result/${machName}/${r}.trimmed.gz"
            if [[ ${range} -gt ${cutoff} ]]
            then
                if [[ -e "${toRemove}" ]]
                then
                    rm -- "${toRemove}"
                fi
                echo "${f}" >> jobFileList
            elif [[ ! -e "${toRemove}" ]]
            then
                # if it's a brand new file we need to run it!
                # (elif so a recent log is not queued twice: its result file
                # never exists at this point, having just been removed above)
                echo "${f}" >> jobFileList
            fi
        done < allFileList
        # the loop's list wins when it produced one; otherwise fall back to
        # the previous job list plus newly appeared logs
        if [[ -e jobFileList ]]
        then
            rm jobFileList.tmp
        else
            mv jobFileList.tmp jobFileList
        fi
    else
        # forced run: drop every existing result and queue everything
        while IFS= read -r f
        do
            set +e
            machName=$(getMachName "${f}")
            r=${f##*error_log.}
            r=${r%.gz}
            toRemove="../result/${machName}/${r}.trimmed.gz"
            if [[ -e "${toRemove}" ]]
            then
                rm -- "${toRemove}"
            fi
            set -e
        done < allFileList
        cp allFileList jobFileList
    fi
}

# The whole trim step: pick the files, then submit the cluster jobs.
function doTrimStep()
{
    getFileList
    runPara
}

# EXIT-trap helper: print the failure hint when the script is exiting with a
# non-zero status.
#   $1 - exit status of the script
function reportTrimFailure()
{
    local status=$1
    if [[ ${status} -ne 0 ]]
    then
        echo "Potential error during log trimming. Check ${WORKDIR}/${today} for more information."
    fi
}

# leading ':' puts getopts in silent mode so the offending flag is in OPTARG
while getopts ":htcf" opt
do
    case "${opt}" in
        h)
            usage
            exit 0
            ;;
        t)
            trimStep=0
            ;;
        c)
            cluster="hgwdev"
            ;;
        f)
            force=0
            ;;
        \?)
            printf "unknown option -%s\n" "${OPTARG}" >&2
            usage >&2
            exit 1
            ;;
    esac
done

if [[ $# -eq 0 ]]
then
    printf "please run with -t\n"
    usage
    trimStep=1
    force=1
fi

if [[ ${trimStep} -eq 0 ]]
then
    # set -e aborts on the first failing command inside doTrimStep, so the
    # failure message has to come from an EXIT trap; "$?" checked after the
    # fact would never see a non-zero status.
    trap 'reportTrimFailure $?' EXIT
    doTrimStep
    trap - EXIT
    echo "Done trimming logs"
fi