9461632185d991b472fb061651d21c358c5690ba chmalee Mon Feb 10 10:52:53 2025 -0800 Make track log trim script use hgwdev as a cluster instead of ku, refs Lou email diff --git src/hg/logCrawl/trimLogs/runTrimLogs.sh src/hg/logCrawl/trimLogs/runTrimLogs.sh index 8372a73d7f9..c36332c5a5c 100755 --- src/hg/logCrawl/trimLogs/runTrimLogs.sh +++ src/hg/logCrawl/trimLogs/runTrimLogs.sh @@ -1,49 +1,45 @@ #!/bin/bash ############################ # meant to be run via cronjob ############################ set -beEu -o pipefail WORKDIR="/hive/users/chmalee/logs/trimmedLogs" -EMAIL="chmalee@ucsc.edu" +EMAIL="browserqa-group@ucsc.edu" GENSUB="/cluster/bin/x86_64/gensub2" # work dir today=`date +%F` # which step of the script are we at trimStep=1 -# which cluster to use, default to ku but can use hgwdev in a pinch -cluster="ku" - # force a re-run on all files, not just the new ones force=1 function usage() { cat << EOF Usage: `basename $0` [-htc] Optional Arguments: -h Show this help -t Trim error logs. Smart enough to only run on most recent additions. -f Force a re-run on all files, not just the newest ones --c Use hgwdev instead of ku for cluster run This script is meant to be run via cronjob to check for new error logs and trim them down via parasol (the -t option). Checks /hive/data/inside/wwwstats/RR/ for new error_log files and trims them via the errorLogTrim script. Run that script with no args for more information. EOF } function combineTrimmed() { # when the jobs are done combine the result files into one: fileList=$1 cd ${WORKDIR}/${today} for f in $(cat ${fileList}) do @@ -83,38 +79,33 @@ fname=$1 machName=$(getMachName $fname) root1=${fname##*error_log.} root1=${root1%.gz} echo "../trimLogs.sh ${fname} {check out exists ../result/${machName}/${root1}.trimmed.gz}" >> jobList } function runPara() { cd ${WORKDIR}/${today} if [ -e jobFileList ] then for f in $(cat jobFileList); do makeJobList $f done - if [[ "${cluster}" == "ku" ]] - then - ssh ku "cd ${WORKDIR}/${today}; para create jobList; para push; exit" - else para create -ram=10g jobList para push -maxJob=10 fi - fi } function getFileList() { cd ${WORKDIR} # find the most recent dir, example %T+ output #2019-10-21+09:41:17.5243280000 ./2019-10-21 oldDir=`find . -maxdepth 1 -type d -name "20*" -printf "%T+\t%p\n" | sort -r | head -1 | cut -f2` mkdir -p ${WORKDIR}/${today} cd ${WORKDIR}/${today} set +e sort ${WORKDIR}/${oldDir}/jobFileList > jobFileList.prev find /hive/data/inside/wwwstats/RR/{2019,202*}/hgw{1,2,3,4,5,6}/error_log.*.gz -print 2>/dev/null | sort > rr.tmp find /hive/data/inside/wwwstats/{euroNode,asiaNode}/{2019,202*}/error_log.*.gz -print 2>/dev/null | sort > asiaEuro.tmp sort rr.tmp asiaEuro.tmp > allFileList @@ -176,33 +167,30 @@ { getFileList runPara } while getopts "htcf" opt do case $opt in h) usage exit 0 ;; t) trimStep=0 ;; - c) - cluster="hgwdev" - ;; f) force=0 ;; ?) printf "unknown option %s\n" "$opt" EXIT_STATUS=1 ;; esac done if [[ $# -eq 0 && -n "${trimStep}" ]] then printf "please run with -t\n" usage trimStep=1