87435ae48fec925550b10151ef921ef36d75bb67
lrnassar
  Wed Aug 28 12:46:28 2024 -0700
Removing this from this dir as it already exists in ~/kent/src/hg/logCrawl/trimLogs/.

diff --git src/utils/qa/runTrimLogs.sh src/utils/qa/runTrimLogs.sh
deleted file mode 100755
index 93f08e4..0000000
--- src/utils/qa/runTrimLogs.sh
+++ /dev/null
@@ -1,222 +0,0 @@
-#!/bin/bash
-
-############################
-# meant to be run via cronjob
-############################
-
-set -beEu -o pipefail
-
-WORKDIR="/hive/users/chmalee/logs/trimmedLogs"
-EMAIL="browserqa-group@ucsc.edu"
-GENSUB="/cluster/bin/x86_64/gensub2"
-
-# work dir
-today=`date +%F`
-
-# which step of the script are we at
-trimStep=1
-
-# which cluster to use, default to ku but can use hgwdev in a pinch
-cluster="ku"
-
-# force a re-run on all files, not just the new ones
-force=1
-
-function usage()
-{
-cat << EOF
-Usage: `basename $0` [-htc]
-
-Optional Arguments:
--h                  Show this help
--t                  Trim error logs. Smart enough to only run on most recent additions.
--f                  Force a re-run on all files, not just the newest ones
--c                  Use hgwdev instead of ku for cluster run
-
-This script is meant to be run via cronjob to check for new error logs and trim them
-down via parasol (the -t option). Checks /hive/data/inside/wwwstats/RR/ for new error_log files
-and trims them via the errorLogTrim script. Run that script with no args for more
-information.
-EOF
-}
-
-function combineTrimmed()
-{
-    # when the jobs are done combine the result files into one:
-    fileList=$1
-    cd ${WORKDIR}/${today}
-    for f in $(cat ${fileList})
-    do
-        fName=`echo $f | grep -o "hgw.*"`
-        fName=`echo ${fName} | sed -e 's/\.gz$//'`
-        #echo ${fName}
-        cat ${WORKDIR}/result/${fName}.trimmed.gz >> ${WORKDIR}/result/full.gz
-    done
-}
-
-function getMachName()
-{
-    # the ## strips the longest match from beginning of string,
-    # while the % strips the shortest match from the end, thus
-    # fname=/hive/data/inside/wwwstats/RR/2020/hgw1/error_log.20200105.gz will
-    # become: hgw1
-    fname=$1
-    mach=""
-    if [[ "${fname}" == *"euroNode"* ]]
-    then
-        mach="euroNode"
-    elif [[ "${fname}" == *"asiaNode"* ]]
-    then
-        mach="asiaNode"
-    else
-        mach=${f##*hgw}
-        mach="hgw"${mach%/error_log*}
-    fi
-    # bash does not have return so we echo and capture in the caller
-    echo $mach
-}
-
-
-# substitute for gensub2 because asia/euro node have different
-# format than RR machine files
-function makeJobList() {
-    fname=$1
-    machName=$(getMachName $fname)
-    root1=${fname##*error_log.}
-    root1=${root1%.gz}
-    echo "../trimLogs.sh ${fname} {check out exists ../result/${machName}/${root1}.trimmed.gz}" >> jobList
-}
-
-function runPara()
-{
-    cd ${WORKDIR}/${today}
-    if [ -e jobFileList ]
-    then
-        for f in $(cat jobFileList); do
-            makeJobList $f
-        done
-        if [[ "${cluster}" == "ku" ]]
-        then
-            ssh ku "cd ${WORKDIR}/${today}; para create jobList; para push; exit"
-        else
-            para create -ram=10g jobList
-            para push -maxJob=10
-        fi
-    fi
-}
-
-function getFileList()
-{
-    cd ${WORKDIR}
-    # find the most recent dir, example %T+ output
-    #2019-10-21+09:41:17.5243280000  ./2019-10-21
-    oldDir=`find . -maxdepth 1 -type d -name "20*" -printf "%T+\t%p\n" | sort -r | head -1 | cut -f2`
-    mkdir -p ${WORKDIR}/${today}
-    cd ${WORKDIR}/${today}
-    set +e
-    sort ${WORKDIR}/${oldDir}/jobFileList > jobFileList.prev
-    find /hive/data/inside/wwwstats/RR/{2019,202*}/hgw{1,2,3,4,5,6}/error_log.*.gz -print 2>/dev/null | sort > rr.tmp
-    find /hive/data/inside/wwwstats/{euroNode,asiaNode}/{2019,202*}/error_log.*.gz -print 2>/dev/null | sort > asiaEuro.tmp
-    sort rr.tmp asiaEuro.tmp > allFileList
-    rm rr.tmp asiaEuro.tmp
-
-    set -e
-    if [[ ${force} -ne 0 ]]
-    then
-        cp jobFileList.prev jobFileList.tmp
-        # the most recently checked logs plus the new ones:
-        comm -13 ../${oldDir}/allFileList allFileList >> jobFileList.tmp
-        # every time this script is run, we need to force a re-run on the last weeks' logs,
-        # as they may have only been partially complete when we last ran the cluster job:
-        lastWeek=`date -d "21 days ago" +%s`
-        for f in `cat allFileList`
-        do
-            machName=$(getMachName $f)
-            r=${f##*error_log.}
-            r=${r%.gz}
-            range=`date -d "${r}" +%s`
-            if [ ${range} -gt ${lastWeek} ]
-            then
-                toRemove="../result/${machName}/${r}.trimmed.gz"
-                if [ -e ${toRemove} ]; then
-                    rm ${toRemove}
-                fi
-                echo $f >> jobFileList
-            fi
-            # if it's a brand new file we need to run it!
-            if [ ! -e "../result/${machName}/${r}.trimmed.gz" ];
-            then
-                echo $f >> jobFileList
-            fi
-        done
-        if [ -e jobFileList ]
-        then
-            rm jobFileList.tmp
-        else
-            mv jobFileList.tmp jobFileList
-        fi
-    else
-        for f in `cat allFileList`
-        do
-            set +e
-            machName=$(getMachName $f)
-            r=${f##*error_log.}
-            r=${r%.gz}
-            toRemove="../result/${machName}/${r}.trimmed.gz"
-            if [ -e ${toRemove} ]; then
-                rm ${toRemove}
-            fi
-            set -e
-        done
-        cp allFileList jobFileList
-    fi
-}
-
-function doTrimStep()
-{
-    getFileList
-    runPara
-}
-
-while getopts "htcf" opt
-do
-    case $opt in
-        h)
-            usage
-            exit 0
-            ;;
-        t)
-            trimStep=0
-            ;;
-        c)
-            cluster="hgwdev"
-            ;;
-        f)
-            force=0
-            ;;
-        ?)
-            printf "unknown option %s\n" "$opt"
-            EXIT_STATUS=1
-            ;;
-    esac
-done
-
-if [[ $# -eq 0 && -n "${trimStep}" ]]
-then
-    printf "please run with -t\n"
-    usage
-    trimStep=1
-    force=1
-fi
-
-if [[ ${trimStep} -eq 0 ]]
-then
-    doTrimStep
-fi
-
-if [[ $? -eq 0 ]]
-then
-    echo "Done trimming logs"
-else
-    echo "Potential error during log trimming. Check ${WORKDIR}/${today} for more information."
-fi