#!/bin/bash
# quit if something within the script fails
set -beEu -o pipefail
source "$(which qaConfig.bash)"
export HGDB_CONF=$HOME/.hg.conf.beta
umask 002

################################
#
# 02-13-2015
# Matthew Speir
#
# qaAutoTrack.sh
# Performs basic QA for auto-pushed tracks, which includes:
# - Checks when data for track was last updated
# - Coverage from featureBits -countGaps
# - Percentage difference in coverage between now and the last time the script was run
#
################################

##### Variables #####

# Set by command-line options
db=""
tableName=""
bigBedMode=""
verboseMode=false
overwriteLogUrl=false
newLogDir=""

# Other variables
currDate=$(date +%F)
currTime=$(date +%H_%M_%S)
output="" # holds output message
# Log host lives under the soe subdomain (changed from cse).
logUrl="http://genecats.soe.ucsc.edu/qa/test-results/qaAutoTrackLogs"
logDir="/usr/local/apache/htdocs-genecats/qa/test-results/qaAutoTrackLogs"
currLogFile=""
prevLogFile=""
prevLogDate=""
tmpBed="" # temporary bed file currently in use by qaBigBedTable, if any

# Variables for issue checking
issueNote=""
tooOld=""
percentDiff=""

##### Functions #####

# Usage message
function showHelp() {
cat << EOF

Usage: $(basename "$0") [-hbvs] [-l log dir] [-u log url] \$database \$table

    Required arguments:
    database           UCSC database name, e.g. hg19 or hg38.
    table              Table name, e.g. gwasCatalog.

    Optional arguments:
    -h                 Display this help and exit
    -b                 BigBed mode. Used for tracks supported bigBed files, i.e. grcIncidentDb.
    -v                 Verbose mode. Outputs test results to standard out as well as file.
    -l log directory   Alternate directory for output log
    -u log url         Alternate URL for output log.
    -s                 Suppress URL in output. Will instead print directory in messages.

Performs basic QA for auto-pushed tracks, which includes:
- Checks when data for track was last updated
- Coverage from featureBits -countGaps
- Percentage difference in coverage between now and the last time the script was run

By default, all results are output to a file and only issues are output to the command line.
Use the "-v" option to see results on the command line as well. All log files are output to:
http://genecats.soe.ucsc.edu/qa/test-results/qaAutoTrack.

Notes:
- For OMIM, ClinGen (formerly ISCA), or ClinVar tracks use omim, isca, or clinvar as
  the table name.
- If run more than once per day, then subsequent log files will include current time in name.

EOF
}

# Removes the temporary bed file if one is still around. Installed as the EXIT
# trap so an early exit (set -e) does not leave files behind in the tmp dir.
function cleanupTmpBed () {
    if [[ -n "$tmpBed" ]]
    then
        rm -f -- "$tmpBed"
    fi
}
trap cleanupTmpBed EXIT

# Output function
function outputCovDiff () {
    # four positional arguments. $1 == prevLogFile. $2 == tblCov. $3 == tableName. $4 == tblDate.
    # Appends the report for one table to the global "output" string and sets the
    # global "percentDiff".
    # Dependency: Coverage information from previous log file (positional argument $1)
    # If no previous log file exists or if previous log file doesn't contain usable
    # coverage info, then no coverage diff is calculated and percentDiff is left as "".
    local prevLog=$1 tblCov=$2 tbl=$3 tblDate=$4
    local rawCount="" rawCountPrev="" rawCountDiff rawCountAvg rawCountSum prevCov=""

    # Always start from the "no diff" state; otherwise a value computed for a
    # previously processed table would be reused for this one.
    percentDiff=""

    if [[ -n "$prevLog" ]] # Check for previous log file. True if file exists.
    then
        # First word of the coverage line is the raw count.
        rawCount=$(awk 'NF {print $1; exit}' <<< "$tblCov")
        # Grabs coverage from previous log file. The table name is on a line of its
        # own, so match the whole line (a prefix match would also pick up e.g.
        # iscaBenignGainCum when looking for iscaBenign). "|| true" because a log
        # without an entry for this table is not an error (grep exits 1, which
        # would otherwise abort the script under set -e / pipefail).
        prevCov=$(grep -E -A2 -- "^${tbl}\$" "$prevLog" | grep "^Coverage New" | cut -d" " -f3-) || true
        rawCountPrev=$(awk 'NF {print $1; exit}' <<< "$prevCov")
    fi

    # Only calculate a diff when both counts are plain non-negative integers.
    if [[ "$rawCount" =~ ^[0-9]+$ && "$rawCountPrev" =~ ^[0-9]+$ ]]
    then
        # 10# forces base 10 so a leading zero is never read as octal.
        rawCount=$(( 10#$rawCount ))
        rawCountPrev=$(( 10#$rawCountPrev ))
        if (( rawCount >= rawCountPrev ))
        then
            rawCountDiff=$(( rawCount - rawCountPrev ))
        else
            rawCountDiff=$(( rawCountPrev - rawCount ))
        fi
        rawCountAvg=$(( rawCount / 2 + rawCountPrev / 2 ))
        rawCountSum=$(( rawCount + rawCountPrev ))
        # Calculate diff between new and old coverage
        percentDiff=$(awk -v rcd="$rawCountDiff" -v rca="$rawCountAvg" -v sum="$rawCountSum" '
            BEGIN {
                # The integer halves round to 0 for counts of 0 or 1; fall back to
                # the exact mean so we never divide by zero.
                if (rca == 0) rca = sum / 2
                if (rca == 0) print 0
                else print 100 * rcd / rca
            }')
        # Build output string
        output+="$tbl\nLast updated: $tblDate\nCoverage New: $tblCov\nCoverage Old: $prevCov\nCoverage Diff: $percentDiff%\n\n"
    else
        output+="$tbl\nLast updated: $tblDate\nCoverage New: $tblCov\n\n"
    fi
}

# Function to raise errors
function checkForIssues () {
    # four positional arguments. $1 == tblDate. $2 == tooOld. $3 == tableName. $4 == percentDiff.
    # Appends to the global "issueNote" string.
    # Dependency: percentDiff, positional argument $4, is expected to be set by outputCovDiff
    # If percent diff is not set (so it retains default empty "" value), then no
    # coverage error is raised.
    local tblDate=$1 cutoff=$2 tbl=$3 pctDiff=$4
    local tblEpoch cutoffEpoch pctRounded

    cutoffEpoch=$(date -d "$cutoff" +%s)

    # Raises an error if it's been too long since last update
    if [[ -n "$tblDate" ]] && tblEpoch=$(date -d "$tblDate" +%s 2>/dev/null)
    then
        if (( tblEpoch <= cutoffEpoch ))
        then
            issueNote+="$db.$tbl has not been updated since $tblDate\n"
        fi
    else
        # An empty or unparsable date (e.g. NULL from the database) used to be
        # skipped silently; report it instead.
        issueNote+="Could not determine last update time for $db.$tbl\n"
    fi

    # Raises error if coverage diff between track versions is too large
    if [[ -n "$pctDiff" ]]
    then
        # Round our percentDiff to 3 decimal places.
        pctRounded=$(printf "%.3f\n" "$pctDiff")
        if [[ "$(bc <<< "$pctRounded >= 10")" -ne 0 ]]
        then
            issueNote+="Large coverage diff for $db.$tbl\n"
        fi
    fi
}

# Runs the update-time and coverage checks for one bigBed based table.
# $1 == table name. $2 == "remote" if the table points at a remote (URL) bigBed,
# anything else (default "beta") if the bigBed is a file on hgwbeta.
function qaBigBedTable () {
    local tbl=$1 location=${2:-beta}
    local fileName tblDate tblCov lastModified

    # Get file name from beta
    fileName=$(hgsql -Ne "SELECT * FROM $tbl LIMIT 1" "$db")
    tmpBed=$(mktemp --tmpdir tmp.XXXXXXXXXX.bed)

    if [[ "$location" == "remote" ]]
    then
        # Remote file, so curl must be used instead of stat to get the update time.
        # tr strips the carriage return that terminates HTTP header lines.
        lastModified=$(curl -s -v -I "$fileName" 2>&1 | grep '^< Last-Modified:' | cut -d ' ' -f3- | tr -d '\r')
        tblDate=$(date -d "$lastModified" '+%F %T')
        # featureBits doesn't work with bigBeds, need to turn into bed first
        bigBedToBed "$fileName" "$tmpBed"
    else
        # Get table update time from beta. The "$" is escaped so that stat runs on
        # hgwbeta; unescaped, the local shell expands it before ssh is started.
        tblDate=$(ssh qateam@hgwbeta "date -d \"\$(stat -Lc '%y' '$fileName')\" '+%F %T'")
        # featureBits doesn't work with bigBeds, need to turn into bed first
        ssh qateam@hgwbeta "/usr/local/apache/cgi-bin/utils/bigBedToBed '$fileName' stdout" > "$tmpBed"
    fi

    tblCov=$(featureBits -countGaps "$db" "$tmpBed" 2>&1)
    outputCovDiff "$prevLogFile" "$tblCov" "$tbl" "$tblDate"
    # Check for issues with table
    checkForIssues "$tblDate" "$tooOld" "$tbl" "$percentDiff"

    rm -f -- "$tmpBed"
    tmpBed=""
}

# Runs the update-time check, and optionally the coverage check, for one
# database table. $1 == table name. $2 == "true" (default) if the table has
# coordinates and so can be given to featureBits, "false" otherwise.
function qaSqlTable () {
    local tbl=$1 hasCoords=${2:-true}
    local tblDate tblCov

    tblDate=$(hgsql -Ne "SELECT UPDATE_TIME FROM information_schema.tables WHERE TABLE_SCHEMA='$db' AND TABLE_NAME='$tbl'")
    if [[ "$hasCoords" == true ]]
    then
        tblCov=$(ssh qateam@hgwbeta "featureBits -countGaps $db $tbl 2>&1")
        outputCovDiff "$prevLogFile" "$tblCov" "$tbl" "$tblDate"
        # Check for issues with table
        checkForIssues "$tblDate" "$tooOld" "$tbl" "$percentDiff"
    else
        # Output for tables that don't contain coordinates
        output+="$tbl\nLast updated: $tblDate\n\n"
        # Check for issues with table
        checkForIssues "$tblDate" "$tooOld" "$tbl" ""
    fi
}

##### Parse command-line input #####

OPTIND=1 # Reset is necessary if getopts was used previously in the script.
while getopts "hl:u:bvs" opt
do
    case $opt in
        h)
            showHelp
            exit 0
            ;;
        v)
            verboseMode=true
            ;;
        b)
            bigBedMode="on"
            ;;
        l)
            # Check if directory is valid before attempting to write to it.
            if [[ -d "$OPTARG" ]]
            then
                newLogDir=$OPTARG
            else
                # Report the value given to -l ($OPTARG); "$2" is only that
                # value when -l happens to be the first option.
                echo -e "Sorry, directory \"$OPTARG\" does not exist." >&2
                echo -e "Check spelling or create this directory and try again.\n" >&2
                exit 1
            fi
            ;;
        u)
            logUrl=$OPTARG
            ;;
        s)
            overwriteLogUrl=true
            ;;
        '?')
            showHelp >&2
            exit 1
            ;;
    esac
done

shift "$((OPTIND-1))" # Shift off the options and optional --.

# Check number of required arguments
if [[ $# -ne 2 ]]
then
    # Output usage message if number of required arguments is wrong
    showHelp >&2
    exit 1
fi

# Set variables with required argument values
db=$1
tableName=$2

##### Main Program #####

# Set some variables based on optional input
if [[ -n "$newLogDir" ]]
then
    # Set logDir if newLogDir is provided
    logDir="$newLogDir"
fi

if [[ "$overwriteLogUrl" == true ]]
then
    # Overwrite url if option is set
    logUrl=$logDir
fi

if [[ ! -d "$logDir" ]]
then
    echo -e "Sorry, directory \"$logDir\" does not exist." >&2
    exit 1
fi

# set currLogFile
currLogFile="$db.$tableName.$currDate.$currTime.txt"

# set info for prevLog: most recently modified log whose name starts with
# "$db.$tableName.". awk reads the whole listing rather than stopping at the
# first hit, so ls never receives SIGPIPE (which pipefail would turn into a
# script failure), and the literal prefix match keeps e.g. hg19.omimGene2 logs
# from being picked up for hg19.omim.
prevLogName=$(ls -Lt -- "$logDir" | awk -v prefix="$db.$tableName." '!found && index($0, prefix) == 1 { print; found = 1 }')

#initialize output string
output="\n$db\n"

if [[ -n "$prevLogName" && -e "$logDir/$prevLogName" ]]
then
    prevLogFile="$logDir/$prevLogName"
    # Date is the first dot-separated field after the "$db.$tableName." prefix.
    prevLogDate=${prevLogName#"$db.$tableName."}
    prevLogDate=${prevLogDate%%.*}
fi

# Set tooOld for different tables
if [[ "$tableName" == "clinvar" || "$tableName" == "grcIncidentDb" ]]
then
    tooOld=$(date -d "$currDate - 2 months" +%F)
else
    tooOld=$(date -d "$currDate - 1 month" +%F)
fi

# Run tests for different tracks
if [[ "$bigBedMode" == "on" ]]
then
    case "$tableName" in
        clinvar)
            # ClinVar has multiple tables
            for tbl in clinvarMain clinvarCnv
            do
                qaBigBedTable "$tbl"
            done
            ;;
        grcIncidentDb)
            # GRC Incident track relies on remote file
            qaBigBedTable "$tableName" remote
            ;;
        *)
            # All other bigBed based autopushed tracks (assuming they don't use
            # remote bigBed files)
            qaBigBedTable "$tableName"
            ;;
    esac
# Tests for non-bigBed tracks
else
    case "$tableName" in
        omim|isca)
            # OMIM and ISCA both have a large number of tables.
            # Grabs list of all omim or isca tables from beta. Assigned to a
            # variable first so that a failing hgsql stops the script.
            tableList=$(hgsql -Ne "SHOW TABLES LIKE '%$tableName%'" "$db")
            for tbl in $tableList
            do
                # Only some omim tables have coordinates
                if [[ "$tableName" == "isca" || "$tbl" == "omimGene2" || "$tbl" == "omimAvSnp" || "$tbl" == "omimLocation" ]]
                then
                    qaSqlTable "$tbl" true
                else
                    qaSqlTable "$tbl" false
                fi
            done
            ;;
        *)
            # Tests for all other table based autopushed tracks
            qaSqlTable "$tableName" true
            ;;
    esac
fi

# Output results of tests.
# The messages are expanded unquoted on purpose: word splitting folds any real
# newlines or repeated blanks (e.g. in featureBits output) into single spaces,
# which keeps every log entry in the line layout that outputCovDiff parses on
# the next run. Globbing is switched off so a stray "*" or "?" in a message can
# never be expanded into file names.
set -f
if [[ -n "$issueNote" ]]
then
    #Put URL to log file at end of issue note
    issueNote+="\nSee $logUrl/$currLogFile for more details about these errors.\n\n"
    # shellcheck disable=SC2086
    echo -e $issueNote | tee "$logDir/$currLogFile"
fi

if [[ "$verboseMode" == true ]] # True if verboseMode is on
then
    # shellcheck disable=SC2086
    echo -e $output | tee -a "$logDir/$currLogFile"
else
    # shellcheck disable=SC2086
    echo -e $output >> "$logDir/$currLogFile"
fi
set +f