#!/bin/bash
# quit if something within the script fails
set -beEu -o pipefail
source "$(which qaConfig.bash)"

umask 002

################################
#
# 02-13-2015
# Matthew Speir
#
# qaAutoTrack.sh
# Performs basic QA for auto-pushed tracks, which includes:
# - Checks when data for track was last updated
# - Coverage from featureBits -countGaps
# - Percentage difference in coverage between now and the last time the script was run
#
# Provenance: added as src/utils/qa/qaAutoTrack.sh in commit
# d70e1a26514f40e07dcfed2f45f6e369962ec80b (mspeir, Tue Feb 17 07:56:54 2015 -0800):
# new script to QA autopushed tracks; QA includes last update and coverage
# info. refs #12561
#
################################

##### Variables #####
# Set by command-line options
db=""
tableName=""    # table currently being reported on (changes inside loops)
trackName=""    # table/track name exactly as given on the command line
bigBedMode=""
verboseMode=""

# Other variables
currDate=$(date --rfc-3339=date)
output="" # holds output message
logUrl="http://genecats.cse.ucsc.edu/qa/test-results/qaAutoTrack"
logDir="/usr/local/apache/htdocs-genecats/qa/test-results/qaAutoTrack"
currLogFile=""
prevLogFile=""
prevLogDate=""
tblDate=""      # last update time of the table currently being checked
tblCov=""       # featureBits output for the table currently being checked

# Variables for issue checking
maxChange=0.1000   # largest tolerated coverage change, as a fraction (0.1 = 10%)
issue=false
issueNote=""
tooOld=""
percentDiff=""

# Usage message as variable
usage="
Performs basic QA for auto-pushed tracks, which includes:
- Checks when data for track was last updated
- Coverage from featureBits -countGaps
- Percentage difference in coverage between now and the last time the script was run

Usage: $0 database tableName [bigBed] [verbose]

Notes:
    Use 'bigBed' for tracks supported by bigBed files.
    For OMIM, ISCA, or ClinVar tracks use omim, isca, or clinvar as the table name.
    Can only be run once for each database/track pair per day.
"

##### Functions #####

# Appends the report entry for $tableName to $output.
#   Reads:  tableName, tblDate, tblCov, prevLogFile
#   Writes: output, percentDiff
# percentDiff is the absolute coverage change divided by the mean of the old
# and new base counts (a fraction, not a percentage). It is left empty when
# there is nothing to compare against: no previous log, no entry for this
# table in it, or a base count that is not a plain integer.
function outputCovDiff {
    local rawCount rawCountPrev prevCov=""

    # Never let the diff of a previously processed table leak into this one
    percentDiff=""

    if [[ -n "$prevLogFile" ]]
    then
        # Find the line that is exactly the table name, then take the
        # "Coverage New" value from the two lines that follow it. An exact
        # match keeps e.g. omimGene from picking up omimGene2's entry.
        prevCov=$(awk -v tbl="$tableName" '
            $0 == tbl { remaining = 2; next }
            remaining > 0 {
                remaining--
                if (sub(/^Coverage New: /, "")) { print; exit }
            }' "$prevLogFile")
    fi

    # First word of the featureBits summary is the number of bases covered
    rawCount=$(awk 'NF { print $1; exit }' <<< "$tblCov")
    rawCountPrev=$(awk 'NF { print $1; exit }' <<< "$prevCov")

    if [[ "$rawCount" =~ ^[0-9]+$ && "$rawCountPrev" =~ ^[0-9]+$ ]]
    then
        # Calculate diff between new and old coverage. Done in awk so that a
        # zero result or zero coverage on both sides cannot abort the script.
        percentDiff=$(awk -v new="$rawCount" -v old="$rawCountPrev" 'BEGIN {
            diff = new - old
            if (diff < 0) diff = -diff
            avg = (new + old) / 2
            print (avg > 0 ? diff / avg : 0)
        }')

        # Build output string
        output+="$tableName\nLast updated: $tblDate\nCoverage New: $tblCov\nCoverage Old: $prevCov\nCoverage Diff: $(awk -v pd="$percentDiff" 'BEGIN { print pd * 100 }')%\n\n"
    else
        output+="$tableName\nLast updated: $tblDate\nCoverage New: $tblCov\n\n"
    fi
}

# Records an issue (sets issue=true and appends to issueNote) when the table
# is stale or its coverage changed by more than maxChange.
#   Reads:  tableName, tblDate, tooOld, percentDiff, maxChange, logUrl, currLogFile
#   Writes: issue, issueNote
function raiseIssue {
    local logName tblEpoch tooOldEpoch

    # Link to the log this run actually writes; tableName may have been
    # switched to a sub-table (clinvarMain, omimGene2, ...) by the caller.
    logName=${currLogFile##*/}

    # Raises an error if it's been too long since last update.
    # An unparsable tblDate makes date fail, which simply skips this check.
    tooOldEpoch=$(date -d "$tooOld" +%s)
    if tblEpoch=$(date -d "$tblDate" +%s) && (( tblEpoch <= tooOldEpoch ))
    then
        issue=true
        issueNote+="$tableName has not been updated since $tblDate, see $logUrl/$logName for more details\n"
    fi

    # Raises error if coverage diff between versions is too large.
    # The comparison must be numeric: awk may print small ratios as e.g.
    # "5e-06", which a string comparison would rank above "0.1000".
    if [[ -n "$percentDiff" ]] &&
        awk -v pd="$percentDiff" -v max="$maxChange" 'BEGIN { exit !(pd + 0 > max + 0) }'
    then
        issue=true
        issueNote+="Large coverage diff for $tableName, see $logUrl/$logName for more details\n"
    fi
}

# Runs the update-time and coverage checks for one bigBed-backed table.
#   $1 - name of the table on mysqlbeta holding the bigBed file name
# Sets tableName to $1 so that outputCovDiff/raiseIssue report on that table.
function checkBetaBigBed {
    local tbl=$1
    local fileName bedFile="$TMPDIR/temp.$tbl.bed"

    # Get file name from beta
    fileName=$(hgsql -h mysqlbeta -Ne "SELECT * FROM $tbl" "$db")
    # Get table update time.
    # NOTE(review): the inner stat is expanded by the local shell, so it reads
    # the timestamp of the local copy of the file, not the one on hgwbeta;
    # only the date formatting runs remotely. Confirm this is intended.
    tblDate=$(ssh qateam@hgwbeta "date -d '$(stat -Lc '%y' "$fileName")' +%F' '%T")
    # featureBits doesn't work with bigBeds, need to turn into bed first
    ssh qateam@hgwbeta "/usr/local/apache/cgi-bin/utils/bigBedToBed $fileName stdout" > "$bedFile"
    tblCov=$(featureBits -countGaps "$db" "$bedFile" 2>&1)
    # outputCovDiff function needs variable tableName
    tableName=$tbl

    outputCovDiff

    # Check for issues with table
    raiseIssue

    rm -f "$bedFile"
}

##### Parse command-line input #####

# print usage
if (( $# < 2 || $# > 4 ))
then
    echo -e "$usage"
    exit 1
fi

# set required variables
db="$1"
tableName="$2"
trackName="$2"

# Setting optional arguments
if (( $# == 3 ))
then
    # the single optional argument may be either of the two keywords
    case "$3" in
        bigBed)
            bigBedMode="$3"
            ;;
        verbose)
            verboseMode="$3"
            ;;
        *)
            echo -e "$usage"
            exit 1
            ;;
    esac
elif (( $# == 4 ))
then
    bigBedMode="$3"
    verboseMode="$4"
    # with both optional arguments present they must be "bigBed verbose"
    if [[ "$bigBedMode" != "bigBed" || "$verboseMode" != "verbose" ]]
    then
        echo -e "$usage"
        exit 1
    fi
fi

##### Main Program #####

# set currLogFile
currLogFile="$logDir/$db.$trackName.$currDate.txt"

# look for previous log file
# Log names end in an ISO date, so the glob (which expands in sorted order)
# lists them oldest to newest. If nothing matches, the pattern stays literal
# and the -e test fails.
prevLogs=( "$logDir/$db.$trackName."[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9].txt )
if [[ -e "${prevLogs[0]}" ]]
then
    prevLogFile="${prevLogs[${#prevLogs[@]} - 1]}"
    prevLogDate="${prevLogFile%.txt}"
    prevLogDate="${prevLogDate##*.}"
fi

# Can't run twice in one day as it messes up the "Coverage Old" output
if [[ "$currDate" == "$prevLogDate" ]]
then
    echo -e "Previous log date is the same as today's date, $currDate"
    exit 1
fi

# Set tooOld for different tables
if [[ "$trackName" == "clinvar" || "$trackName" == "grcIncidentDb" ]]
then
    tooOld=$(date -d "$currDate - 1 month" +%F)
else
    tooOld=$(date -d "$currDate - 15 days" +%F)
fi

# Run tests for different tracks
if [[ "$bigBedMode" == "bigBed" ]]
then
    # ClinVar has multiple tables
    if [[ "$trackName" == "clinvar" ]]
    then
        for tbl in clinvarMain clinvarCnv
        do
            checkBetaBigBed "$tbl"
        done
    # GRC Incident track relies on remote file so curl must be used instead of stat
    elif [[ "$trackName" == "grcIncidentDb" ]]
    then
        fileName=$(hgsql -h mysqlbeta -Ne "SELECT * FROM $tableName" "$db")
        bedFile="$TMPDIR/temp.$tableName.bed"
        # Use curl to get update time on file. -I issues a real HEAD request;
        # -X HEAD only renames the method and leaves curl waiting for a body.
        lastModified=$(curl -s -I "$fileName" | tr -d '\r' |
            grep -i '^Last-Modified:' | cut -d' ' -f2-) || {
            echo "Could not read the Last-Modified header of $fileName" >&2
            exit 1
        }
        tblDate=$(date -d "$lastModified" +%F" "%T)
        # featureBits doesn't work with bigBeds, need to turn into bed first
        bigBedToBed "$fileName" "$bedFile"
        tblCov=$(featureBits -countGaps "$db" "$bedFile" 2>&1)

        outputCovDiff

        # Check for issues with table
        raiseIssue

        rm -f "$bedFile"
    # Tests for all other bigBed based autopushed tracks (assuming they don't use remote bigBed files)
    else
        checkBetaBigBed "$tableName"
    fi
# Tests for non-bigBed tracks
else
    # OMIM and ISCA both have a large number of tables
    if [[ "$trackName" == "omim" || "$trackName" == "isca" ]]
    then
        # Grabs list of all omim or isca tables from beta
        tableList=$(hgsql -h mysqlbeta -Ne "SHOW TABLES LIKE '%$trackName%'" "$db")
        if [[ -z "$tableList" ]]
        then
            echo "No $trackName tables found in $db on mysqlbeta" >&2
            exit 1
        fi

        # Word splitting on the table list is intentional here
        for tbl in $tableList
        do
            tblDate=$(hgsql -h mysqlbeta -Ne "SELECT UPDATE_TIME FROM information_schema.tables WHERE TABLE_SCHEMA='$db' AND TABLE_NAME='$tbl'")
            # Only some omim tables have coordinates. The track test uses
            # trackName because tableName is overwritten below.
            if [[ "$trackName" == "isca" || "$tbl" == "omimGene2" || "$tbl" == "omimAvSnp" || "$tbl" == "omimLocation" ]]
            then
                tblCov=$(ssh qateam@hgwbeta "featureBits -countGaps $db $tbl 2>&1")
                tableName=$tbl

                outputCovDiff
            # Output for tables that don't contain coordinates
            else
                output+="$tbl\nLast updated: $tblDate\n\n"
            fi
        done
        # Check for different issues with table
        # Must be outside of for loop so as to only output one error message for the entire table set
        raiseIssue
    # Tests for all other table based autopushed tracks
    else
        tblDate=$(hgsql -h mysqlbeta -Ne "SELECT UPDATE_TIME FROM information_schema.tables WHERE TABLE_SCHEMA='$db' AND TABLE_NAME='$tableName'")
        tblCov=$(ssh qateam@hgwbeta "featureBits -countGaps $db $tableName 2>&1")

        outputCovDiff

        # Check for issues with table
        raiseIssue
    fi
fi

# Output results of tests
# Start from an empty log so that everything below can simply append
: > "$currLogFile"

if [[ "$issue" == true ]]
then
    # error messages always go to both the screen and the log file
    echo -e "$issueNote" | tee -a "$currLogFile"
fi

if [[ -n "$verboseMode" ]] # True if verboseMode is on
then
    # print output to log file and to screen
    echo -e "$output" | tee -a "$currLogFile"
else
    # print output to log file
    echo -e "$output" >> "$currLogFile"
fi