#!/bin/bash
# Post-patch state of src/utils/qa/qaAutoTrack.sh as of commit
# 7e8c8145c41f37a949183d56736df53d575f7936 (mspeir, Wed Feb 18 11:25:28 2015 -0800):
#   "Fixing output for multi-table tracks like isca. Fixing issues with if
#    statement where it would fail if there was more that one prevLogFile"
# plus the review fixes that are called out in the comments below.

# quit if something within the script fails
set -beEu -o pipefail
source "$(which qaConfig.bash)"
umask 002

################################
#
# 02-13-2015
# Matthew Speir
#
# qaAutoTrack.sh
# Performs basic QA for auto-pushed tracks, which includes:
# - Checks when data for track was last updated
# - Coverage from featureBits -countGaps
# - Percentage difference in coverage between now and the last time the script was run
#
################################

##### Variables #####

# Set by command-line options
db=""
tableName=""
bigBedMode=""
verboseMode=""

# Other variables
currDate=$(date --rfc-3339=date)
output="" # holds output message
logUrl="http://genecats.cse.ucsc.edu/qa/test-results/qaAutoTrack"
logDir="/usr/local/apache/htdocs-genecats/qa/test-results/qaAutoTrack"
currLogFile=""
prevLogFile=""
prevLogDate=""
trackName=""  # tableName exactly as given on the command line; names the log file
tblDate=""    # last update time of the table currently being checked
tblCov=""     # featureBits output for the table currently being checked

# Variables for issue checking
maxChange=0.1000
issue=false
issueNote=""
tooOld=""
percentDiff=""

# Usage message as variable
usage="
Performs basic QA for auto-pushed tracks, which includes:
- Checks when data for track was last updated
- Coverage from featureBits -countGaps
- Percentage difference in coverage between now and the last time the script was run

Usage: $0 database tableName [bigBed] [verbose]

Notes:
Use 'bigBed' for tracks supported by bigBed files.
For OMIM, ISCA, or ClinVar tracks use omim, isca, or clinvar as the table name.
Can only be run once for each database/track pair per day.
"

##### Functions #####

# Prints the usage message and exits with status 1.
function showUsage {
    echo -e "$usage"
    exit 1
}

# Appends the report entry for $tableName to $output.
# Reads:  tableName, tblDate, tblCov, prevLogFile
# Writes: output, percentDiff (reset to "" when no comparison is possible)
function outputCovDiff {
    local rawCount="" rawCountPrev="" prevCov=""
    percentDiff=""

    if [[ -n "$prevLogFile" ]]
    then
        # Coverage recorded for this table in the previous log. The table name
        # must match a whole line: a prefix match would let e.g. "iscaBenign"
        # pick up the entry of "iscaBenignGainCum". "Coverage New" is looked
        # for within the two lines following the name, and its first two
        # fields ("Coverage", "New:") are dropped.
        prevCov=$(awk -v name="$tableName" '
            done { next }
            $0 == name { left = 2; next }
            left > 0 {
                left--
                if (index($0, "Coverage New") == 1) {
                    sub(/^[^ ]+ +[^ ]+ +/, "")
                    print
                    done = 1
                }
            }' "$prevLogFile") || true
    fi

    # The first word of the featureBits output is the covered base count
    read -r rawCount _ <<< "$tblCov" || true
    read -r rawCountPrev _ <<< "$prevCov" || true

    if [[ "$rawCount" =~ ^[0-9]+$ && "$rawCountPrev" =~ ^[0-9]+$ ]]
    then
        # Relative difference = |new - old| / (new/2 + old/2), with integer
        # halves as before. Done in awk because expr exits non-zero whenever
        # its result is 0, which aborts the script under set -e, and because
        # a zero average must not be divided by.
        percentDiff=$(awk -v cur="$rawCount" -v prev="$rawCountPrev" 'BEGIN {
            diff = cur - prev
            if (diff < 0) diff = -diff
            avg = int(cur / 2) + int(prev / 2)
            if (avg == 0) avg = (cur + prev) / 2
            if (avg == 0) print 0
            else print diff / avg
        }')
        # Build output string
        output+="$tableName\nLast updated: $tblDate\nCoverage New: $tblCov\nCoverage Old: $prevCov\nCoverage Diff: $(awk -v pd="$percentDiff" 'BEGIN{print pd * 100}')%\n\n"
    else
        # No previous log, table absent from it, or counts not numeric
        output+="$tableName\nLast updated: $tblDate\nCoverage New: $tblCov\n\n"
    fi
}

# Records an issue when $tableName looks stale or its coverage changed a lot.
# Reads:  tableName, trackName, tblDate, tooOld, percentDiff, maxChange,
#         logUrl, db, currDate
# Writes: issue, issueNote
function raiseIssue {
    # The log file is always named after the track given on the command line,
    # even while tableName temporarily holds one of the track's sub-tables.
    local logLink="$logUrl/$db.${trackName:-$tableName}.$currDate.txt"
    local tblEpoch="" tooOldEpoch=""

    # Raises an error if it's been too long since last update.
    # An empty or unparseable date (e.g. NULL from information_schema) is
    # skipped; note that `date -d ""` would silently mean "today".
    if [[ -n "$tblDate" ]] \
        && tblEpoch=$(date -d "$tblDate" +%s 2>/dev/null) \
        && tooOldEpoch=$(date -d "$tooOld" +%s 2>/dev/null)
    then
        if (( tblEpoch <= tooOldEpoch ))
        then
            issue=true
            issueNote+="$tableName has not been updated since $tblDate, see $logLink for more details\n"
        fi
    fi

    # Raises error if coverage diff between versions is too large.
    # The comparison is numeric: [[ a > b ]] compares strings, and awk may
    # print a tiny ratio as "5e-05", which sorts after "0.1000".
    if [[ -n "$percentDiff" ]] \
        && awk -v pd="$percentDiff" -v max="$maxChange" 'BEGIN { exit !((pd + 0) > (max + 0)) }'
    then
        issue=true
        issueNote+="Large coverage diff for $tableName, see $logLink for more details\n"
    fi
}

# Sets tblDate and tblCov for a bigBed-backed table whose file is read on hgwbeta.
# Arguments: $1 - table holding the bigBed file name
function checkBetaBigBed {
    local tbl=$1
    local bedFile="$TMPDIR/temp.$tbl.bed"
    local fileName rawMtime

    # Get file name from beta
    fileName=$(hgsql -h mysqlbeta -Ne "SELECT * FROM $tbl" "$db")
    # Get file update time from beta. stat has to run on the remote side; a
    # $(stat ...) inside the double-quoted ssh command is expanded locally.
    rawMtime=$(ssh qateam@hgwbeta "stat -Lc '%y' '$fileName'")
    tblDate=$(date -d "$rawMtime" "+%F %T")
    # featureBits doesn't work with bigBeds, need to turn into bed first
    ssh qateam@hgwbeta "/usr/local/apache/cgi-bin/utils/bigBedToBed '$fileName' stdout" > "$bedFile"
    tblCov=$(featureBits -countGaps "$db" "$bedFile" 2>&1)
    rm -f "$bedFile"
}

##### Parse command-line input #####

case $# in
    2)
        ;;
    3)
        # the single optional argument is either bigBed or verbose
        case "$3" in
            bigBed)  bigBedMode=$3 ;;
            verbose) verboseMode=$3 ;;
            *)       showUsage ;;
        esac
        ;;
    4)
        bigBedMode=$3
        verboseMode=$4
        # error if 4th arg isn't verbose
        if [[ "$verboseMode" != "verbose" ]]
        then
            showUsage
        fi
        ;;
    *)
        showUsage
        ;;
esac

# set required variables
db="$1"
tableName="$2"
trackName="$tableName"

##### Main Program #####

# set currLogFile
currLogFile="$logDir/$db.$trackName.$currDate.txt"

# set info for prevLog
# Log names carry an ISO date, so the glob's sorted expansion ends with the
# newest log. Unlike the former ls|grep|egrep|sed pipeline, finding no
# previous log is not an error under set -e / pipefail.
for candidate in "$logDir/$db.$trackName."[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9].txt
do
    [[ -e "$candidate" ]] || continue
    prevLogFile="$candidate"
done
if [[ -n "$prevLogFile" ]]
then
    prevLogDate="${prevLogFile##*/}"
    prevLogDate="${prevLogDate#"$db.$trackName."}"
    prevLogDate="${prevLogDate%.txt}"
fi

# Can't run twice in one day as it messes up the "Coverage Old" output
if [[ "$currDate" == "$prevLogDate" ]]
then
    echo -e "Previous log date is the same as today's date, $currDate"
    exit 1
fi

# Set tooOld for different tables
if [[ "$trackName" == "clinvar" ]] || [[ "$trackName" == "grcIncidentDb" ]]
then
    tooOld=$(date -d "$currDate - 1 month" +%F)
else
    tooOld=$(date -d "$currDate - 15 days" +%F)
fi

# Run tests for different tracks
if [[ "$bigBedMode" == "bigBed" ]]
then
    # ClinVar has multiple tables
    if [[ "$trackName" == "clinvar" ]]
    then
        for tbl in clinvarMain clinvarCnv
        do
            checkBetaBigBed "$tbl"
            # outputCovDiff and raiseIssue report on variable tableName
            tableName=$tbl
            outputCovDiff
            # Check for issues with table
            raiseIssue
            # reset tableName to original name
            tableName=$trackName
        done
    # GRC Incident track relies on remote file so curl must be used instead of stat
    elif [[ "$trackName" == "grcIncidentDb" ]]
    then
        fileName=$(hgsql -h mysqlbeta -Ne "SELECT * FROM $tableName" "$db")
        # Use a HEAD request to get the update time of the file. No early exit
        # in awk, so curl never writes into a closed pipe under pipefail.
        lastModified=$(curl -s -I "$fileName" | tr -d '\r' \
            | awk 'tolower($1) == "last-modified:" && !seen { sub(/^[^ ]+ +/, ""); print; seen = 1 }')
        tblDate=$(date -d "$lastModified" "+%F %T")
        # featureBits doesn't work with bigBeds, need to turn into bed first
        bigBedToBed "$fileName" "$TMPDIR/temp.$tableName.bed"
        tblCov=$(featureBits -countGaps "$db" "$TMPDIR/temp.$tableName.bed" 2>&1)
        rm -f "$TMPDIR/temp.$tableName.bed"
        outputCovDiff
        # Check for issues with table
        raiseIssue
    # Tests for all other bigBed based autopushed tracks
    # (assuming they don't use remote bigBed files)
    else
        checkBetaBigBed "$tableName"
        outputCovDiff
        # Check for issues with table
        raiseIssue
    fi
# Tests for non-bigBed tracks
else
    # OMIM and ISCA both have a large number of tables
    if [[ "$trackName" == "omim" ]] || [[ "$trackName" == "isca" ]]
    then
        # Grabs list of all omim or isca tables from beta
        for tbl in $(hgsql -h mysqlbeta -Ne "SHOW TABLES LIKE '%$trackName%'" "$db")
        do
            tblDate=$(hgsql -h mysqlbeta -Ne "SELECT UPDATE_TIME FROM information_schema.tables WHERE TABLE_SCHEMA='$db' AND TABLE_NAME='$tbl'")
            # Only some omim tables have coordinates
            if [[ "$tbl" == "omimGene2" ]] || [[ "$tbl" == "omimAvSnp" ]] \
                || [[ "$tbl" == "omimLocation" ]] || [[ "$trackName" == "isca" ]]
            then
                tblCov=$(ssh qateam@hgwbeta "featureBits -countGaps $db $tbl 2>&1")
                # set tableName to tbl temporarily so we can use one output function for all tables
                tableName=$tbl
                outputCovDiff
                # reset tableName to original name
                tableName=$trackName
            # Output for tables that don't contain coordinates
            else
                output+="$tbl\nLast updated: $tblDate\n\n"
            fi
        done
        # Check for different issues with table
        # Must be outside of for loop so as to only output one error message
        # for the entire table set.
        # NOTE(review): this only looks at the tblDate of the last table listed
        # and the percentDiff of the last table that had coordinates.
        raiseIssue
    # Tests for all other table based autopushed tracks
    else
        tblDate=$(hgsql -h mysqlbeta -Ne "SELECT UPDATE_TIME FROM information_schema.tables WHERE TABLE_SCHEMA='$db' AND TABLE_NAME='$tableName'")
        tblCov=$(ssh qateam@hgwbeta "featureBits -countGaps $db $tableName 2>&1")
        outputCovDiff
        # Check for issues with table
        raiseIssue
    fi
fi

##### Output results of tests #####

# start a fresh log file for today
: > "$currLogFile"

# issue notes always go to both the screen and the log file
if [[ "$issue" == true ]]
then
    echo -e "$issueNote" | tee -a "$currLogFile"
fi

# the report goes to the log file, and to the screen only in verbose mode
if [[ -n "$verboseMode" ]]
then
    echo -e "$output" | tee -a "$currLogFile"
else
    echo -e "$output" >> "$currLogFile"
fi