4c0e72e76585dbab832741abb0af4797bdc8efe2
mspeir
  Fri Mar 13 09:03:55 2015 -0700
Changes based on Code Review, refs #14895

diff --git src/utils/qa/qaAutoTrack.sh src/utils/qa/qaAutoTrack.sh
index 94bc284..44a2331 100755
--- src/utils/qa/qaAutoTrack.sh
+++ src/utils/qa/qaAutoTrack.sh
@@ -1,288 +1,280 @@
 #!/bin/bash
 # quit if something within the script fails
 set -beEu -o pipefail
 source `which qaConfig.bash`
-
+export HGDB_CONF=$HOME/.hg.conf.beta
 umask 002
 
 ################################
 #
 #  02-13-2015
 #  Matthew Speir
 #
 #  qaAutoTrack.sh
 #  Performs basic QA for auto-pushed tracks, which includes:
 #  - Checks when data for track was last updated
 #  - Coverage from featureBits -countGaps
 #  - Percentage difference in coverage between now and the last time the script was run
 #
 ################################
 
 ##### Variables #####
 # Set by command-line options
 db=""
 tableName=""
 bigBedMode=""
 verboseMode=""
 
 # Other variables
-currDate=$(date --rfc-3339=date)
+currDate=$(date +%F)
 output="" # holds output message
 logUrl="http://genecats.cse.ucsc.edu/qa/test-results/qaAutoTrack"
 logDir="/usr/local/apache/htdocs-genecats/qa/test-results/qaAutoTrack"
 currLogFile=""
 prevLogFile=""
 prevLogDate=""
 
 # Variables for issue checking
-maxChange=0.1000
-issue=false
 issueNote=""
 tooOld=""
 percentDiff=""
 
-# Usage message as variable
-usage="
-Performs basic QA for auto-pushed tracks, which includes:
-- Checks when data for track was last updated
-- Coverage from featureBits -countGaps
-- Percentage difference in coverage between now and the last time the script was run
-
-Usage: $0 database tableName [bigBed] [verbose]
-
-Notes:
-	Use 'bigBed' for tracks supported by bigBed files.
-	For OMIM, ISCA, or ClinVar tracks use omim, isca, or clinvar as the table name.
-	Can only be run once for each database/track pair per day.
-"
 ##### Functions #####
 
 # Output function
-function outputCovDiff {
-	if [[ $prevLogFile != "" ]]
+function outputCovDiff () {
+	# four positional arguments. $1 == prevLogFile. $2 == tblCov. $3 == tableName. $4 == tblDate.
+	if [[ $1 != "" ]] # Check for previous log file. True if file exists.
 	then
 		# get info needed for diff
-		rawCount=$(echo $tblCov | awk '{print $1}')
-		prevCov=$(egrep -A2 "^$tableName" $prevLogFile | grep "^Coverage New" | cut -d" " -f3-)
+		rawCount=$(echo $2 | awk '{print $1}')
+		prevCov=$(egrep -A2 "^$3" $1 | grep "^Coverage New" | cut -d" " -f3-) # Grabs coverage from previous log file.
 		rawCountPrev=$(echo $prevCov | awk '{print $1}')
 
 		# Calculate diff between new and old coverage
 		rawCountDiff=$(echo $(expr $rawCount - $rawCountPrev)|tr -d -)
 		rawCountAvg=$(expr $rawCount / 2 + $rawCountPrev / 2)
-		percentDiff=$(awk -v rcd=$rawCountDiff -v rca=$rawCountAvg 'BEGIN{print rcd / rca}')
+		percentDiff=$(awk -v rcd=$rawCountDiff -v rca=$rawCountAvg 'BEGIN{print 100 * rcd / rca}')
 
 		# Build output string
-		output+="$tableName\nLast updated: $tblDate\nCoverage New: $tblCov\nCoverage Old: $prevCov\nCoverage Diff: $(awk -v pd=$percentDiff 'BEGIN{print pd * 100}')%\n\n"
+		output+="$3\nLast updated: $4\nCoverage New: $2\nCoverage Old: $prevCov\nCoverage Diff: $percentDiff%\n\n"
 	else
-		output+="$tableName\nLast updated: $tblDate\nCoverage New: $tblCov\n\n"
+		output+="$3\nLast updated: $4\nCoverage New: $2\n\n"
 	fi
 }
 
 # Function to raise errors
-function raiseIssue {
+function checkForIssues () {
+	# four positional arguments. $1 == tblDate. $2 == tooOld. $3 == tableName. $4 == percentDiff.
 	# Raises an error if it's been too long since last update
-	if [ $(date -d "$tblDate" +%s) -le $(date -d "$tooOld" +%s) ]
+	if [ $(date -d "$1" +%s) -le $(date -d "$2" +%s) ]
 	then
-		issue=true
-		issueNote+="$tableName has not been updated since $tblDate, see $logUrl/$db.$tableName.$currDate.txt for more details\n"
+		issueNote+="$3 has not been updated since $1, see $logUrl/$db.$tableName.$currDate.txt for more details\n"
 	fi
 
-	# Raises error if coverage diff between versions is too large
-	if [[ "$percentDiff" != "" ]] && [[ "$percentDiff" > "$maxChange" ]]
+	# Raises error if coverage diff between track versions is too large
+	if [[ "$4" != "" ]] && [[ $(echo "$4 >= 10" | bc) -eq 1 ]]
 	then
-		issue=true
-		issueNote+="Large coverage diff for $tableName, see $logUrl/$db.$tableName.$currDate.txt for more details\n"
+		issueNote+="Large coverage diff for $3, see $logUrl/$db.$tableName.$currDate.txt for more details\n"
 	fi
 }
 
 ##### Parse command-line input #####
 
-# print usage
-if (( $# < 2 )) || (( $# > 4 ))
-then
-	echo -e "$usage"
-	exit 1
-# set required variables
-else
-	db="$1"
-	tableName="$2"
-fi
+# Define function for showing usage message
+showHelp() {
+cat << EOF
+Usage: $0 [-hbv] [-d DATABASE] [-t TABLENAME]
 
-# Setting optional  arguments
-if [ $# ==  3 ]
-then
-	bigBedMode=$3
-	if [[ $bigBedMode != "bigBed" ]]
-	then
-		verboseMode=$3
-		bigBedMode=""
-		# error if 3rd isn't one of two optional args
-		if [[ $bigBedMode == "" ]] && [[ $verboseMode != "verbose" ]]
-		then
-			echo -e "$usage"
-			exit 1
-		fi
-  	fi
-fi
+	-h		Display this help and exit
+	-d DATABASE	UCSC database name, e.g. hg19 or hg38.
+	-t TABLENAME	Table name, e.g. gwasCatalog.
+	-b		BigBed mode. Used for tracks supported by
+			bigBed files, e.g. grcIncidentDb.
+	-v		Verbose mode. Outputs test results to
+			standard out as well as file.
 
-if [ $# == 4 ]
-then
-	bigBedMode=$3
-	verboseMode=$4
-	# error if 4th arg isn't verbose
-	if [[ $verboseMode != "verbose" ]]
-	then
-		echo -e "$usage"
+Performs basic QA for auto-pushed tracks, which includes:
+- Checks when data for track was last updated
+- Coverage from featureBits -countGaps
+- Percentage difference in coverage between now and the last time the script was run
+
+Notes:
+	- For OMIM, ISCA, or ClinVar tracks use omim, isca, or clinvar as the table name.
+	- Can only be run once for each database/track pair per day.
+
+EOF
+}
+
+OPTIND=1 # Reset is necessary if getopts was used previously in the script.  It is a good idea to make this local in a function.
+while getopts "hd:t:bv" opt
+do
+	case $opt in
+		h)
+			showHelp
+			exit 0
+			;;
+		d)
+			db=$OPTARG
+			;;
+		t)
+			tableName=$OPTARG
+			;;
+		v)
+			verboseMode="on"
+			;;
+		b)
+			bigBedMode="on"
+			;;
+		'?')
+			showHelp >&2
 			exit 1
-	fi
-fi
+			;;
+	esac
+done
+shift "$((OPTIND-1))" # Shift off the options and optional --.
 
 ##### Main Program #####
 
 # set currLogFile
-currLogFile="$logDir"/"$db.$tableName.$currDate.txt"
+currLogFile="$logDir/$db.$tableName.$currDate.txt"
 
 # set info for prevLog
-prevLogDate+=$(ls -Llt --time-style long-iso $logDir|grep -v total|egrep -m 1 -oh "$db\.$tableName\.[0-9]{4}-[0-9]{2}-[0-9]{2}"|sed -e "s/$db\.$tableName\.//g")
+prevLogDate=$(ls -Lt $logDir | sed -n /$db.$tableName/p | head -1 | awk -F . '{print $3}')
+
 if [ -e $logDir/$db.$tableName.$prevLogDate.txt ]
 then
-	prevLogFile="$logDir"/"$db.$tableName.$prevLogDate.txt"
+	prevLogFile="$logDir/$db.$tableName.$prevLogDate.txt"
 fi
 
 # Can't run twice in one day as it messes up the "Coverage Old" output
-if [[ $currDate == $prevLogDate ]]
+if [[ "$currDate" == "$prevLogDate" ]]
 then
 	echo -e "Previous log date is the same as today's date, $currDate"
 	exit 1
 fi
 
 # Set tooOld for different tables
-if  [[ $tableName == clinvar ]] || [[ $tableName == grcIndcidentDb ]]
+if  [[ "$tableName" == "clinvar" ]] || [[ "$tableName" == "grcIncidentDb" ]]
 then 
 	tooOld=$(date -d "$currDate - 1 month" +%F)
 else
 	tooOld=$(date -d "$currDate - 15 days" +%F)
 fi
 
 # Run tests for different tracks 
-if [[ $bigBedMode == "bigBed" ]]
+if [[ $bigBedMode == "on" ]]
 then
 	# ClinVar has muliple tables
 	if [[ $tableName == "clinvar" ]]
 	then
 		for tbl in clinvarMain clinvarCnv
 		do
 			# Get file name from beta
-			fileName=$(hgsql -h mysqlbeta -Ne "SELECT * FROM $tbl" $db)
+			fileName=$(hgsql -Ne "SELECT * FROM $tbl LIMIT 1" $db)
 			# Get table update time from beta
 			tblDate=$(ssh qateam@hgwbeta "date -d '$(stat -Lc '%y' $fileName)' +%F' '%T")
+			MYTEMPFILE=$(mktemp --tmpdir tmp.XXXXXXXXXX.bed)
 			# featureBits doesn't work with bigBeds, need to turn into bed first
-			ssh qateam@hgwbeta "/usr/local/apache/cgi-bin/utils/bigBedToBed $fileName stdout" > $TMPDIR/temp.$tbl.bed
-			tblCov=$(featureBits -countGaps $db $TMPDIR/temp.$tbl.bed 2>&1)
-			# outoutCovDiff function needs variable tableName
+			ssh qateam@hgwbeta "/usr/local/apache/cgi-bin/utils/bigBedToBed $fileName stdout" > $MYTEMPFILE
+			tblCov=$(featureBits -countGaps $db $MYTEMPFILE 2>&1)
+
+			# temporary holder so we don't lose original input tableName
+			tableNameTemp=$tableName
+			# set tableName to tbl temporarily so we can use one output function for all tables
 			tableName=$tbl
 
-			outputCovDiff
+			outputCovDiff "$prevLogFile" "$tblCov" "$tableName" "$tblDate"
+			# reset tableName to original name
+			tableName=$tableNameTemp
 
 			# Check for issues with table
-			raiseIssue
+			checkForIssues "$tblDate" "$tooOld" "$tableName" "$percentDiff"
 
-			rm -f $TMPDIR/temp.$tbl.bed
+			rm -f $MYTEMPFILE
 		done
 	# GRC Incident track relies on remote file so curl must be used instead of stat
 	elif [[ $tableName == "grcIncidentDb" ]]
 	then
-		fileName=$(hgsql -h mysqlbeta -Ne "SELECT * FROM $tableName" $db)
+		fileName=$(hgsql -Ne "SELECT * FROM $tableName LIMIT 1" $db)
 		# Use curl to get update time on file
-		tblDate=$(date -d "$(curl -s -v -X HEAD $fileName 2>&1 | grep '^< Last-Modified:'| cut -d" " -f3- )" +%F" "%T)
+		tblDate=$(date -d "$(curl -s -v -I $fileName 2>&1 | grep '^< Last-Modified:'| cut -d ' ' -f3- )" '+%F %T')
+		MYTEMPFILE=$(mktemp --tmpdir tmp.XXXXXXXXXX.bed)
 		# featureBits doesn't work with bigBeds, need to turn into bed first
-		bigBedToBed $fileName $TMPDIR/temp.$tableName.bed
-		tblCov=$(featureBits -countGaps $db $TMPDIR/temp.$tableName.bed 2>&1)
-
-		outputCovDiff
+		bigBedToBed $fileName $MYTEMPFILE
+		tblCov=$(featureBits -countGaps $db $MYTEMPFILE 2>&1)
 
+		outputCovDiff "$prevLogFile" "$tblCov" "$tableName" "$tblDate"
 		# Check for issues with table
-		raiseIssue
+		checkForIssues "$tblDate" "$tooOld" "$tableName" "$percentDiff"
 			
-		rm -f $TMPDIR/temp.$tableName.bed
+		rm -f $MYTEMPFILE
 	# Tests for all other bigBed based autopushed tracks (assuming they don't use remote bigBed files)
 	else
-		fileName=$(hgsql -h mysqlbeta -Ne "SELECT * FROM $tableName" $db)
+		fileName=$(hgsql -Ne "SELECT * FROM $tableName LIMIT 1" $db)
 		# Get table update time from beta
 		tblDate=$(ssh qateam@hgwbeta "date -d '$(stat -Lc '%y' $fileName)' +%F' '%T")
+		MYTEMPFILE=$(mktemp --tmpdir tmp.XXXXXXXXXX.bed)
 		# featureBits doesn't work with bigBeds, need to turn into bed first
-		ssh qateam@hgwbeta "/usr/local/apache/cgi-bin/utils/bigBedToBed $fileName stdout" > $TMPDIR/temp.$tbl.bed
-		tblCov=$(featureBits -countGaps $db $TMPDIR/temp.$tbl.bed 2>&1)
+		ssh qateam@hgwbeta "/usr/local/apache/cgi-bin/utils/bigBedToBed $fileName stdout" > $MYTEMPFILE
+		tblCov=$(featureBits -countGaps $db $MYTEMPFILE 2>&1)
 
-		outputCovDiff
+		outputCovDiff "$prevLogFile" "$tblCov" "$tableName" "$tblDate"
 	
 		# Check for issues with table
-		raiseIssue
+		checkForIssues "$tblDate" "$tooOld" "$tableName" "$percentDiff"
 
-		rm -f $TMPDIR/temp.$tableName.bed
+		rm -f $MYTEMPFILE
 	fi
 # Tests for non-bigBed tracks
 else
 	# OMIM and ISCA both have a large number of tables
 	if [[ $tableName == "omim" ]] || [[ $tableName == "isca" ]]
 	then
-		for tbl in $(hgsql -h mysqlbeta -Ne "SHOW TABLES LIKE '%$tableName%'" $db) # Grabs list of all omim or isca tables from beta
+		for tbl in $(hgsql -Ne "SHOW TABLES LIKE '%$tableName%'" $db) # Grabs list of all omim or isca tables from beta
         	do
-	               	tblDate=$(hgsql -h mysqlbeta -Ne "SELECT UPDATE_TIME FROM information_schema.tables WHERE TABLE_SCHEMA='$db' AND TABLE_NAME='$tbl'")
+			tblDate=$(hgsql -Ne "SELECT UPDATE_TIME FROM information_schema.tables WHERE TABLE_SCHEMA='$db' AND TABLE_NAME='$tbl'")
 			# Only some omim tables have coordinates
                 	if [[ $tbl == "omimGene2" ]] || [[ $tbl == "omimAvSnp" ]] || [[ $tbl == "omimLocation" ]] || [[ $tableName == "isca" ]]
                 	then
                         	tblCov=$(ssh qateam@hgwbeta "featureBits -countGaps $db $tbl 2>&1")
 				# temporary holder so we don't loose original input tableName
 				tableNameTemp=$tableName
 				# set tableName to tbl temporarily so we can use one output function for all tables
 				tableName=$tbl
 
-				outputCovDiff
+				outputCovDiff "$prevLogFile" "$tblCov" "$tableName" "$tblDate"
 				# reset tableName to original name
 				tableName=$tableNameTemp
 			# Output for tables that don't contain coordinates
 			else
 				output+="$tbl\nLast updated: $tblDate\n\n"
 			fi
 		done
 		# Check for different issues with table
 		# Must be outside of for loop so as to only output one error message for the entire table set	
-		raiseIssue
+		checkForIssues "$tblDate" "$tooOld" "$tableName" "$percentDiff"
 	# Tests for all other table based autopushed tracks
 	else
-		tblDate=$(hgsql -h mysqlbeta -Ne "SELECT UPDATE_TIME FROM information_schema.tables WHERE TABLE_SCHEMA='$db' AND TABLE_NAME='$tableName'")
+		tblDate=$(hgsql -Ne "SELECT UPDATE_TIME FROM information_schema.tables WHERE TABLE_SCHEMA='$db' AND TABLE_NAME='$tableName'")
 		tblCov=$(ssh qateam@hgwbeta "featureBits -countGaps $db $tableName 2>&1")
 
-		outputCovDiff
+		outputCovDiff "$prevLogFile" "$tblCov" "$tableName" "$tblDate"
 
 		# Check for issues with table
-		raiseIssue
+		checkForIssues "$tblDate" "$tooOld" "$tableName" "$percentDiff"
 	fi
 fi
 
 # Output results of tests
-if [[ $issue == true ]]
+if [[ $issueNote != "" ]]
 then
+	echo -e $issueNote | tee $currLogFile
+fi
+
 if [[ $verboseMode != "" ]] # True if verboseMode is on
 then
-		#print error message
-		echo -e "$issueNote" | tee $currLogFile
-		#print output to log file and to screen
 	echo -e $output | tee -a $currLogFile
 else
-		#print error message	
-		echo -e "$issueNote" | tee $currLogFile
-		#print output to log file
 	echo -e $output >> $currLogFile
 fi
-else
-	if [[ $verboseMode != "" ]] # True if verboseMode is on
-	then
-		#print output to log file and to screen
-		echo -e $output | tee $currLogFile
-	else
-		#print output to log file
-		echo -e $output > $currLogFile
-	fi
-fi