#!/usr/bin/env python3

# Source commit: 68c9515cb37ea293c7eeb106c8761ed1daea644b
#   gperez2, Wed Jun 21 12:15:44 2023 -0700
#   Adding cronjob scripts to run hubCheck for Public Hubs, refs #29319
#   File: src/utils/qa/hubCheckDraftEmails.py
#
# Program Header
# Name: Gerardo Perez
# Description: A program that parses the hubCheck output into email drafts for hub authors regarding
#              missing description pages and couldn't open errors
#
# hubCheckDraftEmails.py
#
#
# Version: Python 3.6.5
#
import os
import getpass
import sys
import re
import json
import io
import shlex
import subprocess
from datetime import datetime

try:
    # requests is not used anywhere in this script; the import is kept for
    # compatibility but a missing package must not stop the cron job.
    import requests
except ImportError:
    requests = None

# Directory roots; the current month ("%Y-%m") is appended to each of them.
ARCHIVE_ROOT = "/hive/users/qateam/hubCheckCronArchive/"
WEB_ROOT = "/usr/local/apache/htdocs-genecats/qa/test-results/hubCheckCron/"

# Separator lines written to filtered_output.txt and draftedMessages.txt.
FILTERED_SEPARATOR = "#############################################\n"
DRAFT_SEPARATOR = '##########################\n'

# Hubs reporting more problems than this get only this many error lines,
# followed by a '...' line, in filtered_output.txt.
MAX_ERRORS_SHOWN = 5

# pattern for line that has number of problems
pattern = r"Found (\d+) problem."
problemCountRe = re.compile(pattern)

emailIntro = """Dear UCSC Public Hub author,

I am writing on behalf of the UCSC Genome Browser. We wanted to alert you that your
public track hub at the address:"""


endEmail = """
hubCheck is a command-line utility that checks files in the hub are correctly formatted. If you
would like to run the hubCheck utility on your own machine, you can download the tool from the
utilities directory:
https://hgdownload.soe.ucsc.edu/downloads.html#utilities_downloads

Please update your public track hub. If you have any questions, please let us know, and we will be
happy to assist. Do not hesitate to let us know if we can help you resolve this situation, e.g. by
updating the URL where the hub is hosted or possibly hosting the files on our servers.

You can reach us at genome-www@soe.ucsc.edu.

Thank you for your interest and contributions,
The UCSC Genome Browser Group
"""


def bash(cmd):
    """Run cmd through the shell and return its stdout as a list of lines.

    Raises subprocess.CalledProcessError when the command (for a pipeline,
    its last stage) exits non-zero.
    """
    rawOutput = subprocess.run(cmd, check=True, shell=True, stdout=subprocess.PIPE, universal_newlines=True)
    lines = rawOutput.stdout.split('\n')
    # Only drop the final element when it is the empty string left by a
    # trailing newline; unconditionally slicing it off loses the last line
    # of output that does not end with a newline.
    if lines[-1] == '':
        lines.pop()
    return lines


def checkDuplicates(list):
    """Input list to get distinct items, keeping first-seen order.

    Membership is tested with ``in`` on a list so unhashable items (such as
    nested lists) are supported. The parameter name is kept for backward
    compatibility even though it shadows the builtin.
    """
    newList = []
    for item in list:
        if item not in newList:
            newList.append(item)
    return newList


def stringTerm(term):
    """Converts list input to string.

    Takes ``str(term)``, strips the first and last character (the list
    brackets) and removes every single quote.
    """
    newString = str(term)[1:-1].replace("\'", "")
    return newString


def getEmail(hubUrl):
    """Gets email from hubUrl.

    Downloads the hub.txt with curl and returns the second field of every
    line starting with 'email'. Falls back to an 'N/A' reminder pointing at
    the public hub contact page when nothing is found.
    """
    # The URL comes from hubCheck output and goes through a shell, so it is
    # quoted to keep characters such as '&', '?' or ';' from being
    # interpreted by the shell.
    email = stringTerm(bash("curl -Ls " + shlex.quote(hubUrl) + " | grep '^email' | awk '{print $2}'"))
    if not email:
        email = "N/A <---------- Check: https://genecats.gi.ucsc.edu/qa/test-results/publicHubContactInfo/publicHubContact.html"
    return email


def readLines(path):
    """Return the lines of the text file at path without their newlines."""
    with open(path) as fh:
        return [line.rstrip('\n') for line in fh]


def findHubsWithErrors(lines):
    """Find the hub.txt lines whose reports contain the two tracked errors.

    The hub for an error line is the line that follows the most recent line
    containing '####' (the same line the original
    ``head | grep -A 1 '####' | tail -1`` lookup selected). Error lines seen
    before any such hub line, or under an empty hub line, are skipped.

    Returns a tuple ``(descPageMis, couldNotOpen)``: the hubs with a
    "missing description page" error and the hubs with a "Couldn't open"
    error, in file order and possibly with repeats.
    """
    descPageMis = []
    couldNotOpen = []
    currentHub = None
    expectHub = False
    for line in lines:
        # Update the current hub first: an error on the hub line itself is
        # attributed to that line.
        if '####' in line:
            expectHub = True
        elif expectHub:
            currentHub = line
            expectHub = False
        if not currentHub:
            continue
        if "Couldn't open" in line:
            couldNotOpen.append(currentHub)
        if "missing description page" in line:
            descPageMis.append(currentHub)
    return descPageMis, couldNotOpen


def filterReport(lines):
    """Build the text appended to filtered_output.txt.

    For every line with exactly one "Found N problem" match, emit the line
    before it, the line itself and up to MAX_ERRORS_SHOWN following lines.
    When N exceeds MAX_ERRORS_SHOWN a '...' line marks the truncation. Each
    group is closed with a separator line.
    """
    parts = [FILTERED_SEPARATOR]
    for idx, line in enumerate(lines):
        found = problemCountRe.findall(line)
        if len(found) != 1:
            continue
        problems = int(found[0])
        truncated = problems > MAX_ERRORS_SHOWN
        shown = MAX_ERRORS_SHOWN if truncated else problems
        # Anchor the window on the "Found" line so a short file can never
        # pull in lines that belong to the previous hub.
        for reportLine in lines[max(idx - 1, 0):idx + 1 + shown]:
            parts.append(reportLine + '\n')
        if truncated:
            parts.append('...\n')
        parts.append(FILTERED_SEPARATOR)
    return "".join(parts)


def hubSection(lines, hubUrl):
    """Return the hubCheck output belonging to hubUrl.

    The section starts at the first line containing hubUrl (literal
    substring match) and stops before the first line containing '#'.
    """
    section = []
    started = False
    for line in lines:
        if not started:
            if hubUrl not in line:
                continue
            started = True
        if '#' in line:
            break
        section.append(line)
    return section


def draftMessage(email, hubUrl, sectionLines):
    """Return one drafted message, terminated by a draft separator line."""
    parts = ['Send email to: ' + email + '\n']
    # emailIntro contains no format placeholder, so it is written as-is; the
    # hub address is supplied by the first line of sectionLines.
    parts.append(emailIntro)
    parts.extend(line + '\n' for line in sectionLines)
    parts.append("\nWhen running hubCheck " + hubUrl + '\n')
    parts.append(endEmail)
    parts.append(DRAFT_SEPARATOR)
    return "".join(parts)


def main():
    """Parse this month's hubCheck output and append the reports and drafts."""
    # Resolve the month once so every path of a run points at the same
    # directory even if the job runs across a month boundary.
    month = datetime.now().strftime("%Y-%m")
    archiveDir = ARCHIVE_ROOT + month

    # Make directories for the month (Y-M)
    for directory in (archiveDir, WEB_ROOT + month):
        try:
            os.makedirs(directory, exist_ok=True)
        except OSError as e:
            print(f"Error creating directory: {e}")

    # Gets hubCheck output
    outputLines = readLines(archiveDir + "/hubCheck_output")
    descPageMis, couldNotOpen = findHubsWithErrors(outputLines)

    with open(archiveDir + "/filtered_output.txt", 'a') as f:
        f.write(filterReport(outputLines))

    with open(archiveDir + "/draftedMessages.txt", 'a') as f:
        f.write(DRAFT_SEPARATOR)
        # Missing description page drafts first, then couldn't open drafts.
        for hubs in (descPageMis, couldNotOpen):
            for hubUrl in checkDuplicates(hubs):
                f.write(draftMessage(getEmail(hubUrl), hubUrl, hubSection(outputLines, hubUrl)))

    print("Check https://genecats.gi.ucsc.edu/qa/test-results/hubCheckCron/" + month + "/draftedMessages.txt to email hub authors about missing/broken public hub files")
    print("Archive of monthly raw data can be found here: /hive/users/qateam/hubCheckCronArchive/" + month)


if __name__ == "__main__":
    main()