#!/usr/bin/env python3

# Program Header
# Name:   Gerardo Perez
# Description: A program that parses the hubCheck output into email drafts for hub authors regarding
# missing description pages and couldn't open errors
#
# hubCheckDraftEmails.py
#
#
# Version: Python 3.6.5
#
# Revision a36da3aabda1ec73fdb6a35c7001e53d53f6aff3 (gperez2, Sun Jul 30 21:36:20 2023 -0700):
# Making the draftedMessages.txt output file visible through genecats.gi.ucsc.edu, refs #29319
#

import os
import getpass
import sys
import re
import json
import io
import shutil
import subprocess
from datetime import datetime

try:
    # Not used by this script; kept from the original import list, but a
    # missing optional package must not stop the cron job.
    import requests
except ImportError:
    requests = None

# Monthly archive of the raw hubCheck data (one sub-directory per YYYY-MM).
ARCHIVE_ROOT = "/hive/users/qateam/hubCheckCronArchive/"
# Directory served through genecats.gi.ucsc.edu (one sub-directory per YYYY-MM).
WEB_ROOT = "/usr/local/apache/htdocs-genecats/qa/test-results/hubCheckCron/"

# Separator lines written to filtered_output.txt and draftedMessages.txt.
FILTERED_SEPARATOR = "#############################################\n"
DRAFT_SEPARATOR = '##########################\n'

# Maximum number of error lines copied per hub into filtered_output.txt.
ERROR_LIMIT = 5

# pattern for line that has number of problems
pattern = r"Found (\d+) problem."

# NOTE(review): the reviewed copy of this file had its whitespace collapsed, so
# the line breaks inside the two email literals below are reconstructed at
# sentence/paragraph boundaries -- compare with the deployed template.
# The collapsed copy shows no whitespace after "address:", so the hub URL line
# is written directly after the colon, as before.
emailIntro = """Dear UCSC Public Hub author,

I am writing on behalf of the UCSC Genome Browser. We wanted to alert you that your public track hub at the address:"""

endEmail = """
hubCheck is a command-line utility that checks files in the hub are correctly formatted. If you would like to run the hubCheck utility on your own machine, you can download the tool from the utilities directory: https://hgdownload.soe.ucsc.edu/downloads.html#utilities_downloads

Please update your public track hub. If you have any questions, please let us know, and we will be happy to assist.

Do not hesitate to let us know if we can help you resolve this situation, e.g. by updating the URL where the hub is hosted or possibly hosting the files on our servers. You can reach us at genome-www@soe.ucsc.edu.

Thank you for your interest and contributions,
The UCSC Genome Browser Group

"""


def splitLines(text):
    """Split text on newlines, dropping only the empty piece after a final newline."""
    pieces = text.split('\n')
    if pieces and pieces[-1] == '':
        pieces.pop()
    return pieces


def bash(cmd):
    """Input bash cmd and return stdout as a list of lines.

    Raises subprocess.CalledProcessError when the command exits non-zero.
    A last line without a trailing newline is kept (it used to be dropped).
    """
    rawOutput = subprocess.run(cmd, check=True, shell=True, stdout=subprocess.PIPE,
                               universal_newlines=True)
    return splitLines(rawOutput.stdout)


def readLines(path):
    """Return the lines of the text file at path, without line terminators."""
    with open(path) as f:
        return splitLines(f.read())


def checkDuplicates(list):
    """Input list to get distinct items, keeping first-seen order.

    The parameter name is kept for compatibility with existing callers.
    Membership is tested with ``in`` so unhashable items are accepted too.
    """
    newList = []
    for item in list:
        if item not in newList:
            newList.append(item)
    return newList


def stringTerm(term):
    """Converts list input to string: the list's repr without brackets and single quotes."""
    newString = str(term)[1:-1].replace("\'", "")
    return newString


def emailsFromHubText(hubText):
    """Return the second field of every line starting with 'email', joined by ', '."""
    emails = []
    for line in hubText.split('\n'):
        if line.startswith('email'):
            fields = line.split()
            emails.append(fields[1] if len(fields) > 1 else '')
    return ', '.join(emails)


def getEmail(hubUrl):
    """Gets email from hubUrl.

    Downloads the hub.txt with curl and returns the address(es) found on its
    'email' line(s).  Returns an "N/A" pointer to the public hub contact page
    when nothing is found or the download fails.
    """
    try:
        # Argument list, no shell: hub URLs may contain '&', '?', ';' etc.,
        # which a shell would interpret instead of passing on to curl.
        fetched = subprocess.run(["curl", "-Ls", hubUrl], stdout=subprocess.PIPE)
        # Decode leniently: only the email line matters and a hub.txt is not
        # guaranteed to be valid UTF-8.
        hubText = fetched.stdout.decode("utf-8", errors="replace")
    except OSError:
        hubText = ""
    email = emailsFromHubText(hubText)
    if not email:
        email = "N/A <---------- Check: https://genecats.gi.ucsc.edu/qa/test-results/publicHubContactInfo/publicHubContact.html"
    return email


def collectFailingHubs(lines):
    """Return (descPageMis, couldNotOpen): hub.txt URLs per error kind.

    The hub URL of an error line is the line following the most recent line
    containing '####'.  A URL is appended once per error line, so duplicates
    are expected; error lines seen before any '####' line are skipped.
    """
    descPageMis = []
    couldNotOpen = []
    lastSeparator = None
    for index, line in enumerate(lines):
        if '####' in line:
            lastSeparator = index
        missingPage = "missing description page" in line
        notOpened = "Couldn't open" in line
        if not (missingPage or notOpened) or lastSeparator is None:
            continue
        # Same line the old "head | grep -A 1 '####' | tail -1" selected.
        hubUrl = lines[lastSeparator + 1] if lastSeparator < index else line
        if notOpened:
            couldNotOpen.append(hubUrl)
        if missingPage:
            descPageMis.append(hubUrl)
    return descPageMis, couldNotOpen


def problemCount(line):
    """Return the N of a 'Found N problem' line, or None for any other line."""
    found = re.findall(pattern, line)
    return int(found[0]) if len(found) == 1 else None


def filteredExcerpt(lines, lineNumber, problems, limit=ERROR_LIMIT):
    """Return (excerpt, truncated) for the hub whose 'Found N problem' line is lineNumber.

    lineNumber is 1-based.  The excerpt holds the line before the 'Found' line,
    the 'Found' line itself and at most `limit` of the following error lines;
    truncated is True when the hub reported more than `limit` problems.
    """
    shown = min(problems, limit)
    start = max(lineNumber - 2, 0)
    return lines[start:lineNumber + shown], problems > limit


def hubReport(lines, hubUrl):
    """Return the hubCheck lines for hubUrl.

    Starts at the first line equal to hubUrl and stops before the next line
    containing '#'.  Returns an empty list when hubUrl is not a line of the output.
    """
    try:
        start = lines.index(hubUrl)
    except ValueError:
        return []
    report = []
    for line in lines[start:]:
        if '#' in line:
            break
        report.append(line)
    return report


def writeFiltered(out, lines):
    """Write the per-hub excerpts of the hubCheck output to the open file out."""
    out.write(FILTERED_SEPARATOR)
    for lineNumber, line in enumerate(lines, start=1):
        problems = problemCount(line)
        if problems is None:
            continue
        excerpt, truncated = filteredExcerpt(lines, lineNumber, problems)
        for excerptLine in excerpt:
            out.write(excerptLine + '\n')
        if truncated:
            out.write('...\n')
        out.write(FILTERED_SEPARATOR)


def writeDrafts(out, hubUrls, lines):
    """Write one email draft per distinct hub URL to the open file out."""
    for hubUrl in checkDuplicates(hubUrls):
        out.write('Send email to: ' + getEmail(hubUrl) + '\n')
        out.write(emailIntro)
        for line in hubReport(lines, hubUrl):
            out.write(line + '\n')
        out.write("\nWhen running hubCheck " + hubUrl + '\n')
        out.write(endEmail)
        out.write(DRAFT_SEPARATOR)


def main():
    """Turn this month's hubCheck output into filtered_output.txt and draftedMessages.txt."""
    # Resolve the month once so every path of a run points at the same directory.
    month = datetime.now().strftime("%Y-%m")
    archiveDir = ARCHIVE_ROOT + month
    webDir = WEB_ROOT + month

    # Make directories for the month (Y-M)
    for directory in (archiveDir, webDir):
        try:
            os.makedirs(directory, exist_ok=True)
        except OSError as e:
            print(f"Error creating directory: {e}")

    # Gets hubCheck output
    try:
        lines = readLines(os.path.join(archiveDir, "hubCheck_output"))
    except OSError as e:
        sys.exit(f"Error reading hubCheck output: {e}")

    descPageMis, couldNotOpen = collectFailingHubs(lines)

    with open(os.path.join(archiveDir, "filtered_output.txt"), 'a') as f:
        writeFiltered(f, lines)

    draftedMessages = os.path.join(archiveDir, "draftedMessages.txt")
    with open(draftedMessages, 'a') as f:
        f.write(DRAFT_SEPARATOR)
        # Hubs with missing description pages first, then hubs that could not be opened.
        writeDrafts(f, descPageMis, lines)
        writeDrafts(f, couldNotOpen, lines)

    # Publish the drafts so they are visible through genecats.gi.ucsc.edu.
    shutil.copy(draftedMessages, webDir)

    print("Check https://genecats.gi.ucsc.edu/qa/test-results/hubCheckCron/"+month+"/draftedMessages.txt to email hub authors about missing/broken public hub files")
    print("Archive of monthly raw data can be found here: /hive/users/qateam/hubCheckCronArchive/"+month)


if __name__ == "__main__":
    main()