a36da3aabda1ec73fdb6a35c7001e53d53f6aff3
gperez2
  Sun Jul 30 21:36:20 2023 -0700
Making the draftedMessages.txt output file visible through genecats.gi.ucsc.edu, refs #29319

diff --git src/utils/qa/hubCheckDraftEmails.py src/utils/qa/hubCheckDraftEmails.py
index 5403863..bc0b692 100755
--- src/utils/qa/hubCheckDraftEmails.py
+++ src/utils/qa/hubCheckDraftEmails.py
@@ -1,155 +1,156 @@
 #!/usr/bin/env python3
 
 # Program Header
 # Name:   Gerardo Perez
 # Description: A program that parses the hubCheck output into email drafts for hub authors regarding
 #              missing description pages and couldn't open errors
 #
 # hubCheckDraftEmails.py
 #
 #
 # Version: Python 3.6.5 
 #
 import os
 import getpass
 import sys
 import re
 import json
 import io
 import requests
 import subprocess
 from datetime import datetime
 
 user = getpass.getuser()
 
 def bash(cmd):
     """Run cmd through the shell and return its stdout as a list of lines.
     cmd is a shell command string; pipes work because shell=True is used.
     Returns one item per output line without the newline, or an empty list
     when the command printed nothing.
     Raises subprocess.CalledProcessError when the command exits non-zero.
     """
     rawOutput = subprocess.run(cmd, check=True, shell=True, stdout=subprocess.PIPE, universal_newlines=True)
     lines = rawOutput.stdout.split('\n')
     # Drop only the empty string left behind by a trailing newline. Slicing off
     # the last element unconditionally lost a final line that had no newline.
     if lines[-1] == '':
         lines.pop()
     return lines
 
 # Create this month's (YYYY-MM) directory under the archive and under the web-served results tree
 for monthParent in ("/hive/users/qateam/hubCheckCronArchive/",
                     "/usr/local/apache/htdocs-genecats/qa/test-results/hubCheckCron/"):
     try:
         os.makedirs(monthParent+datetime.now().strftime("%Y-%m"), exist_ok=True)
     except OSError as e:
         # Report the failure and keep going; the script does not stop here
         print(f"Error creating directory: {e}")
 
 # Creates list for the hub.txt URLs that have the error of missing description pages
 descPageMis=[]
 # Creates list for the hub.txt URLs that have the error of couldn't open
 couldNotOpen=[]
 # Gets hubCheck output
 output_line=bash("cat /hive/users/qateam/hubCheckCronArchive/"+datetime.now().strftime("%Y-%m")+"/hubCheck_output")
 # The hub.txt an error belongs to is the line right after the most recent '####' line.
 # That position is tracked during this single pass instead of re-running
 # head | grep | tail over the whole file for every error line.
 lineCount=0
 lastSeparator=None  # 1-based number of the latest line containing '####'
 for line in output_line:
     lineCount=lineCount+1
     line=str(line)
     if '####' in line:
         lastSeparator=lineCount
     hasCouldNotOpen="Couldn't open" in line
     hasMissingDesc="missing description page" in line
     if not (hasCouldNotOpen or hasMissingDesc):
         continue
     # Entries stay one-item lists ([] when no '####' line has been seen yet) because
     # the code that consumes these lists passes each entry to stringTerm()
     if lastSeparator is None:
         hubLine=[]
     else:
         # min() covers an error line that itself contains '####': nothing follows it yet
         hubLine=[output_line[min(lastSeparator, lineCount-1)]]
     if hasCouldNotOpen: #Gets each hub.txt that has the error of couldn't open
         couldNotOpen.append(hubLine)
     if hasMissingDesc: #Gets each hub.txt that has the error of missing description page
         descPageMis.append(hubLine)
 
  
 
 def checkDuplicates(list):
     """Return the items of the input in first-seen order, each kept only once"""
     distinct=[]
     for candidate in list:
         # Membership is tested with ==, so unhashable items (e.g. lists) are fine
         if candidate in distinct:
             continue
         distinct.append(candidate)
     return distinct
 
 def stringTerm(term):
     """Converts list input to string: str() of the input without its first and last character and without single quotes"""
     rendered=str(term)
     # For a list this strips the surrounding [ and ]
     unwrapped=rendered[1:-1]
     return unwrapped.replace("\'", "")
 
 def getEmail(hubUrl):
     """Gets email from hubUrl.
     Downloads hubUrl with curl (following redirects) and takes the second
     whitespace-separated field of every line that starts with 'email'.
     Returns that value (several values are joined by stringTerm), or an
     'N/A' note pointing at the public hub contact page when none is found.
     """
     import shlex  # only needed here, to quote the URL for the shell
     # The URL comes from hubCheck output; quoting keeps characters such as & ; ?
     # from being interpreted by the shell
     email=stringTerm(bash("curl -Ls "+shlex.quote(hubUrl)+" | grep '^email' | awk '{print $2}'"))
     if not email:
        email="N/A <---------- Check: https://genecats.gi.ucsc.edu/qa/test-results/publicHubContactInfo/publicHubContact.html"
     return email
 
 count=0
 # pattern for line that has number of problems
 pattern = r"Found (\d+) problem."
 with open("/hive/users/qateam/hubCheckCronArchive/"+datetime.now().strftime("%Y-%m")+"/filtered_output.txt", 'a') as f:
     f.write("#############################################\n")
     # Read the hubCheck output once; the excerpts below are slices of this list
     # instead of a fresh head | tail pipeline for every "Found N problems" line
     hubCheckLines=bash("cat /hive/users/qateam/hubCheckCronArchive/"+datetime.now().strftime("%Y-%m")+"/hubCheck_output")
     # For loop that goes through each line from the hubCheck output
     for line in hubCheckLines:
         count=count+1
         found=re.findall(pattern, line)
         # Only a line carrying exactly one problem count starts an excerpt
         if len(found) != 1:
             continue
         problems=int(found[0])
         overLimit=problems >= 6
         if overLimit:
             # Above the limit: the line before the count, the count line and the next 5 lines
             excerpt=hubCheckLines[:count+5][-7:]
         else:
             # Within the limit: the line before the count, the count line and all `problems` lines after it
             excerpt=hubCheckLines[:count+problems][-(problems+2):]
         for l in excerpt:
             f.write(l+'\n')
         if overLimit:
             # Shows that the error listing was cut short
             f.write('...\n')
         f.write("#############################################\n")
 
 
 # Opening text of every drafted email. It ends with "address:" and no newline.
 # NOTE(review): the text has no % placeholder although it is written with
 # `emailIntro % item` further down; presumably the hubCheck excerpt written
 # right after it is what supplies the hub URL -- confirm.
 emailIntro= """Dear UCSC Public Hub author,
 
 I am writing on behalf of the UCSC Genome Browser. We wanted to alert you that your
 public track hub at the address:"""
 
 
 # Closing text of every drafted email: where to download hubCheck, the request to
 # update the hub, and the contact address.
 endEmail="""
 hubCheck is a command-line utility that checks files in the hub are correctly formatted. If you
 would like to run the hubCheck utility on your own machine, you can download the tool from the
 utilities directory: 
 https://hgdownload.soe.ucsc.edu/downloads.html#utilities_downloads
 
 Please update your public track hub. If you have any questions, please let us know, and we will be
 happy to assist. Do not hesitate to let us know if we can help you resolve this situation, e.g. by
 updating the URL where the hub is hosted or possibly hosting the files on our servers.
 
 You can reach us at genome-www@soe.ucsc.edu.
 
 Thank you for your interest and contributions,
 The UCSC Genome Browser Group
 """
 
 # Gets the hubCheck output once; every draft below quotes an excerpt of it
 hubCheckLines=bash("cat /hive/users/qateam/hubCheckCronArchive/"+datetime.now().strftime("%Y-%m")+"/hubCheck_output")
 with open("/hive/users/qateam/hubCheckCronArchive/"+datetime.now().strftime("%Y-%m")+"/draftedMessages.txt", 'a') as f:
     f.write('##########################\n')
     # First the hubs with the error of missing description page, then the hubs with the
     # error of couldn't open. Each list is de-duplicated on its own, so a hub that has
     # both errors gets one draft in each pass.
     for hubList in (descPageMis, couldNotOpen):
         for item in checkDuplicates(hubList):
             hubUrl=stringTerm(item)
             f.write('Send email to: '+getEmail(hubUrl)+'\n')
             # emailIntro has no % placeholder, so it is written as-is
             f.write(emailIntro)
             # Quote the hubCheck output from the first line mentioning this hub up to, but
             # not including, the next line containing '#'. The URL is matched as plain text
             # here, not handed to grep as an unquoted shell word and regular expression.
             quoting=False
             for line in hubCheckLines:
                 if not quoting:
                     if hubUrl not in line:
                         continue
                     quoting=True
                 if '#' in line:
                     break
                 f.write(line+'\n')
             f.write("\nWhen running hubCheck "+hubUrl+'\n')
             f.write(endEmail)
             f.write('##########################\n')
 
+bash("cp /hive/users/qateam/hubCheckCronArchive/"+datetime.now().strftime("%Y-%m")+"/draftedMessages.txt /usr/local/apache/htdocs-genecats/qa/test-results/hubCheckCron/"+datetime.now().strftime("%Y-%m"))  # copy this month's drafts into the htdocs-genecats results directory
 # Print the URL of the copied drafts and the path of this month's archive directory
 print("Check https://genecats.gi.ucsc.edu/qa/test-results/hubCheckCron/"+datetime.now().strftime("%Y-%m")+"/draftedMessages.txt to email hub authors about missing/broken public hub files")
 print("Archive of monthly raw data can be found here: /hive/users/qateam/hubCheckCronArchive/"+datetime.now().strftime("%Y-%m"))