#!/usr/bin/env python3
"""Email public hub authors the usage statistics of their hub.

Script that calculates public hub usage, given a number of pre-generated
hubPublic statistics files, then drafts and sends out emails to all public
hubs informing them of their hub activity over the N time period.

WARNING: This can spam user emails - be careful. By default nothing is sent
(dry run) so as not to accidentally spam our users; pass --send to actually
deliver the emails. Use "--send --limit 1" to test with one email first, then
"--send --skip 1" to send the rest.
"""
# Provenance: commit 2424404585a3d8562fe2e94436fe3a32bab2980b by lrnassar,
# Wed Jan 15 11:38:30 2025 -0800, Refs #34421
# (src/utils/qa/sendPublicHubAuthorsUsageStatistics.py).

# Several of these imports are not used right now; they are kept from the
# original script.
import atexit
import logging
import optparse
import os
import subprocess
import sys
import urllib
from email.mime.text import MIMEText
from os.path import isfile
from subprocess import PIPE, Popen

# Sender email
# fromEmail = "lrnassar@ucsc.edu" #For testing
fromEmail = "qateam@gi.ucsc.edu"

# Address that is added to the "To" header of every outgoing email
ccEmail = "browserqa-group@ucsc.edu"

# Email template with placeholders for hub name and unique hits
emailTemplate = """Dear UCSC Public Hub Author,

This is an automated email from the UCSC Genome Browser Group.

Your public hub %s received %s unique users over the last calendar year (January 2024 - December 2024). A unique user is defined as a continuous browsing session with at least two page loads, typically unique to the web browser tab.

Thank you for your continued support and contributions to the UCSC Genome Browser community. Your hub plays an essential role in providing valuable data to researchers and users worldwide.

If you have any questions or need assistance, please don’t hesitate to contact us at genome-www@soe.ucsc.edu. We’re here to help.

Warm regards,
The UCSC Genome Browser Group
"""

# Dictionary to store hub usage statistics; filled in by main() as
# {hubName: {'useCount': int, 'hubUrl': str, 'email': str}} where 'hubUrl'
# and 'email' are only present when they could be determined.
myDic = {}
# Base file path for usage data
fileUrl = "/usr/local/apache/htdocs-genecats/qa/test-results/usageStats/publicHubUsageCounts/pubHubUsageCounts."
# List of monthly data files
dates = ['2024-11.txt', '2024-10.txt', '2024-09.txt', '2024-08.txt']
# Tab-separated file that maps public hub URLs to contact email addresses
hubMailStatusFile = "/cluster/home/qateam/cronScripts/hubPublicMailStatus.tab"
# Every summed use count is multiplied by this factor.
# NOTE(review): presumably this scales the four monthly files in `dates` up to
# the twelve months quoted in emailTemplate - confirm when changing `dates`.
useCountMultiplier = 3
# Lines of a statistics file starting with one of these are headers, not data
skipPrefixes = ("This", "individual", "assembly")


def bash(cmd):
    """Run cmd in a subprocess and return its output as text.

    cmd may be a string, which is run through the shell as before, or a list
    of arguments, which is executed directly without a shell so that the
    arguments need no quoting. stderr is merged into the returned output.

    Raises RuntimeError when the command exits with a non-zero status.
    """
    try:
        completed = subprocess.run(
            cmd,
            check=True,
            shell=isinstance(cmd, str),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )
    except subprocess.CalledProcessError as e:
        raise RuntimeError(
            "command '{}' return with error (code {}): {}".format(
                e.cmd, e.returncode, e.output)) from e
    return completed.stdout


def sendEmail(dest, body):
    """Send body to dest (and to ccEmail) through /usr/sbin/sendmail.

    Returns True when sendmail exited with status 0, False otherwise.
    """
    msg = MIMEText(body)
    msg["From"] = fromEmail
    # Combine recipient emails (dest and the fixed group address)
    msg["To"] = dest + "," + ccEmail
    msg["Subject"] = "Your UCSC Genome Browser Hub Usage Statistics"
    # -t: take the recipients from the message headers
    p = Popen(["/usr/sbin/sendmail", "-t", "-oi"], stdin=PIPE)
    p.communicate(msg.as_bytes())
    return p.returncode == 0


def parseUsageLines(lines, counts=None):
    """Add the per-hub use counts found in lines to counts and return it.

    Each data line is tab-separated with the use count in the second field
    and the hub name in the fourth. Empty lines and header lines (see
    skipPrefixes) are ignored; lines that cannot be parsed are reported on
    stderr and skipped instead of aborting the run.
    """
    if counts is None:
        counts = {}
    for rawLine in lines:
        line = rawLine.rstrip("\n")
        if not line or line.startswith(skipPrefixes):
            continue
        fields = line.split("\t")
        try:
            useCount = int(fields[1])
            hubName = fields[3]
        except (IndexError, ValueError):
            print("Skipping malformed usage line: " + line, file=sys.stderr)
            continue
        counts[hubName] = counts.get(hubName, 0) + useCount
    return counts


def loadUsageCounts(fileNames):
    """Return {hubName: summed use count} over the given statistics files.

    Each name in fileNames is appended to fileUrl to build the file path.
    """
    counts = {}
    for fileName in fileNames:
        with open(fileUrl + fileName, encoding="utf-8") as statsFile:
            parseUsageLines(statsFile, counts)
    return counts


def escapeSqlString(value):
    """Escape value so it can be placed inside a single-quoted SQL literal."""
    return value.replace("\\", "\\\\").replace("'", "''")


def lookupHubUrl(hubName):
    """Return the hubUrl of the public hub with this shortLabel, or None.

    Queries hubPublic in hgcentraltest with hgsql. The arguments are passed
    as a list, so the hub name never goes through a shell.
    """
    query = "select hubUrl from hubPublic where shortLabel='{}'".format(
        escapeSqlString(hubName))
    rows = bash(["hgsql", "-e", query, "hgcentraltest"]).split("\n")
    # rows[0] is the column header; no further row means no such hub
    if len(rows) > 1 and rows[1]:
        return rows[1]
    return None


def findHubEmail(hubUrl, statusLines):
    """Return the email address recorded for hubUrl, or None.

    A status line matches when it contains hubUrl; the email is its second
    tab-separated field. When several lines match, the last one wins.
    """
    if not hubUrl:
        # An empty URL is contained in every line and would match anything
        return None
    email = None
    for line in statusLines:
        if hubUrl not in line:
            continue
        fields = line.split("\t")
        if len(fields) > 1 and fields[1].strip():
            email = fields[1].strip()
    return email


def collectHubInfo(fileNames):
    """Gather use count, hub URL and contact email for every hub.

    Returns {hubName: {'useCount': ..., 'hubUrl': ..., 'email': ...}}; the
    'hubUrl' and 'email' keys are left out when they cannot be determined.
    """
    counts = loadUsageCounts(fileNames)
    with open(hubMailStatusFile, encoding="utf-8") as statusFile:
        statusLines = statusFile.read().split("\n")

    hubInfo = {}
    for hubName, useCount in counts.items():
        entry = {'useCount': useCount * useCountMultiplier}
        hubUrl = lookupHubUrl(hubName)
        if hubUrl is not None:
            entry['hubUrl'] = hubUrl
            email = findHubEmail(hubUrl, statusLines)
            if email is not None:
                entry['email'] = email
        hubInfo[hubName] = entry
    return hubInfo


def draftEmails(hubInfo):
    """Draft one email per hub that has a contact address.

    Returns (drafts, problems): drafts is a list of
    (hubName, destEmail, emailText) tuples and problems lists the names of
    the hubs that have no 'email' entry and therefore cannot be contacted.
    """
    drafts = []
    problems = []
    for hubName, info in hubInfo.items():
        if "email" not in info:
            problems.append(hubName)
            continue
        emailText = emailTemplate % (hubName, info['useCount'])
        drafts.append((hubName, info['email'], emailText))
    return drafts, problems


def main(argv=None):
    """Collect the statistics, draft the emails and, with --send, send them."""
    parser = optparse.OptionParser(
        usage="%prog [--send] [--skip N] [--limit N]",
        description="Email public hub authors their hub usage statistics. "
                    "Without --send nothing is emailed.")
    parser.add_option("--send", action="store_true", default=False,
                      help="actually send the emails (default: dry run)")
    parser.add_option("--skip", type="int", default=0,
                      help="skip the first N drafted emails")
    parser.add_option("--limit", type="int", default=None,
                      help="handle at most N drafted emails")
    options, _ = parser.parse_args(argv)

    # Gather everything before sending so that a lookup failure cannot
    # interrupt a half-finished mailing.
    myDic.clear()
    myDic.update(collectHubInfo(dates))

    drafts, problems = draftEmails(myDic)
    for hubName in problems:
        print("Problem with: " + hubName)  # Report missing email information

    selected = drafts[options.skip:]
    if options.limit is not None:
        selected = selected[:options.limit]

    sent = 0
    if options.send:
        for hubName, destEmail, emailText in selected:
            if sendEmail(destEmail, emailText):
                sent += 1
            else:
                print("Failed to send email for: " + hubName)
    else:
        print("Dry run: {} emails drafted, none sent (use --send to deliver)"
              .format(len(selected)))

    # Print the total number of emails sent
    print("Total number of emails sent: " + str(sent))
    return 0


if __name__ == "__main__":
    sys.exit(main())