#!/usr/bin/env python3
# Provenance: this is src/utils/hubPublicMail as it existed before commit
# 18969d7b729d4b72ea555cbb7d29a74960d91cfe (lrnassar, Wed May 24 14:43:45 2023 -0700),
# which removed it because the script now lives under utils/qa.
"""Send a warning email to the author of a UCSC public track hub that is down."""

import atexit
import logging
import optparse
import os
import sys
import urllib.request
from email.mime.text import MIMEText
from os.path import isfile
from subprocess import PIPE, Popen

# makes sure that only one instance of the program runs at the same time
lockFname = None

fromEmail = "genome-www@soe.ucsc.edu"

# seconds to wait for a hub.txt download before the hub counts as failed
downloadTimeout = 10

emailTemplate = """Dear UCSC Public Hub author,

This is an automated email sent by the UCSC Genome Browser Group's hubPublicMail system to alert you
that your public track hub at the address:

%s

Has been inaccessible for at least 48 hours. If it continues to be offline, we may need to remove it
from our public hubs list at https://genome.ucsc.edu/cgi-bin/hgHubConnect

Do not hesitate to let us know if we can help you resolve this situation, e.g. by updating the URL
where the hub is hosted or possibly hosting the files on our servers.

You can reach us at genome-www@soe.ucsc.edu.

Thank you for your interest and contributions,
The UCSC Genome Browser Group
"""

# ==== functions =====


def parseArgs():
    """Set up logging and parse the command line; -h shows the auto-generated help page.

    Returns the tuple (args, options), where args is [hgcentralname, statusFile].
    Prints the help page and exits with status 1 unless exactly two positional
    arguments were given, because main() unpacks exactly two.
    """
    parser = optparse.OptionParser("""usage: %prog [options] hgcentralname statusFile - send email if public hub is down
    Goes through the following steps:
    1) Get the list of all public hubs
    2) Try to get their URLs and write all to statusFile.
    3) If a hub fails, increase count in statusFile.
    4) If count is > 24, send an email to hub email and set the failCount to -48.

    Example:
       hubPublicMail hgcentraltest /tmp/hubPublicStatus.tab
    """)

    parser.add_option("-d", "--debug", dest="debug", action="store_true", help="show debug messages")
    (options, args) = parser.parse_args()

    if len(args) != 2:
        parser.print_help()
        sys.exit(1)

    level = logging.DEBUG if options.debug else logging.INFO
    logging.basicConfig(level=level)
    # basicConfig is a no-op when logging is already configured, so set the level explicitly too
    logging.getLogger().setLevel(level)

    return args, options


# ----------- main --------------
def getHubUrls(centralName):
    """Return the list of hubUrl values in the hubPublic table of database centralName."""
    logging.debug("Getting hubUrls from db %s" % centralName)
    if centralName == 'hgcentral':
        cmd = "hgsql -h genome-centdb %s -se 'select hubUrl from hubPublic'" % centralName
    else:
        cmd = "hgsql %s -se 'select hubUrl from hubPublic'" % centralName
    lines = os.popen(cmd).read().splitlines()  # please do not suggest using subprocess in code review. Thx.
    return lines


def parseEmail(s):
    """Return only the email address, given the email value of a hub.txt.

    Handles these weird cases:
        jferna10@ucsc.edu or max@soe.ucsc.edu   (the second address is used)
        yzz2 at psu.edu
        <a HREF="mailto:dunham@ebi.ac.uk"TARGET=_BLANK>Ian Dunham</a>
        Ian Dunham <dunham@ebi.ac.uk>
    """
    if "<" in s:
        quoted = s.split('"')
        if len(quoted) > 1:
            # HTML anchor: the address is the first quoted attribute value
            addr = quoted[1]
        else:
            # "Name <address>": take what is between the angle brackets
            addr = s[s.index("<") + 1:].split(">")[0]
        addr = addr.strip()
        # the href value carries a mailto: scheme that is not part of the address
        if addr.lower().startswith("mailto:"):
            addr = addr[len("mailto:"):]
        return addr
    if " at " in s:
        return s.replace(" at ", "@")
    if " or " in s:
        return s.split(" or ")[1]
    return s


def _fetchHubText(url):
    """Return the decoded text found at url; raises if the download fails."""
    if url.startswith("http"):
        # The third-party HTTP stack is imported here, where it is needed, so that the
        # parsing and status-file helpers stay usable without it.
        import requests
        import urllib3
        # switch off insecure SSL warnings, certificates are deliberately not verified
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        # without a timeout a server that never answers would block the run forever
        return requests.get(url, verify=False, timeout=downloadTimeout).text

    # FTP
    with urllib.request.urlopen(url, timeout=downloadTimeout) as f:
        return f.read().decode("utf8")


def downloadUrls(urls, emails):
    """Try to read the hub.txt at every URL.

    urls   -- iterable of hub.txt URLs
    emails -- ignored; kept so existing callers keep working. A fresh dict is
              always built and returned.

    Returns (didFail, emails): the list of URLs that could not be downloaded or
    whose hub.txt is malformed, and a dict hub URL -> email address.
    """
    didFail = list()
    emails = dict()

    for url in urls:
        logging.debug("Checking %s" % url)

        try:
            text = _fetchHubText(url)
        except KeyboardInterrupt:  # handle ctrl-c for debugging
            sys.exit(1)
        except ImportError:
            # a missing requests/urllib3 install is a setup problem, not a broken hub
            raise
        except Exception:
            logging.debug("URL %s failed." % url)
            didFail.append(url)
            continue

        for line in text.splitlines():
            line = line.strip()
            if line == "":
                continue
            keyVal = line.split(None, maxsplit=1)
            if len(keyVal) != 2:
                # some hubs may have broken hub.txt files. Treat these as if they were broken.
                didFail.append(url)
                break

            key, val = keyVal
            if key == "email":
                emails[url] = parseEmail(val)

    return didFail, emails


def sendEmail(dest, body):
    """Send body to dest, with a copy to fromEmail, through the local sendmail."""
    msg = MIMEText(body)
    msg["From"] = fromEmail
    msg["To"] = dest + "," + fromEmail
    msg["Subject"] = "Your UCSC public hub is down"
    p = Popen(["/usr/sbin/sendmail", "-t", "-oi"], stdin=PIPE)
    p.communicate(msg.as_bytes())
    if p.returncode != 0:
        logging.error("sendmail exited with status %s for %s" % (p.returncode, dest))


def readStatus(fname):
    """Read the tab-sep status file with the columns hubUrl, email, failCount.

    Returns a dict hubUrl -> (email, failCount); empty if fname does not exist.
    Raises ValueError on a line that does not have exactly three columns or
    whose failCount is not an integer.
    """
    hubs = dict()
    if not isfile(fname):
        return hubs

    logging.debug("Reading %s" % fname)
    with open(fname) as ifh:
        for line in ifh:
            if line.startswith("#") or line.strip() == "":
                continue
            row = line.rstrip("\n").split("\t")
            if len(row) != 3:
                logging.error("Cannot parse line in status file: %s" % repr(line))
                raise ValueError("Cannot parse line in status file: %s" % repr(line))
            hubUrl, email, failCount = row
            hubs[hubUrl] = (email, int(failCount))
    return hubs


def mergeInfo(urls, oldUrlInfo, failedUrls, urlEmails):
    """Combine the previous status with the result of the current download round.

    urls       -- list of hubPublic URLs
    oldUrlInfo -- dict URL -> (email, failCount) from the status file
    failedUrls -- URLs that failed in this round
    urlEmails  -- dict URL -> email parsed from the hub.txt files in this round

    Returns a dict URL -> (email, failCount). A URL for which no email is known
    from either source is reported and left out.
    """
    failed = set(failedUrls)
    urlInfo = {}
    for url in urls:
        newEmail = urlEmails.get(url)
        oldInfo = oldUrlInfo.get(url)
        if oldInfo is None:
            if newEmail is None:
                print("URL %s is broken and there is no email in the status file. Skipping it." % url)
                continue
            oldInfo = (newEmail, 0)

        oldEmail, failCount = oldInfo
        # prefer most current email address
        email = oldEmail if newEmail is None else newEmail

        if url in failed:
            failCount += 1

        urlInfo[url] = (email, failCount)

    return urlInfo


def sendEmails(urlInfo):
    """Given dict url -> (email, failCount), send an email if failCount > 24.

    After an email was sent, the failCount of that URL is set to -48 so that the
    author is not contacted again right away. urlInfo is modified and returned.
    """
    for url, (destEmail, failCount) in list(urlInfo.items()):
        if failCount > 24:
            logging.info("HUB %s BROKEN - sending email to %s" % (url, destEmail))
            emailText = emailTemplate % url
            sendEmail(destEmail, emailText)
            urlInfo[url] = (destEmail, -48)
    return urlInfo


def writeStatus(urlInfo, statusFname):
    """Write the new status file.

    The data goes to statusFname + ".tmp" first and is then renamed, so an
    interrupted run does not leave a partial status file behind.
    """
    statusTmp = statusFname + ".tmp"
    logging.debug("Writing %s" % statusTmp)
    with open(statusTmp, "wt") as ofh:
        ofh.write("#url\temail\tfailCount\n")
        for url, (email, failCount) in urlInfo.items():
            ofh.write("\t".join([url, email, str(failCount)]))
            ofh.write("\n")
    logging.debug("Renaming %s to %s" % (statusTmp, statusFname))
    os.rename(statusTmp, statusFname)


def createLockFile(statusFname):
    """Create statusFname + ".lock" or exit with status 1 if it already exists.

    When downloading files, weird things can happen. Even wget sometimes gets
    stuck. So make sure that this program can't run multiple times. The lock is
    removed again at interpreter exit.
    """
    global lockFname
    lockFname = statusFname + ".lock"

    try:
        # mode "x" creates the file and fails if it exists, in one step, so two
        # instances started at the same moment cannot both get the lock
        open(lockFname, "x").close()
    except FileExistsError:
        logging.error("lockfile %s already exists. Check if this program is already running." % lockFname)
        sys.exit(1)

    atexit.register(removeLock)


def removeLock():
    """Delete the lock file made by createLockFile; a missing file is not an error."""
    try:
        os.remove(lockFname)
    except FileNotFoundError:
        pass


def hubPublicMail(centralName, statusFname):
    """Send an email if a hub fails more than 24 times.

    centralName -- name of the hgcentral database that has the hubPublic table
    statusFname -- tab-sep file that keeps the fail counts between runs

    Exits with status 1, without touching the status file, if more than 10 hubs
    failed in this round.
    """
    createLockFile(statusFname)

    urls = getHubUrls(centralName)

    oldUrlInfo = readStatus(statusFname)

    failedUrls, urlEmails = downloadUrls(urls, oldUrlInfo)

    if len(failedUrls) > 10:
        logging.error("More than 10 broken hubs? Something is weird. Please check the network setup.")
        sys.exit(1)

    newUrlInfo = mergeInfo(urls, oldUrlInfo, failedUrls, urlEmails)
    newUrlInfo = sendEmails(newUrlInfo)

    writeStatus(newUrlInfo, statusFname)


def main():
    """Command line entry point."""
    args, options = parseArgs()

    centralName, statusFname = args
    hubPublicMail(centralName, statusFname)


if __name__ == "__main__":
    main()