d27aaec7fbbb9538cc65fdee35f7647d49246b50
max
  Tue Jul 6 12:51:33 2021 +0200
Wrote hubPublicMail monitor as requested by QA, refs #27779

diff --git src/utils/hubPublicMail src/utils/hubPublicMail
new file mode 100755
index 0000000..6d1c47c
--- /dev/null
+++ src/utils/hubPublicMail
@@ -0,0 +1,248 @@
+#!/usr/bin/env python3
+
+import logging, sys, optparse, os, requests, sys, urllib3, atexit, urllib
+from os.path import isfile
+from email.mime.text import MIMEText
+from subprocess import Popen, PIPE
+
+# makes sure that only one instance of the program runs at the same time
+lockFname = None
+
+# switch off insecure SSL warnings
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+fromEmail="genome-www@soe.ucsc.edu"
+
+emailTemplate = """Dear UCSC Public Hub author,
+
+this is an automated email sent by the UCSC Genome Browser Group's hubPublicMail system to alert you
+that your public track hub at the address
+
+%s
+
+has been inaccessible for 24 hours. If it continues to be offline, we may need to remove it
+from our public hubs list at https://genome.ucsc.edu/cgi-bin/hgHubConnect
+
+Do not hesitate to let us know if we can help you resolve this situation, e.g. by updating the URL
+where the hub is hosted or possibly hosting the files on our servers.
+
+You can reach us at genome-www@soe.ucsc.edu.
+
+Your
+UCSC Genome Browser Group
+"""
+# ==== functions =====
+    
+def parseArgs():
+    " setup logging, parse command line arguments and options. -h shows auto-generated help page "
+    parser = optparse.OptionParser("""usage: %prog [options] hgcentralname statusFile - send email if public hub is down
+    Get the list of all public hubs
+    Try to get their URLs and write all to statusFile.
+    If a hub fails, increase count in statusFile.
+    If count is > 24, send an email to hub email and set the failCount to -48.
+
+    Example:
+       hubPublicMail hgcentraltest /tmp/hubPublicStatus.tab
+    """)
+
+    parser.add_option("-d", "--debug", dest="debug", action="store_true", help="show debug messages")
+    #parser.add_option("-f", "--file", dest="file", action="store", help="run on file") 
+    (options, args) = parser.parse_args()
+
+    if args==[]:
+        parser.print_help()
+        exit(1)
+
+    if options.debug:
+        logging.basicConfig(level=logging.DEBUG)
+        logging.getLogger().setLevel(logging.DEBUG)
+    else:
+        logging.basicConfig(level=logging.INFO)
+        logging.getLogger().setLevel(logging.INFO)
+
+    return args, options
+# ----------- main --------------
+def getHubUrls(centralName):
+    " return list of huburls "
+    logging.info("Getting hubUrls from db %s" % centralName)
+    cmd = "hgsql %s -se 'select hubUrl from hubPublic'" % centralName
+    lines = os.popen(cmd).read().splitlines() # please do not suggest using subprocess in code review. Thx.
+    return lines
+
+def parseEmail(s):
+    " return only email address given email value of hub.txt. handles these weird cases "
+    # jferna10@ucsc.edu or max@soe.ucsc.edu
+    # yzz2 at psu.edu
+    # <a HREF="mailto:dunham@ebi.ac.uk"TARGET=_BLANK>Ian Dunham</a>
+    if "<" in s:
+        return s.split('"')[1]
+    if " at " in s:
+        return s.replace(" at ", "@")
+    if " or " in s:
+        return s.split(" or ")[1]
+    return s
+
+def downloadUrls(urls, emails):
+    " try to read all hub.txt in URLs, return list of failed URLs, and dict hub -> email"
+    didFail = list()
+    emails = dict()
+
+    for url in urls:
+        logging.debug("Checking %s" % url)
+
+        reqFailed = False
+        if url.startswith("http"):
+            try:
+                f = requests.get(url, verify=False)
+            except KeyboardInterrupt: # handle ctrl-c for debugging
+                sys.exit(1)
+            except:
+                reqFailed = True
+            text = f.text
+        else:
+            # FTP
+            try:
+                f = urllib.request.urlopen(url)
+            except:
+                reqFailed = True
+            text = f.read().decode("utf8")
+
+        if reqFailed:
+            logging.info("URL %s failed." % url)
+            didFail.append(url)
+            continue
+
+        lines = text.splitlines()
+        for l in lines:
+            l = l.strip()
+            if l=="":
+                continue
+            keyVal = l.strip().split(None, maxsplit=1)
+            if len(keyVal)!=2:
+                # some hubs may have broken hub.txt files. Treat these as if they were broken.
+                didFail.append(url)
+                break
+
+            key, val = keyVal
+            if not key=="email":
+                continue
+            emails[url] = parseEmail(val)
+
+    return didFail, emails
+
+def sendEmail(dest, body):
+    " send email to dest "
+    msg = MIMEText(body)
+    msg["From"] = fromEmail
+    msg["To"] = dest+","+fromEmail
+    msg["Subject"] = "Your UCSC public hub is down"
+    p = Popen(["/usr/sbin/sendmail", "-t", "-oi"], stdin=PIPE)
+    p.communicate(msg.as_bytes())
+    # Both Python 2.X and 3.X
+    # p.communicate(msg.as_bytes() if sys.version_info >= (3,0) else msg.as_string())
+    # Python 2.X
+    # p.communicate(msg.as_string())
+
+def readStatus(fname):
+    " read tab sep file with columns hubUrl, email, failCount "
+    hubs = dict()
+    if not isfile(fname):
+        return hubs
+
+    logging.debug("Reading %s" % fname)
+    for line in open(fname):
+        if line.startswith("#"):
+            continue
+        row = line.rstrip("\n").split("\t")
+        hubUrl, email, failCount = row
+        hubs[hubUrl] = (email, int(failCount))
+    return hubs
+
+def mergeInfo(urls, oldUrlInfo, failedUrls, urlEmails):
+    """ given a list hubPublic URLs, a list of failed URLs and a dict with url-> email,
+    return a dict with URL -> (email, failedCount) """
+    urlInfo = {}
+    for url in urls:
+        oldInfo = oldUrlInfo.get(url)
+        if oldInfo is None:
+            oldEmail = urlEmails.get(url)
+            if oldEmail is None:
+                print("URL %s is broken and there is no email in the status file. Skipping it." % url)
+                continue
+            oldInfo = [oldEmail, 0]
+
+        email = urlEmails.get(url) # prefer most current email address
+        if email is None:
+            email = oldInfo[0]
+
+        failCount = oldInfo[1]
+
+        if url in failedUrls:
+            failCount += 1
+
+        urlInfo[url] = (email, failCount)
+
+    return urlInfo
+
+def sendEmails(urlInfo):
+    " given dict url -> (email, failCount), send email if failCount > 24 and set failCount = -48 "
+    for url, (destEmail, failCount) in urlInfo.items():
+        if failCount>24:
+            logging.info("HUB %s BROKEN - sending email to %s" % (url, destEmail))
+            emailText = emailTemplate % url
+            sendEmail(destEmail, emailText)
+            urlInfo[url] = (destEmail, -48)
+    return urlInfo
+
+def writeStatus(urlInfo, statusFname):
+    " write new status file "
+    logging.debug("Writing %s" % statusFname)
+    with open(statusFname, "wt") as ofh:
+        ofh.write("#url\temail\tfailCount\n")
+        for url, (email, failCount) in urlInfo.items():
+            ofh.write("\t".join([url, email, str(failCount)]))
+            ofh.write("\n")
+
+def createLockFile(statusFname):
+    """ when downloading files, weird things can happen. even wget sometimes gets stuck. So make
+    sure that this program can't run multiple times """
+    global lockFname
+    lockFname = statusFname+".lock"
+
+    if isfile(lockFname):
+        logging.error("lockfile %s already exists. Check if this program is already running." % lockFname)
+        sys.exit(1)
+
+    open(lockFname, "w") # create file
+    atexit.register(removeLock)
+
+def removeLock():
+    os.remove(lockFname)
+
+def hubPublicMail(centralName, statusFname):
+    " send email if a hub fails more than 24 times "
+    createLockFile(statusFname)
+
+    urls = getHubUrls(centralName)
+
+    oldUrlInfo = readStatus(statusFname)
+
+    failedUrls, urlEmails = downloadUrls(urls, oldUrlInfo)
+
+    if len(failedUrls) > 10:
+        logging.error("More than 10 broken hubs? Something is weird. Please check the network setup.")
+        sys.exit(1)
+
+    newUrlInfo = mergeInfo(urls, oldUrlInfo, failedUrls, urlEmails)
+    newUrlInfo = sendEmails(newUrlInfo)
+
+    writeStatus(newUrlInfo, statusFname)
+
+def main():
+    args, options = parseArgs()
+
+    centralName, statusFname = args
+    hubPublicMail(centralName, statusFname)
+
+
+main()