3bfceef9b449d5a09134333d4762bbf146e6f805
max
  Fri Aug 27 02:49:30 2021 -0700
fixing hubPublicMail script, refs #27779

diff --git src/utils/hubPublicMail src/utils/hubPublicMail
index 6d1c47c..23e3be0 100755
--- src/utils/hubPublicMail
+++ src/utils/hubPublicMail
@@ -24,34 +24,35 @@
 from our public hubs list at https://genome.ucsc.edu/cgi-bin/hgHubConnect
 
 Do not hesitate to let us know if we can help you resolve this situation, e.g. by updating the URL
 where the hub is hosted or possibly hosting the files on our servers.
 
 You can reach us at genome-www@soe.ucsc.edu.
 
 Your
 UCSC Genome Browser Group
 """
 # ==== functions =====
     
 def parseArgs():
     " setup logging, parse command line arguments and options. -h shows auto-generated help page "
     parser = optparse.OptionParser("""usage: %prog [options] hgcentralname statusFile - send email if public hub is down
-    Get the list of all public hubs
-    Try to get their URLs and write all to statusFile.
-    If a hub fails, increase count in statusFile.
-    If count is > 24, send an email to hub email and set the failCount to -48.
+    Goes through the following steps:
+    1) Get the list of all public hubs
+    2) Try to get their URLs and write all to statusFile.
+    3) If a hub fails, increase count in statusFile.
+    4) If count is > 24, send an email to hub email and set the failCount to -48.
 
     Example:
        hubPublicMail hgcentraltest /tmp/hubPublicStatus.tab
     """)
 
     parser.add_option("-d", "--debug", dest="debug", action="store_true", help="show debug messages")
     #parser.add_option("-f", "--file", dest="file", action="store", help="run on file") 
     (options, args) = parser.parse_args()
 
     if args==[]:
         parser.print_help()
         exit(1)
 
     if options.debug:
         logging.basicConfig(level=logging.DEBUG)
@@ -90,34 +91,34 @@
     for url in urls:
         logging.debug("Checking %s" % url)
 
         reqFailed = False
         if url.startswith("http"):
             try:
                 f = requests.get(url, verify=False)
             except KeyboardInterrupt: # handle ctrl-c for debugging
                 sys.exit(1)
             except:
                 reqFailed = True
             text = f.text
         else:
             # FTP
             try:
-                f = urllib.request.urlopen(url)
+                f = urllib.request.urlopen(url, timeout=10)
+                text = f.read().decode("utf8")
             except:
                 reqFailed = True
-            text = f.read().decode("utf8")
 
         if reqFailed:
             logging.info("URL %s failed." % url)
             didFail.append(url)
             continue
 
         lines = text.splitlines()
         for l in lines:
             l = l.strip()
             if l=="":
                 continue
             keyVal = l.strip().split(None, maxsplit=1)
             if len(keyVal)!=2:
                 # some hubs may have broken hub.txt files. Treat these as if they were broken.
                 didFail.append(url)
@@ -142,30 +143,33 @@
     # p.communicate(msg.as_bytes() if sys.version_info >= (3,0) else msg.as_string())
     # Python 2.X
     # p.communicate(msg.as_string())
 
 def readStatus(fname):
     " read tab sep file with columns hubUrl, email, failCount "
     hubs = dict()
     if not isfile(fname):
         return hubs
 
     logging.debug("Reading %s" % fname)
     for line in open(fname):
         if line.startswith("#"):
             continue
         row = line.rstrip("\n").split("\t")
+        if len(row)!=3:
+            logging.error("Cannot parse line in status file: %s" % repr(line))
+            assert(False)
         hubUrl, email, failCount = row
         hubs[hubUrl] = (email, int(failCount))
     return hubs
 
 def mergeInfo(urls, oldUrlInfo, failedUrls, urlEmails):
     """ given a list hubPublic URLs, a list of failed URLs and a dict with url-> email,
     return a dict with URL -> (email, failedCount) """
     urlInfo = {}
     for url in urls:
         oldInfo = oldUrlInfo.get(url)
         if oldInfo is None:
             oldEmail = urlEmails.get(url)
             if oldEmail is None:
                 print("URL %s is broken and there is no email in the status file. Skipping it." % url)
                 continue
@@ -184,36 +188,39 @@
 
     return urlInfo
 
 def sendEmails(urlInfo):
     " given dict url -> (email, failCount), send email if failCount > 24 and set failCount = -48 "
     for url, (destEmail, failCount) in urlInfo.items():
         if failCount>24:
             logging.info("HUB %s BROKEN - sending email to %s" % (url, destEmail))
             emailText = emailTemplate % url
             sendEmail(destEmail, emailText)
             urlInfo[url] = (destEmail, -48)
     return urlInfo
 
 def writeStatus(urlInfo, statusFname):
     " write new status file "
-    logging.debug("Writing %s" % statusFname)
-    with open(statusFname, "wt") as ofh:
+    statusTmp = statusFname+".tmp"
+    logging.debug("Writing %s" % statusTmp)
+    with open(statusTmp, "wt") as ofh:
         ofh.write("#url\temail\tfailCount\n")
         for url, (email, failCount) in urlInfo.items():
             ofh.write("\t".join([url, email, str(failCount)]))
             ofh.write("\n")
+    logging.debug("Renaming %s to %s" % (statusTmp, statusFname))
+    os.rename(statusTmp, statusFname)
 
 def createLockFile(statusFname):
     """ when downloading files, weird things can happen. even wget sometimes gets stuck. So make
     sure that this program can't run multiple times """
     global lockFname
     lockFname = statusFname+".lock"
 
     if isfile(lockFname):
         logging.error("lockfile %s already exists. Check if this program is already running." % lockFname)
         sys.exit(1)
 
     open(lockFname, "w") # create file
     atexit.register(removeLock)
 
 def removeLock():