ba218fbf9fae129b38710a813d9d748f89f445fa
lrnassar
  Wed May 29 17:21:43 2024 -0700
Fixing up the monthly usage stats cron. Major changes include converting from ipython to python, fixing an issue where default tracks were not being filtered out, and correctly identifying which mirror each hub comes from, refs #26191

diff --git src/utils/qa/assemblyStatsCron.py src/utils/qa/assemblyStatsCron.py
index 69cbb3c..c754dac 100755
--- src/utils/qa/assemblyStatsCron.py
+++ src/utils/qa/assemblyStatsCron.py
@@ -1,260 +1,283 @@
 #07/20/19
-#This was rendered from a jupyter notebook, hence the ipython3 requirement for running
+#This was adapted from a jupyter notebook - hence lots of weird bash calls
 
 import datetime
 from collections import OrderedDict
 import getpass
+import subprocess
+
+def bash(cmd):
+    """Run the cmd in bash subprocess"""
+    try:
+        rawBashOutput = subprocess.run(cmd, check=True, shell=True,\
+                                       stdout=subprocess.PIPE, universal_newlines=True, stderr=subprocess.STDOUT)
+        bashStdoutt = rawBashOutput.stdout
+    except subprocess.CalledProcessError as e:
+        raise RuntimeError("command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, e.output))
+    return(bashStdoutt)
+
+def bashNoErrorCatch(cmd):
+    """Run the cmd in bash subprocess, don't catch error since grep returns exit code 1 when no match is found"""
+    try:
+        rawBashOutput = subprocess.run(cmd, check=True, shell=True,\
+                                       stdout=subprocess.PIPE, universal_newlines=True, stderr=subprocess.STDOUT)
+        bashStdoutt = rawBashOutput.stdout.rstrip().split("\n")
+    except:
+        bashStdoutt = []
+    return(bashStdoutt)
 
 user = getpass.getuser()
 
 # Get the year to query proper wwwstats directory
 today = datetime.datetime.today()
 year = str(today).split('-')[0]
 
 # Get the last 5 error logs from the RR
-latestLogs = get_ipython().getoutput(u'ls /hive/users/chmalee/logs/trimmedLogs/result/hgw1')
+latestLogs = bash('ls /hive/users/chmalee/logs/trimmedLogs/result/hgw1').rstrip().split("\n")
 ######################## FOR TESTING JUST ONE LOG
 # latestLogs = latestLogs[len(latestLogs)-1:]
 #########################
 latestLogs = latestLogs[len(latestLogs)-5:]
 
 nodes = ['RR', 'asiaNode', 'euroNode'] #Add nodes with error logs, nodes can be added or removed
 
 machines = ['hgw1','hgw2'] #Add hgw machines to check
 for node in nodes:
     if node == 'RR':
         for machine in machines:
             for log in latestLogs: #Copy the 5 latest error logs for each of the rr machines
-                get_ipython().system(u" cp /hive/users/chmalee/logs/trimmedLogs/result/'$machine'/'$log' /hive/users/'$user'/ErrorLogs/'$node''$machine''$log'")
+                bash("cp /hive/users/chmalee/logs/trimmedLogs/result/"+machine+'/'+log+' /hive/users/'+user+'/ErrorLogs/'+node+machine+log)
+
     else:
-        latestLogs = get_ipython().getoutput(u"ls /hive/users/chmalee/logs/trimmedLogs/result/'$node'")
+        latestLogs = bash("ls /hive/users/chmalee/logs/trimmedLogs/result/"+node).rstrip().split("\n")
         latestLogs = latestLogs[len(latestLogs)-5:]
         for log in latestLogs: #Copy the 5 latest error logs for each of the other nodes
-            get_ipython().system(u" cp /hive/users/chmalee/logs/trimmedLogs/result/'$node'/'$log' /hive/users/'$user'/ErrorLogs/'$node''$log'")
-#     elif node == 'asiaNode':
-#         latestLogs = ! ls /hive/users/chmalee/logs/trimmedLogs/result/asiaNode
-#         latestLogs = latestLogs[len(latestLogs)-5:]
-#         for log in latestLogs: #Copy the 5 latest error logs for each of the other nodes
-#             ! cp /hive/users/chmalee/logs/trimmedLogs/result/asiaNode/'$log' /hive/users/'$user'/ErrorLogs/'$node''$log'
+            bash('cp /hive/users/chmalee/logs/trimmedLogs/result/'+node+'/'+log+' /hive/users/'+user+'/ErrorLogs/'+node+log)
 
 # Run generateUsageStats.py with -d (directory), -t (default track stats), -o (output)
-get_ipython().system(u" /cluster/home/'$user'/kent/src/hg/logCrawl/dbTrackAndSearchUsage/generateUsageStats.py -d /hive/users/'$user'/ErrorLogs --allOutput -t -o /hive/users/'$user'/ErrorLogsOutput > /dev/null")
+bash("/cluster/home/"+user+'/kent/src/hg/logCrawl/dbTrackAndSearchUsage/generateUsageStats.py -d /hive/users/'+user+'/ErrorLogs --allOutput -t -o /hive/users/'+user+'/ErrorLogsOutput > /dev/null')
+
 #The following section pulls out a list of default track for the top X assemblies for filtering
 file = open("/hive/users/"+user+"/ErrorLogsOutput/defaults.txt", "a")
 
 #The head command can be expanded to be more inclusive if additional assembly defaults are finding their way onto the list
-get_ipython().system(u" sort /hive/users/'$user'/ErrorLogsOutput/dbCounts.tsv -rnk2 > /hive/users/'$user'/ErrorLogsOutput/dbCountsTopSorted.tsv")
-dbs = get_ipython().getoutput(u'head -n 4 /hive/users/\'$user\'/ErrorLogsOutput/dbCountsTopSorted.tsv | cut -f1 -d "\t"')
+bash("sort /hive/users/"+user+'/ErrorLogsOutput/dbCounts.tsv -rnk2 > /hive/users/'+user+'/ErrorLogsOutput/dbCountsTopSorted.tsv')
+dbs = bash('head -n 4 /hive/users/'+user+'/ErrorLogsOutput/dbCountsTopSorted.tsv | cut -f1 -d "\t"').rstrip().split("\n")
+
 
 for db in dbs[0:4]:
     #The following part queries hgTracks for each of the assemblies and extracts the list of defaults
-    get_ipython().system(u" echo '$db' > /hive/users/'$user'/ErrorLogsOutput/temp.txt")
-    defaults = get_ipython().getoutput(u'HGDB_CONF=$HOME/.hg.conf.beta /usr/local/apache/cgi-bin/hgTracks db=$(cat /hive/users/"$USER"/ErrorLogsOutput/temp.txt) > /dev/null')
+    bash('echo '+db+' > /hive/users/'+user+'/ErrorLogsOutput/temp.txt')
+    defaults = bash('HGDB_CONF=$HOME/.hg.conf.beta /usr/local/apache/cgi-bin/hgTracks db='+db+' hgt.trackImgOnly=1 > /dev/null').rstrip().split("\n")
+
     for trackLogs in range(len(defaults)):
         if trackLogs >= (len(defaults)-1):
             pass
         else:
             defaultsLine = defaults[trackLogs].split(" ")[4]
             defaultsLine = defaultsLine.split(',')
             for track in range(len(defaultsLine)):
                 defaultsLine[track] = defaultsLine[track][0:(len(defaultsLine[track])-2)]
                 file.write(db+"\t"+defaultsLine[track]+"\n")
     file.write(db+"\t"+"cytoBand"+"\n")
 file.close()
 
-get_ipython().system(u' echo This cronjob pulls out GB stats over the last month, across all RR machines and Asia/Euro mirrors using the generateUsageStats.py script. It only counts each hgsid occurrence once, filtering our any hgsid that only showed up one time. > /hive/users/"$USER"/ErrorLogsOutput/results.txt')
+bash('echo This cronjob pulls out GB stats over the last month, across all RR machines and Asia/Euro mirrors using the generateUsageStats.py script. It only counts each hgsid occurrence once, filtering our any hgsid that only showed up one time. > /hive/users/'+user+'/ErrorLogsOutput/results.txt')
 
-get_ipython().system(u' echo >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
-get_ipython().system(u' echo List of db usage: >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
-get_ipython().system(u" echo db$'\\t'dbUse >> /hive/users/'$user'/ErrorLogsOutput/results.txt")
-get_ipython().system(u' echo -------------------------------------------------------------------------------------- >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
+bash('echo >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
+bash('echo List of db usage: >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
+bash("echo db$'\\t'dbUse >> /hive/users/"+user+"/ErrorLogsOutput/results.txt")
+bash('echo -------------------------------------------------------------------------------------- >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
 
-get_ipython().system(u' sort /hive/users/"$USER"/ErrorLogsOutput/dbCounts.tsv -rnk2 > /hive/users/"$USER"/ErrorLogsOutput/dbCounts.tsv.sorted')
-get_ipython().system(u' head -n 15 /hive/users/"$USER"/ErrorLogsOutput/dbCounts.tsv.sorted | ~markd/bin/tabFmt >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
+bash('sort /hive/users/'+user+'/ErrorLogsOutput/dbCounts.tsv -rnk2 > /hive/users/'+user+'/ErrorLogsOutput/dbCounts.tsv.sorted')
+bash('head -n 15 /hive/users/'+user+'/ErrorLogsOutput/dbCounts.tsv.sorted | ~markd/bin/tabFmt >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
 
-get_ipython().system(u' echo >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
-get_ipython().system(u' echo "List of default track usage for hg38, sorted by how many users are turning off the track:" >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
-get_ipython().system(u" echo db$'\\t'trackUse$'\\t'% using$'\\t'% turning off$'\\t'trackName >> /hive/users/'$user'/ErrorLogsOutput/results.txt")
-get_ipython().system(u' echo -------------------------------------------------------------------------------------- >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
+bash('echo >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
+bash('echo "List of default track usage for hg38, sorted by how many users are turning off the track:" >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
+bash("echo db$'\\t'trackUse$'\\t'% using$'\\t'% turning off$'\\t'trackName >> /hive/users/"+user+"/ErrorLogsOutput/results.txt")
+bash('echo -------------------------------------------------------------------------------------- >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
 
-get_ipython().system(u' grep ^hg38 /hive/users/"$USER"/ErrorLogsOutput/defaultCounts.tsv | grep -v "MarkH3k27ac" | sort -nrk 5 | awk -v OFS="\\t" \'{ print $1,$3,$4,$5,$2 }\' | head -n 15 | ~markd/bin/tabFmt >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
+bash('grep ^hg38 /hive/users/'+user+'/ErrorLogsOutput/defaultCounts.tsv | grep -v "MarkH3k27ac" | sort -nrk 5 | awk -v OFS="\\t" \'{ print $1,$3,$4,$5,$2 }\' | head -n 15 | ~markd/bin/tabFmt >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
 
-get_ipython().system(u' echo >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
-get_ipython().system(u' echo "List of default track usage for hg19, sorted by how many users are turning off the track:" >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
-get_ipython().system(u" echo db$'\\t'trackUse$'\\t'% using$'\\t'% turning off$'\\t'trackName >> /hive/users/'$user'/ErrorLogsOutput/results.txt")
-get_ipython().system(u' echo -------------------------------------------------------------------------------------- >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
+bash('echo >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
+bash('echo "List of default track usage for hg19, sorted by how many users are turning off the track:" >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
+bash("echo db$'\\t'trackUse$'\\t'% using$'\\t'% turning off$'\\t'trackName >> /hive/users/"+user+"/ErrorLogsOutput/results.txt")
+bash('echo -------------------------------------------------------------------------------------- >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
 
-get_ipython().system(u' grep ^hg19 /hive/users/"$USER"/ErrorLogsOutput/defaultCounts.tsv | grep -v "MarkH3k27ac" | sort -nrk 5 | awk -v OFS="\\t" \'{ print $1,$3,$4,$5,$2 }\' | head -n 15| ~markd/bin/tabFmt >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
+bash('grep ^hg19 /hive/users/'+user+'/ErrorLogsOutput/defaultCounts.tsv | grep -v "MarkH3k27ac" | sort -nrk 5 | awk -v OFS="\\t" \'{ print $1,$3,$4,$5,$2 }\' | head -n 15| ~markd/bin/tabFmt >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
 
-get_ipython().system(u' echo >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
-get_ipython().system(u' echo List of non-default track usage: >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
-get_ipython().system(u" echo db$'\\t'trackUse$'\\t'trackName >> /hive/users/'$user'/ErrorLogsOutput/results.txt")
-get_ipython().system(u' echo -------------------------------------------------------------------------------------- >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
+bash('echo >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
+bash('echo List of non-default track usage: >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
+bash("echo db$'\\t'trackUse$'\\t'trackName >> /hive/users/"+user+"/ErrorLogsOutput/results.txt")
+bash('echo -------------------------------------------------------------------------------------- >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
 
-get_ipython().system(u' sort /hive/users/"$USER"/ErrorLogsOutput/trackCounts.tsv -rnk3 > /hive/users/"$USER"/ErrorLogsOutput/trackCounts.tsv.sorted')
-get_ipython().system(u' cat /hive/users/"$USER"/ErrorLogsOutput/trackCounts.tsv.sorted | grep -v -f /hive/users/"$USER"/ErrorLogsOutput/defaults.txt > /hive/users/"$USER"/ErrorLogsOutput/trackCounts.tsv.sorted.noDefaults')
-get_ipython().system(u' head -n 15 /hive/users/"$USER"/ErrorLogsOutput/trackCounts.tsv.sorted.noDefaults | awk -v OFS="\\t" \'{ print $1,$3,$2 }\' | ~markd/bin/tabFmt >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
+bash('sort /hive/users/'+user+'/ErrorLogsOutput/trackCounts.tsv -rnk3 > /hive/users/'+user+'/ErrorLogsOutput/trackCounts.tsv.sorted')
+bash('cat /hive/users/'+user+'/ErrorLogsOutput/trackCounts.tsv.sorted | grep -v -f /hive/users/'+user+'/ErrorLogsOutput/defaults.txt > /hive/users/'+user+'/ErrorLogsOutput/trackCounts.tsv.sorted.noDefaults')
+bash('head -n 15 /hive/users/'+user+'/ErrorLogsOutput/trackCounts.tsv.sorted.noDefaults | awk -v OFS="\\t" \'{ print $1,$3,$2 }\' | ~markd/bin/tabFmt >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
 
-get_ipython().system(u' echo >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
-get_ipython().system(u' echo "List of public hub usage (only most used track represented):" >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
-get_ipython().system(u" echo db$'\\t'trackUse$'\\t'track$'\\t'pubHub >> /hive/users/'$user'/ErrorLogsOutput/results.txt")
-get_ipython().system(u' echo -------------------------------------------------------------------------------------- >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
+bash('echo >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
+bash('echo "List of public hub usage (only most used track represented):" >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
+bash("echo db$'\\t'trackUse$'\\t'track$'\\t'pubHub >> /hive/users/"+user+"/ErrorLogsOutput/results.txt")
+bash('echo -------------------------------------------------------------------------------------- >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
 
 file2 = open("/hive/users/"+user+"/ErrorLogsOutput/filteredHubs.txt", "a") #Using a file to be able to use the same ~markd/bin/tabFmt format
-get_ipython().system(u' sort /hive/users/"$USER"/ErrorLogsOutput/trackCountsHubs.tsv -rnk4 -t $\'\\t\' > /hive/users/"$USER"/ErrorLogsOutput/trackCountsHubs.tsv.sorted')
-topHubs = get_ipython().getoutput(u'head -n 15000 /hive/users/"$USER"/ErrorLogsOutput/trackCountsHubs.tsv.sorted ')
+bash("sort /hive/users/"+user+"/ErrorLogsOutput/trackCountsHubs.tsv -rnk4 -t $\'\\t\' > /hive/users/"+user+"/ErrorLogsOutput/trackCountsHubs.tsv.sorted")
+topHubs = bash('head -n 15000 /hive/users/'+user+'/ErrorLogsOutput/trackCountsHubs.tsv.sorted').rstrip().split("\n")
 #This section pulls out only a single occurence of each public hub, picking the first track (most uses) to represent it
 results = OrderedDict()
 for each in topHubs:
     if len(results.keys()) < 15:
         each = each.split('\t')
         if each[0] not in results.keys():
             results[each[0]] = each[0:]
     else:
         pass
 for key, value in results.items():
     file2.write(value[0]+"\t"+value[1]+"\t"+value[2]+"\t"+value[3]+"\n")
 file2.close()
 
-get_ipython().system(u' cat /hive/users/"$USER"/ErrorLogsOutput/filteredHubs.txt | awk -F "\\t" -v OFS="\\t" \'{ print $2,$4,$3,$1 }\' | ~markd/bin/tabFmt >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
+bash('cat /hive/users/'+user+'/ErrorLogsOutput/filteredHubs.txt | awk -F "\\t" -v OFS="\\t" \'{ print $2,$4,$3,$1 }\' | ~markd/bin/tabFmt >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
 
-get_ipython().system(u' echo >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
-get_ipython().system(u' echo List of public hub track usage overall: >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
-get_ipython().system(u" echo db$'\\t'trackUse$'\\t'track$'\\t'pubHub >> /hive/users/'$user'/ErrorLogsOutput/results.txt")
-get_ipython().system(u' echo -------------------------------------------------------------------------------------- >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
+bash('echo >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
+bash('echo List of public hub track usage overall: >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
+bash("echo db$'\\t'trackUse$'\\t'track$'\\t'pubHub >> /hive/users/"+user+"/ErrorLogsOutput/results.txt")
+bash('echo -------------------------------------------------------------------------------------- >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
 
-get_ipython().system(u' head -n 15 /hive/users/"$USER"/ErrorLogsOutput/trackCountsHubs.tsv.sorted | awk -F "\\t" -v OFS="\\t" \'{ print $2,$4,$3,$1 }\' | ~markd/bin/tabFmt >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
+bash('head -n 15 /hive/users/'+user+'/ErrorLogsOutput/trackCountsHubs.tsv.sorted | awk -F "\\t" -v OFS="\\t" \'{ print $2,$4,$3,$1 }\' | ~markd/bin/tabFmt >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
 
 #Query hubPublic and hubStats in order to filter out public hubs then sort out the IDs
-get_ipython().system(u' /cluster/bin/x86_64/hgsql -h genome-centdb -e "select hubUrl from hubPublic" hgcentral > /hive/users/"$USER"/ErrorLogsOutput/hubPublicHubUrl.txt')
-get_ipython().system(u' /cluster/bin/x86_64/hgsql -h genome-centdb -e "select hubUrl,id from hubStatus" hgcentral> /hive/users/"$USER"/ErrorLogsOutput/hubStatusHubUrl.txt')
-get_ipython().system(u' grep -f /hive/users/"$USER"/ErrorLogsOutput/hubPublicHubUrl.txt /hive/users/"$USER"/ErrorLogsOutput/hubStatusHubUrl.txt | cut -f2 > /hive/users/"$USER"/ErrorLogsOutput/publicIDs.txt')
+bash('/cluster/bin/x86_64/hgsql -h genome-centdb -e "select hubUrl from hubPublic" hgcentral > /hive/users/'+user+'/ErrorLogsOutput/hubPublicHubUrl.txt')
+bash('/cluster/bin/x86_64/hgsql -h genome-centdb -e "select hubUrl,id from hubStatus" hgcentral> /hive/users/'+user+'/ErrorLogsOutput/hubStatusHubUrl.txt')
+bash('grep -f /hive/users/'+user+'/ErrorLogsOutput/hubPublicHubUrl.txt /hive/users/'+user+'/ErrorLogsOutput/hubStatusHubUrl.txt | cut -f2 > /hive/users/'+user+'/ErrorLogsOutput/publicIDs.txt')
 
 #Add hub_ID format to match stats program output, then grep out the public hubs from the track list
-get_ipython().system(u' cat /hive/users/"$USER"/ErrorLogsOutput/publicIDs.txt | sed s/^/hub_/g > /hive/users/"$USER"/ErrorLogsOutput/hubPublicIDs.txt')
-get_ipython().system(u' grep "hub_" /hive/users/"$USER"/ErrorLogsOutput/trackCounts.tsv | sort -rnk3 > /hive/users/"$USER"/ErrorLogsOutput/allTracksOrderedUsage.txt')
+bash('cat /hive/users/'+user+'/ErrorLogsOutput/publicIDs.txt | sed s/^/hub_/g > /hive/users/'+user+'/ErrorLogsOutput/hubPublicIDs.txt')
+bash('grep "hub_" /hive/users/'+user+'/ErrorLogsOutput/trackCounts.tsv | sort -rnk3 > /hive/users/'+user+'/ErrorLogsOutput/allTracksOrderedUsage.txt')
 
 #Pull out whole fields from euro and RR hubStatus in order to collect the info for matching IDs
-get_ipython().system(u' ssh qateam@genome-euro "hgsql -e \'select id,hubUrl,shortLabel,lastOkTime from hubStatus\' hgcentral" > /hive/users/"$USER"/ErrorLogsOutput/genomeEuroHubStatus.txt')
-get_ipython().system(u' /cluster/bin/x86_64/hgsql -h genome-centdb -e "select id,hubUrl,shortLabel,lastOkTime from hubStatus where lastOkTime !=\'\'" hgcentral > /hive/users/"$USER"/ErrorLogsOutput/RRHubStatus.txt')
+bash('ssh qateam@genome-euro "hgsql -e \'select id,hubUrl,shortLabel,lastOkTime from hubStatus\' hgcentral" > /hive/users/'+user+'/ErrorLogsOutput/genomeEuroHubStatus.txt')
+bash('/cluster/bin/x86_64/hgsql -h genome-centdb -e "select id,hubUrl,shortLabel,lastOkTime from hubStatus where lastOkTime !=\'\'" hgcentral > /hive/users/'+user+'/ErrorLogsOutput/RRHubStatus.txt')
 #The genome-asia hubStatus is automatically generated via cron by qateam on asia and copied to dev. Use line below to create a new file from personal user
-#! ssh "$USER"@genome-asia "hgsql -e 'select id,hubUrl,shortLabel,lastOkTime from hubStatus' hgcentral" > /hive/users/"$USER"/ErrorLogsOutput/genomeAsiaHubStatus.txt
+if user != 'qateam':
+    bash("ssh "+user+"@genome-asia \"hgsql -e 'select id,hubUrl,shortLabel,lastOkTime from hubStatus' hgcentral\" > /hive/users/"+user+"/ErrorLogsOutput/genomeAsiaHubStatus.txt")
 
-hubs = get_ipython().getoutput(u'cat /hive/users/"$USER"/ErrorLogsOutput/allTracksOrderedUsage.txt')
-pubHubUrls = get_ipython().getoutput(u'cat /hive/users/"$USER"/ErrorLogsOutput/hubPublicHubUrl.txt')
+hubs = bash('cat /hive/users/'+user+'/ErrorLogsOutput/allTracksOrderedUsage.txt').rstrip().split("\n")
+pubHubUrls = bash('cat /hive/users/'+user+'/ErrorLogsOutput/hubPublicHubUrl.txt').rstrip().split("\n")
 lastMonth = today - datetime.timedelta(days=30)
 lastMonthFormat = lastMonth.strftime('%Y-%m')
 hubsDic = OrderedDict()
 
 for hub in hubs: #Start iterating through track lines, stop pulling items at 20
     if len(hubsDic.keys()) < 20:
         #Pull out the use number, associated db, and hubkey
         hub = hub.split("\t")
         if 'hub' in hub[1]:
             hubKey = hub[1].split("_")[1]
         elif 'hub' in hub[0]:
             hubKey = hub[0].split("_")[1]
         else:
             print(hub)
         
         hubCount = hub[2]
         hubDb = hub[0]
         if hubKey not in hubsDic.keys(): #Check that this key has not yet been counted
             entryMade = False #Reset entryMade variable, made to check RR and then Euro if RR has no match
             # Grep out the proper hubstatus line, returns an empty list if no matches
             #        ^1017$'\t'
-            rrHub = get_ipython().getoutput(u"grep ^'$hubKey'$'\\t' /hive/users/'$user'/ErrorLogsOutput/RRHubStatus.txt")
+            rrHub = bashNoErrorCatch("grep ^"+hubKey+"$'\\t' /hive/users/"+user+"/ErrorLogsOutput/RRHubStatus.txt")
 
             if rrHub != []:
                 #Pull out matching hubURL, ShortLabel, lastOKtime (used to see compare against)
                 #recurring hubIDs between RR and euro
                 rrHub = rrHub[0].split("\t")
+                if len(rrHub) > 2:
                     rrHubURL = rrHub[1]
                     rrHubShortLabel = rrHub[2]
                     rrHubLastOk = rrHub[3]
                     if rrHubLastOk != '':
                         rrHubLastOk = datetime.datetime.strptime(rrHubLastOk.split(" ")[0], '%Y-%m-%d')
                         if rrHubLastOk > lastMonth: #If  hub has been OK in last month, assume it's correct
                             if rrHubURL in pubHubUrls:
                                 entryMade = True
                             else: #Pull out all relevant info and save to dictionary
                                 hubsDic[hubKey] = {}
                                 hubsDic[hubKey]['shortLabel'] = rrHubShortLabel
                                 hubsDic[hubKey]['hubURL'] = rrHubURL
                                 hubsDic[hubKey]['hubCount'] = hubCount
                                 hubsDic[hubKey]['hubDb'] = hubDb
                                 hubsDic[hubKey]['machine'] = "RR"
                                 entryMade = True #Set true so that the hubID isn't searched for in Euro
                         
             if entryMade is False: #This assumes that the hubID was either not present in the RR hubStatus, or it pointed to a hub not used in last month
-                euroHub = get_ipython().getoutput(u"grep ^'$hubKey'$'\\t' /hive/users/'$user'/ErrorLogsOutput/genomeEuroHubStatus.txt")
+                euroHub = bashNoErrorCatch("grep ^"+hubKey+"$'\\t' /hive/users/"+user+"/ErrorLogsOutput/genomeEuroHubStatus.txt")
                 if euroHub != []:
                     euroHub = euroHub[0].split("\t")
+                    if len(euroHub) > 2:
                         euroHubURL = euroHub[1]
                         euroHubShortLabel = euroHub[2]
                         euroHubLastOk = euroHub[3]
                         if euroHubLastOk != '':
                             euroHubLastOk = datetime.datetime.strptime(euroHubLastOk.split(" ")[0], '%Y-%m-%d')
                             if euroHubLastOk > lastMonth:
                                 if euroHub[1] in pubHubUrls:
                                     entryMade = True
                                 else:
                                     hubsDic[hubKey] = {}
                                     hubsDic[hubKey]['shortLabel'] = euroHubShortLabel
                                     hubsDic[hubKey]['hubURL'] = euroHubURL
                                     hubsDic[hubKey]['hubCount'] = hubCount
                                     hubsDic[hubKey]['hubDb'] = hubDb
                                     hubsDic[hubKey]['machine'] = "Euro"
                                     entryMade = True
                             
             if entryMade is False: #This assumes that the hubID was either not present in the RR hubStatus, or it pointed to a hub not used in last month
-                asiaHub = get_ipython().getoutput(u"grep ^'$hubKey'$'\\t' /hive/users/qateam/ErrorLogsOutput/genomeAsiaHubStatus.txt")
+                asiaHub = bashNoErrorCatch("grep ^"+hubKey+"$'\\t' /hive/users/"+user+"/ErrorLogsOutput/genomeAsiaHubStatus.txt")
                 if asiaHub != []:
                     asiaHub = asiaHub[0].split("\t")
+                    if len(asiaHub) > 2:
                         asiaHubURL = asiaHub[1]
                         asiaHubShortLabel = asiaHub[2]
                         asiaHubLastOk = asiaHub[3]
                         if asiaHubLastOk != '':
                             asiaHubLastOk = datetime.datetime.strptime(asiaHubLastOk.split(" ")[0], '%Y-%m-%d')
                             if asiaHubLastOk > lastMonth:
                                 if asiaHub[1] in pubHubUrls:
                                     pass
                                 else:
                                     hubsDic[hubKey] = {}
                                     hubsDic[hubKey]['shortLabel'] = asiaHubShortLabel
                                     hubsDic[hubKey]['hubURL'] = asiaHubURL
                                     hubsDic[hubKey]['hubCount'] = hubCount
                                     hubsDic[hubKey]['hubDb'] = hubDb
                                     hubsDic[hubKey]['machine'] = "Asia"
                                     entryMade = True
                     else:
-                        get_ipython().system(u" echo The following hub: '$hub' was not found in the RR/euro/asia hubStatus with a lastOkTime within the last month. This likely means an error has occurred. >> /hive/users/'$user'/ErrorLogsOutput/results.txt")
-
+                        bash("echo The following hub: "+hub+" was not found in the RR/euro/asia hubStatus with a lastOkTime within the last month. This likely means an error has occurred. >> /hive/users/"+user+"/ErrorLogsOutput/results.txt")
 
 #Create new file to write out the top 20 most used hubs
 hubsNotPublic = open("/hive/users/"+user+"/ErrorLogsOutput/hubsNotPublic.txt", "a")
 for key in hubsDic.keys():
     hubsNotPublic.write(hubsDic[key]['hubDb']+"\t"+hubsDic[key]['machine']+"\t"+str(hubsDic[key]['hubCount'])+"\t"+hubsDic[key]['shortLabel']+"\t"+hubsDic[key]['hubURL']+"\n")
 hubsNotPublic.close()
 
-get_ipython().system(u' echo >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
-get_ipython().system(u' echo "List of hub uses that are not public hubs. Some public hubs sneak in due to alternative hubUrl:" >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
-get_ipython().system(u" echo db$'\\t'machine$'\\t'trackUse$'\\t'shortLabel$'\\t'hubUrl >> /hive/users/'$user'/ErrorLogsOutput/results.txt")
-get_ipython().system(u' echo -------------------------------------------------------------------------------------- >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
+bash('echo >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
+bash('echo "List of hub uses that are not public hubs. Some public hubs sneak in due to alternative hubUrl. This includes curated hubs:" >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
+bash("echo db$'\\t'machine$'\\t'trackUse$'\\t'shortLabel$'\\t'hubUrl >> /hive/users/"+user+"/ErrorLogsOutput/results.txt")
+bash('echo -------------------------------------------------------------------------------------- >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
 
-get_ipython().system(u' cat /hive/users/"$USER"/ErrorLogsOutput/hubsNotPublic.txt | ~markd/bin/tabFmt >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
+bash('cat /hive/users/'+user+'/ErrorLogsOutput/hubsNotPublic.txt | ~markd/bin/tabFmt >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
 
-get_ipython().system(u' echo >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
-get_ipython().system(u' echo >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
-get_ipython().system(u' echo Previous outputs of this cron can be found here: https://genecats.gi.ucsc.edu/qa/test-results/usageStats/ >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
-get_ipython().system(u' echo Archive of monthly raw data can be found here: /hive/users/qateam/assemblyStatsCronArchive/ >> /hive/users/"$USER"/ErrorLogsOutput/results.txt')
+bash('echo >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
+bash('echo >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
+bash('echo Previous outputs of this cron can be found here: https://genecats.gi.ucsc.edu/qa/test-results/usageStats/ >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
+bash('echo Archive of monthly raw data can be found here: /hive/users/qateam/assemblyStatsCronArchive/ >> /hive/users/'+user+'/ErrorLogsOutput/results.txt')
 
-get_ipython().system(u" mkdir /hive/users/'$user'/assemblyStatsCronArchive/'$lastMonthFormat'")
-get_ipython().system(u" cp /hive/users/'$user'/ErrorLogsOutput/* /hive/users/'$user'/assemblyStatsCronArchive/'$lastMonthFormat'")
+bash("mkdir -p /hive/users/"+user+"/assemblyStatsCronArchive/"+lastMonthFormat)
+bash("cp /hive/users/"+user+"/ErrorLogsOutput/* /hive/users/"+user+"/assemblyStatsCronArchive/"+lastMonthFormat)
 
 if user == 'qateam':
-    get_ipython().system(u" cat /hive/users/'$user'/ErrorLogsOutput/results.txt > /usr/local/apache/htdocs-genecats/qa/test-results/usageStats/'$lastMonthFormat'")
+    bash("cat /hive/users/"+user+"/ErrorLogsOutput/results.txt > /usr/local/apache/htdocs-genecats/qa/test-results/usageStats/"+lastMonthFormat)
 
-get_ipython().system(u" cat /hive/users/'$user'/ErrorLogsOutput/results.txt")
-get_ipython().system(u" rm /hive/users/'$user'/ErrorLogs/*")
-get_ipython().system(u" rm /hive/users/'$user'/ErrorLogsOutput/*")
+bash("cat /hive/users/"+user+"/ErrorLogsOutput/results.txt")
+bash("rm /hive/users/"+user+"/ErrorLogs/*")
+bash("rm /hive/users/"+user+"/ErrorLogsOutput/*")