00290c3ab85e01370f80f4e23caf6309072ad908 lrnassar Wed Feb 19 15:33:53 2025 -0800 Tweaking a few things in the keyword search: making the grep search terms raw strings so they don't give a warning, fixing the log file fetch so it also picks up the euro + asia node logs, and updating the year, which was stuck at 2024. No RM. (Short notes on these three changes follow the diff.)
diff --git src/utils/qa/errorLogKeywordSearch.py src/utils/qa/errorLogKeywordSearch.py
index 1cb9abf2956..0fdf9fdd4a1 100755
--- src/utils/qa/errorLogKeywordSearch.py
+++ src/utils/qa/errorLogKeywordSearch.py
@@ -1,155 +1,163 @@
 #Looks through the error logs and graphs out the occurence of various keywords
 import matplotlib
 #Don't try to display the plot
 matplotlib.use('Agg')
 import datetime
 from collections import OrderedDict
 import getpass
 import subprocess
 import matplotlib.pyplot as plt
 import matplotlib.ticker as mticker
 import matplotlib.dates as mdates
 
 def bash(cmd):
     """Run the cmd in bash subprocess"""
     try:
         rawBashOutput = subprocess.run(cmd, check=True, shell=True,\
             stdout=subprocess.PIPE, universal_newlines=True, stderr=subprocess.STDOUT)
         bashStdoutt = rawBashOutput.stdout
     except subprocess.CalledProcessError as e:
         raise RuntimeError("command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, e.output))
     return(bashStdoutt)
 
 def bashNoErrorCatch(cmd):
     """Run the cmd in bash subprocess, don't catch error since grep returns exit code 1 when no match is found"""
     try:
         rawBashOutput = subprocess.run(cmd, check=True, shell=True,\
             stdout=subprocess.PIPE, universal_newlines=True, stderr=subprocess.STDOUT)
         bashStdoutt = rawBashOutput.stdout.rstrip().split("\n")
     except:
         bashStdoutt = []
     return(bashStdoutt)
 
 def copyLatestLogs():
     user = getpass.getuser()
 
     # Get the year to query proper wwwstats directory
     today = datetime.datetime.today()
     year = str(today).split('-')[0]
 
     # Get latest error logs from the RR
-    latestLogs = bash('ls /hive/data/inside/wwwstats/RR/2024/hgw1').rstrip().split("\n")
-    latestLogs = latestLogs[len(latestLogs)-11:len(latestLogs)-1]
-
     nodes = ['RR', 'asiaNode', 'euroNode'] #Add nodes with error logs, nodes can be added or removed
     machines = ['hgw1','hgw2'] #Add hgw machines to check
+    latestLogs = bash('ls /hive/data/inside/wwwstats/RR/'+year+'/hgw1/ | grep error').rstrip().split("\n")
+    latestLogs = latestLogs[max(0, len(latestLogs) - 13) : len(latestLogs) - 1]
+
     for node in nodes:
         if node == 'RR':
             for machine in machines:
                 for log in latestLogs:
-                    bash("ln -sf /hive/data/inside/wwwstats/RR/"+year+"/"+machine+"/"+log+' /hive/users/'+user+'/ErrorLogs/'+node+machine+log)
+                    if machine == "hgw1":
+                        bash("cp /hive/data/inside/wwwstats/RR/"+year+"/"+machine+"/"+log+' /hive/users/'+user+'/ErrorLogs/'+log)
+                    else:
+                        bash("cat /hive/data/inside/wwwstats/RR/"+year+"/"+machine+"/"+log+' >> /hive/users/'+user+'/ErrorLogs/'+log)
         else:
             for log in latestLogs:
-                bash("ln -sf /hive/data/inside/wwwstats/"+node+"/"+year+"/"+log+' /hive/users/'+user+'/ErrorLogs/'+node+log)
+                try:
+                    bash("cat /hive/data/inside/wwwstats/"+node+"/"+year+"/"+log+' >> /hive/users/'+user+'/ErrorLogs/'+log)
+                except:
+                    continue
+
     return(user,latestLogs)
 
+
 def createDicOfSearchTerms():
     totalLinesInLog = dict(label='Total lines in logs', description='Total number of lines seen in the logs', value=[], searchKeyWord="wc -l")
-    totalUniqueIPs = dict(label='Total unique IPs', description='Total number of unique IPs without port number, e.g. N.N.N and not N.N.N:NNN', value=[], searchKeyWord='grep "\[client" | cut -f4 -d "]" | cut -f3 -d " " | cut -f1 -d ":" | sort | uniq | wc -l')
-    totalUniqueIPsSubnets = dict(label='Total unique IP subnets', description='Total number of unique IPs with only partial subnet, e.g. NNN.NNN and not NNN.NNN.N.NN', value=[], searchKeyWord='grep "\[client" | cut -f4 -d "]" | cut -f3 -d " " | cut -f1 -d ":" | cut -f1-2 -d "." | sort | uniq | wc -l')
+    totalUniqueIPs = dict(label='Total unique IPs', description='Total number of unique IPs without port number, e.g. N.N.N and not N.N.N:NNN', value=[], searchKeyWord=r'grep "\[client" | cut -f4 -d "]" | cut -f3 -d " " | cut -f1 -d ":" | sort | uniq | wc -l')
+    totalUniqueIPsSubnets = dict(label='Total unique IP subnets', description='Total number of unique IPs with only partial subnet, e.g. NNN.NNN and not NNN.NNN.N.NN', value=[], searchKeyWord=r'grep "\[client" | cut -f4 -d "]" | cut -f3 -d " " | cut -f1 -d ":" | cut -f1-2 -d "." | sort | uniq | wc -l')
     totalUniqueHgsids = dict(label='Total unique hgsIDs', description='Total number of unique hgsIDs', value=[], searchKeyWord=r"grep 'hgsid' | sed -n 's/.*[?&]hgsid=\([0-9A-Za-z_]*\).*/\1/p' | sort | uniq | wc -l")
     totalLoadedSessions = dict(label='Total loaded sessions', description='Total number of loaded sessions', value=[], searchKeyWord=' grep "CGI_TIME: hgTracks" | grep "/cgi-bin/hgSession?" | wc -l')
     totalSavedCTs = dict(label='Total saved CTs', description='Total number of saved custom tracks', value=[], searchKeyWord='grep "customTrack: saved" | wc -l')
     totalCTerrors = dict(label='Total CT errors', description='Total number of custom track load errors', value=[], searchKeyWord='grep "hgCustom load error" | wc -l')
     totalStackDumps = dict(label='Total stack dumps', description='Total number of stack dumps', value=[], searchKeyWord='grep "Stack dump" | wc -l')
     totalTryingToAllocate = dict(label='Total 500Mb allocate memory', description="Happens if code tries to allocate a chunk bigger than hard-wired limit of 500m. Could indicate naughty CGI", value=[], searchKeyWord='grep "needMem: trying to allocate" | wc -l')
     totalOutOfMemory = dict(label='Total out of memory', description='Happens if malloc() fails because the OS native limits (or hg.conf maxMem limits)', value=[], searchKeyWord='grep "needMem: Out of memory" | wc -l')
     totalHogExits = dict(label='hogExit', description='hogExit: Total number of people that hit the bottleneck', value=[], searchKeyWord='grep "hogExit" | wc -l')
     totalHgCollectionsExpire = dict(label='hgCollections', description='Total number of expired hgCollections', value=[], searchKeyWord='grep "Track Collections expire 48" | wc -l')
     totalWarnTimings = dict(label='warnTiming', description='warnTiming: Number of people that hit the warnSeconds hg.conf var. Warns them about image taking too long to load', value=[], searchKeyWord='grep "warnTiming" | wc -l')
 
     itemsToFind = [totalLinesInLog,totalUniqueIPs,totalUniqueIPsSubnets,totalUniqueHgsids,totalLoadedSessions,totalSavedCTs,totalCTerrors,totalHgCollectionsExpire,totalHogExits,totalStackDumps,totalTryingToAllocate,totalOutOfMemory,totalWarnTimings]
 
     return(itemsToFind)
 
 def searchForTermsInLogs():
     user,latestLogs = copyLatestLogs()
     itemsToFind = createDicOfSearchTerms()
-    # n=0 Uncomment these lines to see progress
+    #n=0 ##### Uncomment these lines to see progress
     for log in latestLogs:
-#        n+=1
+        #n+=1 ### Progress
         logPath = "zcat /hive/users/"+user+"/ErrorLogs/*"+log+" | "
         for searchTerm in itemsToFind:
             searchTerm['value'].append(int(bash(logPath+searchTerm['searchKeyWord'])))
-#        print("Current progress:", n/len(latestLogs))
+        #print("Current progress:", n/len(latestLogs)) ### Progress
     bash("rm /hive/users/"+user+"/ErrorLogs/*")
 
     return(user,latestLogs,itemsToFind)
 
 def generateGraphs(user,latestLogs,itemsToFind):
     logDates = [log.split(".")[1] for log in latestLogs]
     dateRange = str(logDates[0])+"-"+str(logDates[len(logDates)-1])
     saveDir = "/hive/users/"+user+"/errorLogSearchCronResults/"+dateRange
     bash("mkdir -p "+saveDir)
     htmlPageOutput = open(saveDir+"/index.html",'w')
     n=0
     for report in itemsToFind:
         n+=1
         # x axis values
         x_dates = [datetime.datetime.strptime(date, "%Y%m%d") for date in logDates]
         # corresponding y axis values
         y = report['value']
 
         # plotting the points
         plt.plot(x_dates, y, marker='o')
 
         # Format the x-axis to show dates, with one point per week
         plt.gca().xaxis.set_major_locator(mdates.WeekdayLocator())  # Major ticks: weekly
         plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y%m%d'))  # Format as "YYYYMMDD"
 
         # Rotate date labels for better readability
         plt.gcf().autofmt_xdate()
 
         # naming the x axis
         plt.xlabel('Error log week yearMonthDay')
         # naming the y axis
         plt.ylabel(report['label'])
 
         plt.xticks(x_dates)
 
         # giving a title to my graph
         plt.title(report['label'])
 
         # Add a caption
         plt.text(0.5, -0.35, report['description']+" \nsearch term: "+report['searchKeyWord'], ha='center', va='center', fontsize=10, transform=plt.gca().transAxes)
 
         # Ensure the figure is fully rendered before saving
         plt.gcf().canvas.draw()  # Force rendering of the canvas
 
         # Save the plot to a file
         plt.savefig(saveDir + "/" + str(n) + ".png", bbox_inches='tight')
 
         htmlPageOutput.write('<img src="'+str(n) + '.png">')
 
         # Clear the current plot to avoid overlaps with the next plot
         plt.clf()
 
     htmlPageOutput.close()
 
     if user == 'qateam':
         bash("mkdir -p /usr/local/apache/htdocs-genecats/qa/test-results/errorLogSearchResults/"+dateRange)
         bash("ln -sf "+saveDir+"/* /usr/local/apache/htdocs-genecats/qa/test-results/errorLogSearchResults/"+dateRange+"/")
-        print("See the latest error log search results over the last 10 weeks:\n")
+        print("See the latest error log search results over the last 12 weeks:\n")
         print("https://genecats.gi.ucsc.edu/qa/test-results/errorLogSearchResults/")
     else:
         bash("mkdir -p /cluster/home/"+user+"/public_html/cronResults/errorLogSearchResults/"+dateRange)
         bash("ln -sf "+saveDir+"/* /cluster/home/"+user+"/public_html/cronResults/errorLogSearchResults/"+dateRange+"/")
-        print("See the latest error log search results over the last 10 weeks:\n")
+        print("See the latest error log search results over the last 12 weeks:\n")
         print("https://hgwdev.gi.ucsc.edu/~"+user+"/cronResults/errorLogSearchResults/"+dateRange+"/")
 
 def main():
     user,latestLogs,itemsToFind = searchForTermsInLogs()
     generateGraphs(user,latestLogs,itemsToFind)
 
 main()
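
A note on the raw-string change above: in an ordinary Python string literal an unrecognized backslash escape such as \[ produces an "invalid escape sequence" warning (a DeprecationWarning that became a SyntaxWarning in Python 3.12), which is what the two grep search terms were triggering. Prefixing the literal with r keeps the backslash literally and silences the warning without changing the command that is run. A minimal standalone sketch, separate from the script itself:

    # Both literals hold the exact same grep text; only the first one triggers
    # "SyntaxWarning: invalid escape sequence '\['" when the file is compiled.
    plain = 'grep "\[client" | wc -l'
    raw = r'grep "\[client" | wc -l'
    assert plain == raw  # identical runtime values, so the search behaves the same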
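
On the log-fetch change: rather than symlinking each machine's weekly log under its own name, the hgw1 file is now copied as the base and the hgw2, euroNode, and asiaNode logs are appended onto it with cat >>, so each weekly file in ErrorLogs aggregates every machine. Assuming the weekly logs are gzip-compressed (which the later zcat step implies), appending them is safe because a gzip stream may contain multiple members and zcat decompresses them all. The new try/except around the euro/asia cat simply skips a mirror whose log for that week is missing instead of aborting the whole run.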
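
On the year and week-count change: the wwwstats directory year is no longer hard-coded to 2024, and the new slice keeps up to the 12 most recent complete weekly logs (matching the updated "last 12 weeks" messages) while still dropping the final, still-growing log. A small sketch with hypothetical file names, only to show the slice arithmetic:

    # Hypothetical weekly log names; the real ones come from
    # "ls /hive/data/inside/wwwstats/RR/<year>/hgw1/ | grep error".
    latestLogs = ["error_log.%02d.gz" % i for i in range(1, 16)]  # 15 logs, oldest first
    recent = latestLogs[max(0, len(latestLogs) - 13) : len(latestLogs) - 1]
    print(len(recent))   # 12 -- the twelve newest complete logs
    print(recent[-1])    # error_log.14.gz -- the current, still-growing log is skipped
    # With fewer than 13 logs on disk (e.g. early in a new year), max(0, ...) keeps the slice valid.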