# Provenance: commit 9efd3ec91a0196806fd934222628b48ad501d937, lrnassar, Wed Mar 19 16:39:42 2025 -0700
# "Updating hgTracksRandom with a QA script that logs server response times, and tracks performance. Refs #32284"
# File: src/utils/qa/hgTracksTiming.py
#
# Meant as a substitute for hgTracksRandom
# Queries a list of GB servers (crontab set to every 15m) and documents their load time
# as well as their status code, if they took too long to load, or if hgTracks display did not
# fully load. Alerts with a printed error when a negative condition is encountered.
# Each run it reads the list of all observed times and regenerates a table and graphs displaying
# the change of server load times over time. Once per month it prints a reminder to check for abnormalities.
# If running on a new user, you will need to copy the index.html page from qateam and run the function here once: makeSymLinks(user,save_dir)
#
# Third-party packages (requests, urllib3, matplotlib) are imported inside the functions that use
# them, so importing this module (e.g. to call makeSymLinks once) needs only the standard library
# and has no side effects.

import datetime
import getpass
import os
import subprocess
import time
from collections import defaultdict
from collections import deque

# A page that loads completely but takes at least this many seconds is reported as a problem.
SLOW_LOAD_SECONDS = 10


def bash(cmd):
    """Run cmd in a shell subprocess and return its output (stderr merged into stdout).

    Raises RuntimeError, chained to the CalledProcessError, when the command exits non-zero.
    """
    try:
        completed = subprocess.run(cmd, check=True, shell=True,
                                   stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                                   universal_newlines=True)
    except subprocess.CalledProcessError as e:
        raise RuntimeError("command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, e.output)) from e
    return completed.stdout


def makeSymLinks(user, save_dir):
    """Symlink the generated *.html and *.png files from save_dir into the user's web directory.

    save_dir is concatenated directly with the glob pattern, so it must end with "/".
    """
    if user == 'qateam':
        target = "/usr/local/apache/htdocs-genecats/qa/test-results/hgTracksTiming/"
    else:
        target = "/cluster/home/" + user + "/public_html/cronResults/hgTracksTiming/"
    # The glob has to be expanded by the shell, hence bash() rather than os.symlink.
    for pattern in ("*.html", "*.png"):
        bash("ln -sf " + save_dir + pattern + " " + target)


def _parseTimingLine(line):
    """Return (date_string, seconds) for one "date<TAB>1.234s<TAB>status" row, or None if malformed."""
    parts = line.rstrip().split("\t")
    if len(parts) < 2:
        return None
    try:
        return parts[0], float(parts[1].split("s")[0])  # drop the trailing 's' from the time
    except ValueError:
        return None


def getLastLinesAndMakeList(file_path, num_lines=20):
    """Read a timing log and return (dates, times) as two parallel lists for plotting.

    num_lines selects which rows are returned:
      * an integer N  - the last N rows, thinned to every ceil(N/20)-th row
                        (20 -> every row, 80 -> every 4th row)
      * 'AllTime/20'  - up to 20 rows evenly spaced over the whole file
      * 'all'         - every row in the file
    Rows that cannot be parsed are skipped; an empty file yields two empty lists.
    """
    with open(file_path, "r") as file:
        if num_lines == 'all':
            selected = file.readlines()
        elif num_lines == 'AllTime/20':
            all_lines = file.readlines()
            total_lines = len(all_lines)
            # A set removes the repeated indices produced when the file has fewer than 20 rows,
            # and the empty-file case produces no indices at all.
            indices = sorted({int(i * total_lines / 20) for i in range(20)}) if total_lines else []
            selected = [all_lines[i] for i in indices]
        else:
            last_lines = deque(file, maxlen=num_lines)  # keeps only the last num_lines rows
            step = max(1, -(-num_lines // 20))  # ceiling division
            selected = [line for i, line in enumerate(last_lines) if i % step == 0]

    dates = []
    times = []
    for line in selected:
        parsed = _parseTimingLine(line)
        if parsed is not None:
            dates.append(parsed[0])
            times.append(parsed[1])
    return (dates, times)


def generateGraphs(user, save_dir, filePath, server):
    """Create the 3 time scale graphs for one server as <save_dir>/<server>.<1|2|3>.png.

    The "Last 5h" / "Last 20h" titles correspond to 20 and 80 log rows, i.e. they assume
    the 15-minute cron cadence described at the top of this file.
    """
    # Don't try to display the plot: select the non-interactive backend before pyplot is loaded.
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    reportsToGenerate = (("Last 5h", 20), ("Last 20h", 80), ("AllTime/20", "AllTime/20"))

    for n, (report, num_lines) in enumerate(reportsToGenerate, start=1):
        x_dates, y = getLastLinesAndMakeList(filePath, num_lines=num_lines)

        plt.plot(x_dates, y, marker='o')
        plt.gcf().autofmt_xdate()  # rotate date labels for better readability
        plt.xlabel('Date/time')
        plt.ylabel("Load time in s")
        plt.xticks(x_dates)
        plt.title(report + " - " + server)

        # Ensure the figure is fully rendered before saving
        plt.gcf().canvas.draw()
        plt.savefig(save_dir + "/" + server + "." + str(n) + ".png", bbox_inches='tight')

        # Clear the current plot to avoid overlaps with the next plot
        plt.clf()


def create_save_dir(user):
    """Create (if needed) and return the per-user output directory, with a trailing slash."""
    save_dir = f"/hive/users/{user}/hgTracksTiming/"
    os.makedirs(save_dir, exist_ok=True)
    return save_dir


def createTableOfTimeChanges(filePath, save_dir, server, n, totalN):
    """Write this server's monthly-average table into <save_dir>timeChangesTable.html.

    n is the 1-based position of the server in the current run and totalN the number of
    servers. The first server (n == 1) truncates the file and opens the wrapping markup,
    later servers append, and the last one (n == totalN) closes the wrapping <div>.
    Log rows that cannot be parsed are skipped.
    """
    tableFilePath = save_dir + "timeChangesTable.html"
    monthly_data = defaultdict(list)

    # Group the observed load times by (year, month).
    with open(filePath, "r") as file:
        for line in file:
            parsed = _parseTimingLine(line)
            if parsed is None:
                continue
            date_str, seconds = parsed
            try:
                date_obj = datetime.datetime.strptime(date_str, "%Y-%m-%d-%H:%M")
            except ValueError:
                continue
            monthly_data[(date_obj.year, date_obj.month)].append(seconds)

    averages = {ym: sum(values) / len(values) for ym, values in monthly_data.items()}

    # Keyed on position rather than on the server name, so the file is still reset every run
    # when the usual first server has been removed from the URL dictionary.
    isFirst = (n == 1)
    with open(tableFilePath, "w" if isFirst else "a") as output_file:
        if isFirst:
            output_file.write("<div>\n<table>\n<tr>\n<td valign='top'>\n")
        elif n % 2 != 0:
            output_file.write("</td>\n</tr>\n</table>\n<td valign='top'>\n")
        else:
            output_file.write("<table>\n<tr>\n<td valign='top'>\n")

        output_file.write("<h3>" + server + "</h3>\n<table border=\"1\">\n")

        # Create table header (Months)
        months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
        output_file.write("<tr><th>Year</th>" + "".join(f"<th>{month}</th>" for month in months) + "</tr>\n")

        years = sorted({year for year, _ in averages})
        for year in years:
            # Row of monthly averages; months without data show "-".
            output_file.write(f"<tr><td>{year}</td>")
            for month in range(1, 13):
                value = averages.get((year, month), "-")
                if isinstance(value, float):
                    value = f"{value:.3f}"
                output_file.write(f"<td>{value}</td>")
            output_file.write("</tr>\n")

            # Row of month-over-month % change; the comparison restarts each year and
            # after any month without data.
            output_file.write("<tr><td>Change</td>")
            prev_value = None
            for month in range(1, 13):
                current_value = averages.get((year, month))
                # "prev_value" must be non-zero as well as present to divide by it.
                if prev_value and current_value is not None:
                    percent_change = ((current_value - prev_value) / prev_value) * 100
                    color = "red" if percent_change > 0 else "green"
                    output_file.write(f"<td style='color:{color}'> {percent_change:.2f}% </td>")
                else:
                    output_file.write("<td>-</td>")
                prev_value = current_value
            output_file.write("</tr>\n")

        output_file.write("</table>\n")
        if n == totalN:
            output_file.write("</div>\n")


def _resultsUrl(user):
    """Return the web address where this user's timing results are published."""
    if user == 'qateam':
        return "https://genecats.gi.ucsc.edu/qa/test-results/hgTracksTiming/"
    return "https://hgwdev.gi.ucsc.edu/~" + user + "/cronResults/hgTracksTiming/"


def checkFileExistsForMonthlyReport(save_dir, user):
    """Print the monthly reminder the first time this runs in a new calendar month.

    An empty marker file <save_dir>monthFile<MM> records that the reminder for month MM
    was already printed. When the current month's marker is missing, markers left over
    from other months are deleted, the new marker is created and the reminder is printed.
    """
    month = datetime.datetime.today().strftime("%m")
    fileToCheckAndReport = save_dir + "monthFile" + month
    if os.path.isfile(fileToCheckAndReport):
        return

    # os.listdir returns bare names, so they must be joined with save_dir before use;
    # otherwise the stale markers are looked up in the current working directory.
    for filename in os.listdir(save_dir):
        stalePath = os.path.join(save_dir, filename)
        if filename.startswith("monthFile") and os.path.isfile(stalePath):
            os.remove(stalePath)

    # Create a new blank marker file
    with open(fileToCheckAndReport, 'w'):
        pass

    print("Monthly reminder to check the hgTracksTiming information for any abnormalities:\n")
    print(_resultsUrl(user))


def queryServersAndReport(server, url, filePath, today, n, user, timeout=120):
    """Fetch url once, print an alert if the load looks unhealthy, and append a row to filePath.

    A load is healthy when the page contains "END hgTracks" and took less than
    SLOW_LOAD_SECONDS. The appended row is "<today>\\t<seconds>s\\t<status code>".
    If the request itself fails (connection error, timeout, ...) the alert is printed and
    the row is written with the elapsed time and "ERR" as the status code, so one
    unreachable server does not stop the remaining servers from being checked.

    timeout is passed to requests.get. server and n are accepted for interface
    compatibility and are not used here.
    """
    import requests

    start_time = time.monotonic()
    try:
        response = requests.get(url, verify=False, timeout=timeout)  # Disable SSL verification
    except requests.RequestException as e:
        load_time = time.monotonic() - start_time
        status_code = "ERR"
        problem = True
        status = f"FAIL - request did not complete ({type(e).__name__}: {e})"
    else:
        load_time = time.monotonic() - start_time
        status_code = response.status_code
        # Check if the expected string is in the response
        if "END hgTracks" not in response.text:
            problem = True
            status = f"FAIL - Got status {status_code} return, but missing the 'END hgTracks' page string of a successful load"
        elif load_time < SLOW_LOAD_SECONDS:
            problem = False
            status = "SUCCESS"
        else:
            problem = True
            status = "FAIL - hgTracks page loaded, but load time over 10s"

    if problem:
        print("Potential problem with Genome Browser server.")
        print(f"URL: {url} | Status: {status_code} | Load Time: {load_time:.3f}s | Check: {status}")
        print("\nSee the latest timing numbers:")
        print(_resultsUrl(user))

    with open(filePath, "a") as file:
        file.write(f"{today}\t{load_time:.3f}s\t{status_code}\n")


def main():
    """Time every configured server, then refresh the table, the graphs and the monthly reminder."""
    import urllib3
    # Suppress SSL warnings - was due to an asia issue
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    user = getpass.getuser()
    save_dir = create_save_dir(user)
    today = datetime.datetime.today().strftime("%Y-%m-%d-%H:%M")

    # Dict with all the URLs to test. To temporarily pause testing of a URL for maintenance,
    # expected outage, etc., remove it from this dictionary.
    urls = {
        "hgwdev": "https://hgwdev.gi.ucsc.edu/cgi-bin/hgTracks?hgt.trackImgOnly=1&hgt.reset=1",
        "hgwbeta": "https://hgwbeta.soe.ucsc.edu/cgi-bin/hgTracks?hgt.trackImgOnly=1&hgt.reset=1",
        "hgw1": "https://hgw1.soe.ucsc.edu/cgi-bin/hgTracks?hgt.trackImgOnly=1&hgt.reset=1",
        "hgw2": "https://hgw2.soe.ucsc.edu/cgi-bin/hgTracks?hgt.trackImgOnly=1&hgt.reset=1",
        "euro": "https://genome-euro.ucsc.edu/cgi-bin/hgTracks?hgt.trackImgOnly=1&hgt.reset=1",
        "asia": "https://genome-asia.ucsc.edu/cgi-bin/hgTracks?hgt.trackImgOnly=1&hgt.reset=1",
    }

    for n, (server, url) in enumerate(urls.items(), start=1):
        filePath = save_dir + server + ".txt"
        queryServersAndReport(server, url, filePath, today, n, user)
        createTableOfTimeChanges(filePath, save_dir, server, n, len(urls))
        generateGraphs(user, save_dir, filePath, server)

    checkFileExistsForMonthlyReport(save_dir, user)


# Only run when executed as a script, so the module can be imported to call makeSymLinks.
if __name__ == "__main__":
    main()