6aab74a2672b23a62ef96216f4ab5c0189e3478f gperez2 Mon May 27 13:57:19 2024 -0700 Adding a script that gets user sessions that load on hgw0 then checks if the sessions load on hgwdev and hgwbeta to check if they crash, refs #32593 diff --git src/utils/qa/checkSessionsFromRR.py src/utils/qa/checkSessionsFromRR.py new file mode 100755 index 0000000..51f4e85 --- /dev/null +++ src/utils/qa/checkSessionsFromRR.py @@ -0,0 +1,252 @@ +#!/usr/bin/env python3 + +# Program Header +# Name: Gerardo Perez +# Description: A program that gets user sessions from the RR and checks if sessions crash on hgwdev/hgwbeta +# +# +# +# checkSessionsFromRR.py +# +# +# Development Environment: VIM - Vi IMproved version 7.4.629 +# Version: Python 3.6.5 +# +# IGNORE the following error notes: +# For error: +# hExtendedRangeQuery: table t1_genome_65e19_d5ded0 doesn't exist in customTrash database, or hFindTableInfoWithConn failed +# bedGraphLoadItems: table t1_genome_65e19_d5ded0 only has 0 data columns, must be at least 4 +#This error is due to custom tracks in track collections expiring which were stored as a hub. +# ^IGNORE + +import os # Module to interact with the operating system +import getpass # Module to get the username of the person running the script +import sys # Module to access system-specific parameters and functions +import re # Module for regular expression matching operations +import json # Module for working with JSON data +import io # Module for handling I/O operations +import requests # Module to make HTTP requests +import random # Module to generate random numbers and select random items +import subprocess # Module to run subprocesses +from datetime import datetime # Module to handle dates and times +from urllib.parse import unquote # Module to decode URL-encoded strings +from selenium import webdriver # Module for web browser automation +from selenium.webdriver.chrome.options import Options # Module to set Chrome options for Selenium + +# Get the username of the person running the script +user = getpass.getuser() + +def bash(cmd): + """Input bash cmd and return stdout""" + rawOutput = subprocess.run(cmd,check=True, shell=True, stdout=subprocess.PIPE, universal_newlines=True, stderr=subprocess.STDOUT, errors='backslashreplace') + return(rawOutput.stdout.split('\n')[0:-1]) + +def random_session_lines(file_path, num_lines): + """Gets a number of random lines that consist of session contents and encodes as Latin-1 of the Unicode character set""" + with open(file_path, 'r', encoding="latin-1") as file: + lines = file.readlines() + selected_lines = random.sample(lines, num_lines) + return selected_lines + +def parseSessions(session_line): + """Gets the field of the line that has the session contents """ + session=session_line.strip().split('\t') + if len(session)==6: + contents=session[1] + elif len(session)==7: + contents=session[2] + else: + contents='missing' + # Strings to convert in order for the settings to load as a session + contents=unquote(contents) + contents=contents.replace("&", "\n") + contents=contents.replace("=", " ") + contents=contents.replace("group+auto-scale", "group auto-scale") + contents=contents.replace("use+vertical+viewing+range+setting", "use vertical viewing range setting") + contents=contents.replace("auto-scale+to+data+view", "auto-scale to data view") + contents=contents.replace("+/userdata/", " /userdata/") + contents=contents.replace("+../trash", " ../trash") + contents=contents.replace("Normalized+Score", "Normalized Score") + contents=contents.replace("all+genes", "all genes") + contents=contents.replace("Roswell+Park+Cancer+Institute", "Roswell Park Cancer Institute") + contents=contents.replace("Gray+scale", "Gray scale") + return contents + +def getSessionContents(session_contents, line): + """Saves the contents of a session to a file with utf-8 encoding""" + save_sessions='/hive/users/qateam/sessionsFromRR' + session_settings=save_sessions+'/session_settings_'+str(line)+'.txt' + write_session_settings= open(session_settings, 'w', encoding='utf-8') + write_session_settings.write(session_contents) + write_session_settings.close() + return session_settings + +def loadSessionhgw0(session, machine): + """Makes session URL to load on hgw0""" + session=session.split('qateam')[1] + session=machine+server+session + return session + +def checkSessionhgw0(session): + """Checks if session loads on hgw0""" + #String to check when hgTracks finishes loading + check_hgTracks='// END hgTracks' + + try: + checkLoad=bash("curl -Ls '"+session+"'") + try: + checkLoad=bash("curl -Ls '"+session+"'") + checkLoad=str(checkLoad)[1:-1] + #If the string to check when hgTracks finishes loading is present then save the session to a variable + if check_hgTracks in checkLoad: + sessionLoads=session + else: # If string is not present then delete the file with the session contents + sessionLoads='no' + session_dir=session.split('genecats.gi.ucsc.edu')[1] + session_path=myDir+session_dir + # Uncomment the line below to actually remove the session files + #os.system('rm '+session_path) + except subprocess.CalledProcessError as e: + sessionLoads='no' + except subprocess.CalledProcessError as e: + sessionLoads='no' + + return sessionLoads + +def loadSession(session, machine): + """Makes session URL to load on machine""" + session=session.split('cgi-bin')[1] + session=machine+session + return session + +def checkSession(session): + """Checks if session loads on machine""" + + #Strings to check + check_hgTracks='// END hgTracks' + check_error='<!-- ERROR -->' + check_warning="id='warnBox'" + hubid_error="Couldn't connect to database hub_" + hubid_error_dev="can not find any trackDb tables for hub_" + # List to append session load error + error_list=[] + try: + checkLoad=bash("curl -Ls '"+session+"'") + checkLoad=str(checkLoad)[1:-1] + + #If session contains strings to check if session loaded, set a variable that session loaded + if check_hgTracks in checkLoad: + if check_error in checkLoad: #checks if there is an error when session is loaded + if check_warning in checkLoad: #checks if the error is a warning when session is loaded + check3='loads' + else: #If session contains the error string to check, add error to list + error_list.append('error') + check2='loads' + #Pass if session load error is Couldn't connect to database hub error (hub id issue) + elif hubid_error in checkLoad: + pass + #Pass if session load error is can not find any trackDb tables for hub_ ((hub id issue)) + elif hubid_error_dev in checkLoad: + pass + else: #If session does not contains strings to check, add error to list + error_list.append('error') + except subprocess.CalledProcessError as e: + #If session fails to curl, add error to list + error_list.append('error') + #If an error is present in the error list, save the session URL to variable + if 'error' in error_list: + sessionLoad=session + else: #If no error is present in the error list, set variable that session loaeded + sessionLoad='loads' + session_dir=session.split('genecats.gi.ucsc.edu')[1] + session_path=myDir+session_dir + #Uncomment the line below to actually remove the session files + #os.system('rm '+session_path) + + return sessionLoad + +def makeURL(session, url_txt, count): + """Creates a text file that appends crash sessions""" + failed_session=open(url_txt, 'a') + failed_session.write(str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))+'\t'+str(session)+'\n') + failed_session.close() + return session + +def output_if_file_exists(file_path): + """Outputs if text file was created for crash sessions""" + if os.path.exists(file_path): + print("This cronjob outputs sessions that crash on hgwbeta or hgwdev. The session URLs have '#' in ...session_settings_#.txt which is the random number session tested") + with open(file_path, 'r') as file: + print() + print(file.read()) + print("\nErrors that output 'hui::wiggleScaleStringToEnum() - Unknown option' can be ignored.") + +#Remove session contents files +os.system("rm /hive/users/qateam/sessionsFromRR/*") + + +year=datetime.now().strftime("%Y-%m-"+"01") + +server='https://genecats.gi.ucsc.edu/qa/qaCrons' + +#Saves the montly hgcentral session dump to a varaiable +monthly_hgcentral_dump="/usr/local/apache/htdocs-genecats/qa/test-results/hgcentral/"+year+"/rr.namedSessionDb" + + +count=0 + +#Machine partial URL to load session +hgw0='http://hgw0.soe.ucsc.edu/cgi-bin/hgTracks?hgS_doLoadUrl=submit&hgS_loadUrlName=' +hgwbeta='http://hgwbeta.soe.ucsc.edu/cgi-bin' +hgwdev='http://hgwdev.gi.ucsc.edu/cgi-bin' + +#Path to file to append crash sessions +url_txt='/usr/local/apache/htdocs-genecats/qa/qaCrons/sessionsFromRR/crashedSessions.txt' + +#Directory to save files for the script +myDir='/usr/local/apache/htdocs-genecats' + +#num_lines=10000 # Number of random lines to select +num_lines=10000 # Number of random lines to select + + +#Gets a number of random of sessions from the monthly_hgcentral_dump +random_lines = random_session_lines(monthly_hgcentral_dump, num_lines) + + +def main(random_lines, server, count, hgw0, hgwbeta, hgwdev, url_txt, myDir): + """ Gets a number of random of sessions from the RR, if sessions crash on hgwdev/hgwbeta then outputs the crash sessions""" + for line in random_lines: + count=count +1 + session_contents=parseSessions(line) + if session_contents=='missing': + continue + session=getSessionContents(session_contents, count) + hgw0session=loadSessionhgw0(session, hgw0) + session=checkSessionhgw0(hgw0session) + if session=='no': + continue + hgwbetaSession=loadSession(session, hgwbeta) + beta_session=checkSession(hgwbetaSession) + if beta_session=='loads': + pass + else: + makeURL(beta_session, url_txt, count) + hgwdevSession=loadSession(session, hgwdev) + dev_session=checkSession(hgwdevSession) + if dev_session=='loads': + continue + else: + makeURL(dev_session, url_txt, count) + output_if_file_exists(url_txt) + +main(random_lines, server, count, hgw0, hgwbeta, hgwdev, url_txt, myDir) + +# Program Output (Commented out) +#This cronjob outputs sessions that crash on hgwbeta or hgwdev. The session URLs have '#' in ...session_settings_#.txt which is the random number session tested + + +#2024-05-09 00:21:25 http://hgwbeta.soe.ucsc.edu/cgi-bin/hgTracks?hgS_doLoadUrl=submit&hgS_loadUrlName=https://genecats.gi.ucsc.edu/qa/qaCrons/sessionsFromRR/session_settings_4642.txt +#2024-05-09 00:21:56 http://hgwdev.gi.ucsc.edu/cgi-bin/hgTracks?hgS_doLoadUrl=submit&hgS_loadUrlName=https://genecats.gi.ucsc.edu/qa/qaCrons/sessionsFromRR/session_settings_4642.txt + +#Errors that output 'hui::wiggleScaleStringToEnum() - Unknown option' can be ignored.