4d9af30fcf1c2f3ba7b7477b85f02c813f65e169
gperez2
  Sun Oct 6 22:48:24 2024 -0700
Adding errors to skip and updating the script when the monthly hgcentral dump does not exist, refs #32593

diff --git src/utils/qa/checkSessionsFromRR.py src/utils/qa/checkSessionsFromRR.py
index 707b9af..9d95e80 100755
--- src/utils/qa/checkSessionsFromRR.py
+++ src/utils/qa/checkSessionsFromRR.py
@@ -114,52 +114,62 @@
     return sessionLoads
 
 def loadSession(session, machine):
     """Makes session URL to load on machine"""
     session=session.split('cgi-bin')[1]
     session=machine+session
     return session
 
 def checkSession(session):
     """Checks if session loads on machine"""
 
     #Strings to check
     check_hgTracks='// END hgTracks'
     check_error='<!-- ERROR -->'
     check_warning="id='warnBox'"
-    hubid_error="Couldn't connect to database hub_"
+    hubid_error="Couldn't connect to database"
     hubid_error_dev="can not find any trackDb tables for hub_"
+    hubCollection_error= r'doesn\\x27t\\x20exist\\x20in\\x20customTrash\\x20database\\x2C\\x20or\\x20hFindTableInfoWithConn\\x20failed'
+    buffer_error= r'buffer\\x20overflow\\x2C\\x20size\\x204096\\x2C\\x20format\\x3A\\x20Click\\x20to\\x20alter\\x20the\\x20display'
     # List to append session load error
     error_list=[]
     try:
         checkLoad=bash("curl -Ls '%s'" % session)
         checkLoad=str(checkLoad)[1:-1]
         
         #If session contains strings to check if session loaded, set a variable that session loaded
         if check_hgTracks in checkLoad:
             if check_error in checkLoad: #checks if there is an error when session is loaded
                if check_warning in checkLoad: #checks if the error is a warning when session is loaded
                   check3='loads'
                else: #If session contains the error string to check, add error to list 
                   error_list.append('error')
             check2='loads'
        #Pass if session load error is Couldn't connect to database hub error (hub id issue)
         elif hubid_error in checkLoad:
             pass
        #Pass if session load error is can not find any trackDb tables for hub_ ((hub id issue))
         elif hubid_error_dev in checkLoad:
             pass
+        # Pass if session load error is the hubCollection error (plain substring test, same as the hub id checks)
+        elif hubCollection_error in checkLoad:
+            print("Collections error")
+            pass
+        # Pass if session load error is the buffer overflow error (plain substring test)
+        elif buffer_error in checkLoad:
+            print("Buffer error")
+            pass
         else: #If session does not contains strings to check, add error to list 
              error_list.append('error')
     except subprocess.CalledProcessError as e:
         #If session fails to curl, add error to list 
         error_list.append('error')
     #If an error is present in the error list, save the session URL to variable 
     if 'error' in error_list:
         sessionLoad=session
     else: #If no error is present in the error list, set variable that session loaded
         sessionLoad='loads'
         session_dir=session.split('genecats.gi.ucsc.edu')[1]
         session_path=myDir+session_dir
    
     return sessionLoad
 
@@ -167,57 +177,90 @@
     """Creates a text file that appends crash sessions"""
     failed_session=open(url_txt, 'a')
     failed_session.write(str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))+'\t'+str(session)+'\n')
     failed_session.close()
     return session
 
 def output_if_file_exists(file_path):
     """Outputs if text file was created for crash sessions"""
     if os.path.exists(file_path):
         print("This cronjob outputs sessions that crash on hgwbeta or hgwdev. The session URLs have '#' in ...session_settings_#.txt which is the random number session tested")
         with open(file_path, 'r') as file:
             print()
             print(file.read())
         print("\nErrors that output 'hui::wiggleScaleStringToEnum() - Unknown option' can be ignored.")
 
-year=datetime.now().strftime("%Y-%m-"+"01")
+#year=datetime.now().strftime("%Y-%m-"+"01")
 
 server='https://genecats.gi.ucsc.edu/qa/qaCrons'
 
 #Saves the montly hgcentral session dump to a varaiable
-monthly_hgcentral_dump="/usr/local/apache/htdocs-genecats/qa/test-results/hgcentral/"+year+"/rr.namedSessionDb"
+#monthly_hgcentral_dump="/usr/local/apache/htdocs-genecats/qa/test-results/hgcentral/"+year+"/rr.namedSessionDb"
+
+num_lines=10000 # Number of random lines to select
+
+# Get the current date and format it as 'YYYY-MM-01'
+current_year_month = datetime.now().strftime("%Y-%m-"+"01")
+
+# Construct the path for the current month's file
+monthly_hgcentral_dump = f"/usr/local/apache/htdocs-genecats/qa/test-results/hgcentral/{current_year_month}/rr.namedSessionDb"
+
+# Check if the file exists for the current month
+if not os.path.exists(monthly_hgcentral_dump):
+    # If the file doesn't exist, calculate the previous month
+    current_year = int(datetime.now().strftime("%Y"))
+    current_month = int(datetime.now().strftime("%m"))
+
+    # Handle previous month calculation manually
+    if current_month == 1:
+        previous_year = current_year - 1
+        previous_month = 12
+    else:
+        previous_year = current_year
+        previous_month = current_month - 1
+
+    # Format the previous month as 'YYYY-MM-01'
+    previous_year_month = f"{previous_year:04d}-{previous_month:02d}-01"
+
+    # Construct the path for the previous month's file
+    monthly_hgcentral_dump = f"/usr/local/apache/htdocs-genecats/qa/test-results/hgcentral/{previous_year_month}/rr.namedSessionDb"
+
+    # Neither month has a dump: report it and stop, because random_lines is never assigned on this path
+    if not os.path.exists(monthly_hgcentral_dump):
+        print("Error: rr.namedSessionDb monthly hgcentral dump file not found for both current and previous month.")
+        raise SystemExit(1)
+    else:  # File for the previous month found, proceed to get random lines
+        random_lines = random_session_lines(monthly_hgcentral_dump, num_lines)
+else:
+    # File for the current month found, proceed to get random lines
+    random_lines = random_session_lines(monthly_hgcentral_dump, num_lines)
+
 
 
 count=0
 
 #Machine partial URL to load session
 hgw0='http://hgw0.soe.ucsc.edu/cgi-bin/hgTracks?hgS_doLoadUrl=submit&hgS_loadUrlName='
 hgwbeta='http://hgwbeta.soe.ucsc.edu/cgi-bin'
 hgwdev='http://hgwdev.gi.ucsc.edu/cgi-bin'
 
 #Path to file to append  crash sessions
 url_txt='/usr/local/apache/htdocs-genecats/qa/qaCrons/sessionsFromRR/crashedSessions.txt'
 
 #Directory to save files for the script 
 myDir='/usr/local/apache/htdocs-genecats'
 
-num_lines=10000 # Number of random lines to select
-
-
-#Gets a number of random of sessions from the monthly_hgcentral_dump
-random_lines = random_session_lines(monthly_hgcentral_dump, num_lines)
-
 
 def main(random_lines, server, count, hgw0, hgwbeta, hgwdev, url_txt, myDir):
     """ Gets a number of random of sessions from the RR, if sessions crash on hgwdev/hgwbeta then outputs the crash sessions"""
     #Remove session contents files
     os.system("rm /hive/users/qateam/sessionsFromRR/*")
 
     for line in random_lines:
         count=count +1
         session_contents=parseSessions(line)
         if session_contents=='missing':
            continue
         session=getSessionContents(session_contents, count)
         hgw0session=loadSessionhgw0(session, hgw0)
         session=checkSessionhgw0(hgw0session)
         if session=='no':