04cadaf097f071cb3fd88866691fb1133b6d4bc5
gperez2
  Mon Jun 16 22:47:03 2025 -0700
Commenting out the blatServers filtering to remove domain part due to Hiram updating the blatServers.hgcentral table on hgnfs1 for all host names, refs #35846

diff --git src/utils/qa/check_Tables_Metadata.py src/utils/qa/check_Tables_Metadata.py
index 7184aec36d0..63696fbae33 100755
--- src/utils/qa/check_Tables_Metadata.py
+++ src/utils/qa/check_Tables_Metadata.py
@@ -1,156 +1,158 @@
 #!/usr/bin/env python3
 
 # Program Header
 # Name:   Gerardo Perez
 # Description: A program that compares hg38, hg19, mm39, and mm10 tables and metadata tables for hgw1 and hgwbeta discrepancies 
 # check_Tables_Metadata.py
 #
 # Development Environment: VIM - Vi IMproved version 7.4.629
 
 import subprocess
 
 def bash(cmd):
     """Executes a shell command in Bash and returns the output as a list of lines."""
     rawOutput = subprocess.run(cmd, check=True, shell=True, stdout=subprocess.PIPE, universal_newlines=True)
     return rawOutput.stdout.split('\n')[0:-1]
 
 
 def run_command(command):
     """Executes a shell command and returns its output as a string. Prints an error message if the command fails."""
     result = subprocess.run(command, shell=True, capture_output=True, text=True)
     if result.returncode != 0:
         print("Error executing: {}".format(command))
         print("Error message: {}".format(result.stderr.strip()))
     return result.stdout.strip() # Remove leading/trailing whitespace.
 
 
 def get_data(host, query, database):
     """Gets and sorts data from a database using a SQL query."""
     command = '{} -Ne "{}" {} 2>/dev/null | sort'.format(host, query, database)
     return run_command(command).splitlines()
 
 
 def filter_blat_servers(data):
     """Splits blat*.soe.ucsc.edu after 'blat' and before '.soe.ucsc.edu', then re-formats for comparison."""
     filtered_entries = []
     for entry in data:
         fields = entry.split()
         if len(fields) > 2:
             server_name = fields[1]
             port = fields[2]
             if server_name.startswith("blat") and ".soe.ucsc.edu" in server_name:
                 base_name = server_name.split(".soe.ucsc.edu")[0]  # Remove domain part
                 new_entry = "{}\t{}\t{}\t{}".format(fields[0], base_name, port, "\t".join(fields[3:]) if len(fields) > 3 else "")
                 filtered_entries.append(new_entry)
     return filtered_entries
 
 
 def compare_table_lists(beta_tables, hgw1_tables, db):
     """Compares table lists from two database sources and prints differences.
     
     Tables that exist only in one of the sources are displayed.
     """
     ignored_beta_tables = {"hgFindSpec_public", "trackDb_public", "relatedTrack_public", "metaDb_public", "metaDb_back", "metaDb_public_back"}
 
     # Filter out ignored tables from beta_tables
     filtered_beta_tables = []
     for table in beta_tables:
         if table not in ignored_beta_tables:
             filtered_beta_tables.append(table)
 
     # Convert lists to sets for comparison
     set1, set2 = set(filtered_beta_tables), set(hgw1_tables)
     only_in_beta_tables = set1 - set2
     only_in_hgw1_tables = set2 - set1
 
     # Print differences only if there are discrepancies
     if only_in_beta_tables or only_in_hgw1_tables:
         print("\nDiscrepancies between hgw1 and beta tables for Database: {}".format(db))
 
     if only_in_beta_tables:
         print("Tables only in hgwbeta:")
         for table in sorted(only_in_beta_tables):
             print(table)
         print()
 
     if only_in_hgw1_tables:
         print("Tables only in hgw1:")
         for table in sorted(only_in_hgw1_tables):
             print(table)
         print()
 
 
 def compare_metatables(db_list, metatables, host1, host2, centdb1, centdb2):
     """Compares metadata tables across two databases in db_list."""
     for db in db_list:
         for metatable in metatables:
             # Construct different SQL queries depending on the metatable type
             if metatable == "liftOverChain":
                 query = "SELECT * FROM {} WHERE fromDb = '{}' OR toDb = '{}'".format(metatable, db, db)
             elif metatable == "blatServers":
                 query = "SELECT * FROM {} WHERE db = '{}'".format(metatable, db)
             else:
                 query = "SELECT * FROM {} WHERE name = '{}'".format(metatable, db)
 
             # Gets and sort data from both databases
             data1 = get_data(host1, query, centdb1)
             data2 = get_data(host2, query, centdb2)
 
             # Apply filtering to blatServers to remove domain part
-            if metatable == "blatServers":
-                data1 = filter_blat_servers(data1)
+            ## Commenting out these lines due to Hiram updating the blatServers.hgcentral table on
+            ## hgnfs1 for all host names
+            #if metatable == "blatServers":
+                #data1 = filter_blat_servers(data1)
             
             # Convert lists to sets for comparison
             set1 = set(data1)
             set2 = set(data2)
 
             # Identify unique entries in each database
             only_in_centdb1 = set1 - set2
             only_in_centdb2 = set2 - set1
 
             # Print differences if any unique entries exist
             if only_in_centdb1:
                 print("Discrepancies between hgw1 and beta metadata tables for Database: {}".format(db))
                 print("{} entries only in {}:".format(metatable, centdb1))
                 for line in sorted(only_in_centdb1):
                     print(line)
                 print()  # Add a newline for better readability
 
             if only_in_centdb2:
                 print("Discrepancies between hgw1 and beta metadata tables for Database: {}".format(db))
                 print("{} entries only in {}:".format(metatable, centdb2))
                 for line in sorted(only_in_centdb2):
                     print(line)
                 print()  # Add a newline for better readability
 
 
 def main():
     """Main function to compare database tables and metadata."""
     db_list = ["hg38", "hg19", "mm39", "mm10"]
     centdb1, centdb2 = "hgcentralbeta", "hgcentral"
     host1 = "/cluster/bin/x86_64/hgsql -h hgwbeta"
     host2 = "/cluster/bin/x86_64/hgsql -h genome-centdb"
     metatables = ["liftOverChain", "blatServers", "dbDb"]
 
     # Compare table lists for each database in db_list
     for db in db_list:
         beta_tables = bash("/cluster/bin/x86_64/hgsql -h hgwbeta -Ne 'show tables' {}".format(db))
         hgw1_tables = bash("ssh qateam@hgw1 'HGDB_CONF=`pwd`/.hg.local.conf /home/qateam/bin/x86_64/hgsql -N -e \"show tables\" \"{}\"'".format(db))
         compare_table_lists(beta_tables, hgw1_tables, db)
 
     # Compare metadata tables
     compare_metatables(db_list, metatables, host1, host2, centdb1, centdb2)
 
 
 if __name__ == "__main__":
     main()
 
 # Program Output (Commented out)
 #Discrepancies between hgw1 and beta tables for Database: rn6
 #Tables only in hgwbeta:
 #refSeqSummary
 #
 #Discrepancies between hgw1 and beta metadata tables for Database: hg38
 #blatServers entries only in hgcentralbeta:
 #hg38    blat1c  17780   1       0       0
 #hg38    blat1c  17781   0       1       0