5a5d63d41c03e0f384569bd1b18b7c70f1158e69
gperez2
  Mon Dec 8 15:16:22 2025 -0800
Code review edits, refs #36815

diff --git src/utils/qa/checkStaticDocsDiff.py src/utils/qa/checkStaticDocsDiff.py
index 3500d75e1fe..8311ed64f5d 100755
--- src/utils/qa/checkStaticDocsDiff.py
+++ src/utils/qa/checkStaticDocsDiff.py
@@ -1,197 +1,180 @@
 #!/usr/bin/env python3
 
-# Program Header
-# Name:   Gerardo Perez
-# Description: A program that reports differences in static HTML docs
-#              across hgwdev, hgwbeta, and hgw0.
-#
-#
-# checkStaticDocsDiff.py
-#
-#
-
 import subprocess
 import argparse
 
-# Run a bash command locally
-def bash(cmd):
-    """Executes a shell command locally and returns output as a list of lines."""
-    rawOutput = subprocess.run(cmd, check=True, shell=True,
-                               stdout=subprocess.PIPE, universal_newlines=True)
-    return rawOutput.stdout.split('\n')[0:-1]
-
 # Run a command on a remote host via SSH
 def get_checksums_via_ssh(host, command):
     """Runs a checksum command on a remote host via SSH and returns its stdout.
 
     Args:
         host: Short host name; the connection is made as "qateam@<host>".
         command: Shell command line to execute on the remote host.
 
     Returns:
         The remote command's standard output as text. It may be empty or
         partial when ssh or the remote command fails; in that case a warning
         is written to stderr.
     """
     import sys  # local import: only needed for the failure diagnostic below
 
     result = subprocess.run(
         ["ssh", "qateam@" + host, command],
         capture_output=True, text=True
     )
     # capture_output=True swallows the remote stderr, so a failed connection
     # would otherwise yield empty output without any hint of the cause.
     # Surface the failure, but still return stdout so callers get the same
     # value as before.
     if result.returncode != 0:
         print(
             "WARNING: ssh to " + host + " exited with status "
             + str(result.returncode) + ": " + result.stderr.strip(),
             file=sys.stderr,
         )
     return result.stdout
 
 # Parse md5sum output into a dict. Example line: "d41d8cd98f00b204e9800998ecf8427e  /path/to/file.html"
 # Produces: { "/path/to/file.html": "d41d8cd..." }
 def parse_checksums(output):
     """Builds a {filename: checksum} dictionary from md5sum output text."""
     # Split each line once on the first run of whitespace: the checksum comes
     # first, and everything after it (spaces included) is the file name.
     split_lines = (row.split(None, 1) for row in output.splitlines())
     # Lines that do not yield exactly two fields (blank or malformed) are
     # skipped; a repeated file name keeps the checksum from its last line.
     return {
         fields[1]: fields[0]
         for fields in split_lines
         if len(fields) == 2
     }
 
 # Format output as a URL (optional - controlled by --links)
 # hgwdev uses ".gi.ucsc.edu". Others use ".soe.ucsc.edu"
 def format_file_link(host, file, use_links):
     """Formats the file as a clickable URL when --links is enabled.
 
     Args:
         host: Short host name, e.g. "hgwdev", "hgwbeta" or "hgw0".
         file: Absolute path of the file on the host.
         use_links: When true, convert the path into an https:// URL;
             otherwise return the path unchanged.
 
     Returns:
         The original path, or "https://<domain>" followed by the path with
         the leading "/usr/local/apache/htdocs" removed.
     """
     if not use_links:
         return file
 
     doc_root = "/usr/local/apache/htdocs"
     # hgwdev uses ".gi.ucsc.edu"; every other host uses ".soe.ucsc.edu".
     domain = "hgwdev.gi.ucsc.edu" if host == "hgwdev" else host + ".soe.ucsc.edu"
     # Strip the docroot only where it is the leading prefix; a blanket
     # str.replace would also delete the same text deeper inside the path.
     url_path = file[len(doc_root):] if file.startswith(doc_root) else file
     return "https://" + domain + url_path
 
 # Ignore unwanted files and directories
 def should_ignore(file, ignore_list):
     """Tells whether any entry of ignore_list occurs as a substring of file."""
     for pattern in ignore_list:
         if pattern in file:
             # The first matching entry is enough to ignore the file.
             return True
     return False
 
 
 # Report files that exist on one host but not on another
 def _report_missing(heading, present, absent, link_host, ignore_list, use_links):
     """Prints the non-ignored files that are keys of `present` but not of `absent`.
 
     Nothing is printed when there are no such files. `link_host` is the host
     used to build the URL when `use_links` is true.
     """
     # Set differences have no defined order; sort so repeated runs produce
     # output that can be compared line by line.
     missing = sorted(
         file for file in present.keys() - absent.keys()
         if not should_ignore(file, ignore_list)
     )
     if missing:
         print(heading)
         for file in missing:
             print(format_file_link(link_host, file, use_links))
         print()
 
 
 # Report files that exist on both hosts but whose md5sums differ
 def _report_different(heading, first_host, first, second_host, second,
                       ignore_list, use_links):
     """Prints the non-ignored files present in both dicts with unequal checksums.
 
     For each file the `first_host` line is printed before the `second_host`
     line, followed by a blank separator. Nothing is printed when no file
     differs.
     """
     changed = sorted(
         file for file in first.keys() & second.keys()
         if first[file] != second[file]
         and not should_ignore(file, ignore_list)
     )
     if changed:
         print(heading)
         for file in changed:
             print(format_file_link(first_host, file, use_links), first[file])
             # The trailing "\n" argument leaves a blank line between file pairs.
             print(format_file_link(second_host, file, use_links), second[file], "\n")
         print()
 
 
 # MAIN PROGRAM
 def main():
     """Compares static HTML docs across hgwdev, hgwbeta, and hgw0.
 
     Collects md5sums of the *.html files under /usr/local/apache/htdocs on
     each host over SSH, then prints five reports: files missing from hgwdev,
     files differing between hgwbeta and hgwdev, files missing from hgw0,
     files missing from hgwbeta, and files differing between hgwbeta and hgw0.
     Paths matching the ignore list are left out, and every report is sorted
     by path.
     """
 
     # Argument Parsing
     parser = argparse.ArgumentParser(
         description="Compare static HTML docs across hgwdev, hgwbeta, and hgw0."
     )
     parser.add_argument(
         "-l", "--links", action="store_true",
         help="Format output as clickable https:// URLs"
     )
     args = parser.parse_args()
 
     # Files/directories to ignore during comparison (substring match on the path)
     ignore_files = [
         "/index.html",
         "admin/",
         "thumbNailLinks.html",
         "gbPageStartHardcoded.html",
         "ENCODE/",
         "/docs/",
         "allTipsRaw.html",
         "/mirrorDocs/staticPage.html",
         "googleAnalytics.html",
         "google09a546cf57abf548.html",
         "googlef42dd384148a3435.html",
         "/tipOfDay.html",
     ]
 
     # Command to list HTML files and calculate md5sum
     # grep -rl + xargs md5sum is used because find does not work on hgwbeta
     command = (
         "grep -rl --include='*.html' '' /usr/local/apache/htdocs 2>/dev/null | "
         "xargs -d '\n' md5sum | sort -k 2"
     )
 
     # Fetch checksums from hgwdev, hgwbeta, and hgw0
     checksums_hgwdev  = parse_checksums(get_checksums_via_ssh('hgwdev', command))
     checksums_hgwbeta = parse_checksums(get_checksums_via_ssh('hgwbeta', command))
     checksums_hgw0    = parse_checksums(get_checksums_via_ssh('hgw0', command))
 
     # 1. Missing on hgwdev but present on hgwbeta
     _report_missing(
         "Files present on hgwbeta but missing from hgwdev:",
         checksums_hgwbeta, checksums_hgwdev, "hgwbeta",
         ignore_files, args.links,
     )
 
     # 2. Files on both hgwdev & hgwbeta but with different md5sum
     _report_different(
         "Files present on both hgwbeta and hgwdev but different md5sum:",
         "hgwbeta", checksums_hgwbeta, "hgwdev", checksums_hgwdev,
         ignore_files, args.links,
     )
 
     # 3. Present only on hgwbeta (missing on hgw0)
     _report_missing(
         "Files present only on hgwbeta and missing on hgw0:",
         checksums_hgwbeta, checksums_hgw0, "hgwbeta",
         ignore_files, args.links,
     )
 
     # 4. Present only on hgw0 (missing on hgwbeta)
     _report_missing(
         "Files present only on hgw0 and missing on hgwbeta:",
         checksums_hgw0, checksums_hgwbeta, "hgw0",
         ignore_files, args.links,
     )
 
     # 5. Different md5sum between hgwbeta and hgw0
     _report_different(
         "Files present on both hgw0 and hgwbeta but different md5sum:",
         "hgwbeta", checksums_hgwbeta, "hgw0", checksums_hgw0,
         ignore_files, args.links,
     )
 
 # Run main() only when this file is executed as a script, not when imported
 if __name__ == "__main__":
     main()
 
 # Program Output (Commented out)
 #Files present on hgwbeta but missing from hgwdev:
 #/usr/local/apache/htdocs/admin/maintenance.html
 #/usr/local/apache/htdocs/ancestors/index.html
 #
 #Files present on both hgwbeta and hgwdev but different md5sum:
 #/usr/local/apache/htdocs/docs/index.html 9c1340981564d95cabb906eb15415476
 #/usr/local/apache/htdocs/docs/index.html 234def442a25eead78ccf7caf09cce4c
 #
 #/usr/local/apache/htdocs/docs/tableBrowserTutorial.html c6f36e979edc4912c1d4a80f0aa6eba4
 #/usr/local/apache/htdocs/docs/tableBrowserTutorial.html 5c08659421aea9dc1408b0ac7cc266f3
 #
 #Files present only on hgwbeta and missing on hgw0:
 #/usr/local/apache/htdocs/admin/maintenance.html
 #/usr/local/apache/htdocs/ENCODE/controlledVocabulary.html
 #
 #Files present only on hgw0 and missing on hgwbeta:
 #/usr/local/apache/htdocs/admin/stats/Report.html
 #
 #Files present on both hgw0 and hgwbeta but different md5sum:
 #/usr/local/apache/htdocs/admin/jk-install.html 9a561100775c6c730342e2d94a02fa08
 #/usr/local/apache/htdocs/admin/jk-install.html c97be8e421ce0ad9159e87cda40345d4
 #
 #/usr/local/apache/htdocs/admin/404.html 95c7561df8631b27f9fa1e76d75e7270
 #/usr/local/apache/htdocs/admin/404.html 3a1049daecd22bff590ebf0f2e6c2602