c638753eb8b699b35d0e795403b3f6481ed40198
lrnassar
  Tue Aug 31 12:10:04 2021 -0700
Adding util to automatically update hubPublic table refs #28008

diff --git src/utils/qa/hubPublicAutoUpdate src/utils/qa/hubPublicAutoUpdate
new file mode 100755
index 0000000..86ec395
--- /dev/null
+++ src/utils/qa/hubPublicAutoUpdate
@@ -0,0 +1,159 @@
+#!/usr/bin/env python3
+#Lou Aug 2021
+"""
+Given a server (dev/hgwbeta/rr) will query hubPublic table URLs and verify that the shortLabel,
+longLabel, dbCount and dbList matches what is in the hubPublic table. Any inconsistencies will be
+automatically updated. Will print out the commands as well as run them.
+
+Can optionally be run in test mode where it will only print commands without executing.
+
+Example run:
+    hubPublicAutoUpdate dev
+    hubPublicAutoUpdate hgwbeta -t
+    hubPublicAutoUpdate rr
+    
+"""
+
+import subprocess,requests,sys,argparse
+
+def parseArgs():
+    """
+    Parse the command line arguments.
+
+    Returns the argparse namespace with two attributes: `server` (the positional
+    server name, not validated here) and `testMode` (True when -t/--test is given).
+    When the script is invoked with no arguments at all, the usage line and a short
+    description are printed and the program exits with status 0.
+    """
+    parser = argparse.ArgumentParser(description = __doc__,
+                                     formatter_class=argparse.RawDescriptionHelpFormatter)
+    # Detach the parser's last built-in group (the optional arguments) so that the
+    # "required arguments" group created next is listed before it in --help; it is
+    # re-attached below. NOTE: _action_groups is a private argparse attribute.
+    optional = parser._action_groups.pop()
+
+    required = parser.add_argument_group('required arguments')
+
+    required.add_argument ("server",
+        help = "Server from which to query the hubPublic table. Can be dev, hgwbeta, or rr.")
+    optional.add_argument ("-t", "--test", dest = "testMode", default = False, action = "store_true",
+        help = "Optional: Run in test mode. Print out any discrepancies but do not run any "
+            "commands.")
+    if len(sys.argv) == 1:
+        parser.print_usage()
+        print("\nGiven a server (dev/hgwbeta/rr) will query hubPublic table URLs and verify that\n"
+              "the shortLabel, longLabel, dbCount and dbList matches what is in the hubPublic table.\n"
+              "Any inconsistencies will be automatically updated. Will print out the commands as\n"
+              "well as run them\n"
+              "Can optionally be run in test mode where it will only print commands without executing.\n\n"
+              "Example run:\n"
+              "    hubPublicAutoUpdate dev\n"
+              "    hubPublicAutoUpdate hgwbeta -t\n"
+              "    hubPublicAutoUpdate rr\n")
+        # sys.exit() rather than the exit() helper that the site module installs for
+        # interactive sessions (absent when Python runs with -S).
+        sys.exit(0)
+    parser._action_groups.append(optional)
+    return parser.parse_args()
+
+def evaluateServer(serverToQuery):
+    """
+    Map a server name onto the pieces needed to build an hgsql command line.
+
+    Returns a two-item list: the host option string that goes right after "hgsql"
+    (empty for 'dev', otherwise "-h <host> " including its trailing space) and the
+    name of the central database. Any name other than 'dev', 'hgwbeta' or 'rr'
+    ends the program through sys.exit with an explanatory message.
+    """
+    # Rebuilt on every call so each caller receives its own list objects.
+    knownServers = {
+        'dev': ['', 'hgcentraltest'],
+        'hgwbeta': ['-h hgwbeta ', 'hgcentralbeta'],
+        'rr': ['-h genome-centdb ', 'hgcentral'],
+    }
+    if serverToQuery not in knownServers:
+        sys.exit("Server called must be either 'dev', 'hgwbeta', or 'rr'")
+    return knownServers[serverToQuery]
+
+def buildPubHubDic(hgsqlInput):
+    """
+    Build a dictionary out of the requested server's hubPublic table.
+
+    hgsqlInput is the [hostOption, database] pair produced by evaluateServer().
+    Runs hgsql with "select * from hubPublic" against that database and returns a
+    dict keyed by hubUrl. Each value is a dict holding that row's fields, all as
+    strings, under the keys hubUrl, shortLabel, longLabel, registrationTime,
+    dbCount, dbList and descriptionUrl.
+
+    Raises subprocess.CalledProcessError when hgsql exits with a non-zero status.
+    """
+    # Assumes "select *" returns the columns in exactly this order -- TODO confirm
+    # against the hubPublic table definition.
+    columns = ('hubUrl','shortLabel','longLabel','registrationTime',
+               'dbCount','dbList','descriptionUrl')
+    # Nothing in this command needs shell interpretation, so hand hgsql an argument
+    # list; the host option string ("-h host ") is split into its two words.
+    hubPublic = subprocess.run(["hgsql"]+hgsqlInput[0].split()+
+                               ["-e","select * from hubPublic",hgsqlInput[1]],
+                               check=True, stdout=subprocess.PIPE, universal_newlines=True)
+    # Skip the first output line (presumably the column-name header) and the empty
+    # string that follows the final newline.
+    hubPublicOutput = hubPublic.stdout.split('\n')[1:-1]
+
+    hubPublicDic = {}
+    for row in hubPublicOutput:
+        fields = row.split('\t')
+        # Pairing names with fields positionally gives descriptionUrl the seventh
+        # field; it used to be read from index 5, which is the dbList column.
+        hubPublicDic[fields[0]] = dict(zip(columns, fields))
+    return hubPublicDic
+
+def buildCurrentHubTxtDic(hub, timeout=30):
+    """
+    Query hub.txt file and build dic of values.
+
+    hub is the URL of the hub.txt file. timeout is the number of seconds handed to
+    requests.get, so that a server which accepts the connection but never answers
+    cannot stall the whole run.
+
+    Every non-blank line is split at its first run of whitespace (spaces or tabs):
+    the first word becomes the key and the rest of the line, stripped, becomes the
+    value (an empty string when the line holds a single word). When a key occurs
+    more than once the last occurrence wins.
+
+    Raises requests.exceptions.RequestException on a connection failure, a timeout
+    or an HTTP error status.
+    """
+    response = requests.get(hub, timeout=timeout)
+    # Without this check an HTML error page would be parsed as if it were hub.txt.
+    response.raise_for_status()
+
+    currentHub = {}
+    for line in response.text.splitlines():
+        words = line.split(None, 1)
+        if not words:
+            # Blank line: nothing to record.
+            continue
+        currentHub[words[0]] = words[1].strip() if len(words) > 1 else ""
+    return currentHub
+                    
+def queryHubTxt(currentHub,hub,timeout=30):
+    """
+    Query genomes.txt file and fill out dbList and dbCount values.
+
+    currentHub is the dict built from hub.txt and must contain 'genomesFile'; hub is
+    the URL of that hub.txt, used to resolve a relative genomesFile location.
+    timeout is the number of seconds handed to requests.get.
+
+    The value of every line whose first word is "genome" is collected, in file
+    order, into currentHub['dbList']; currentHub['dbCount'] is set to the length of
+    that list. The same dict is returned.
+
+    Raises KeyError when hub.txt had no genomesFile entry, ValueError when a
+    "genome" line carries no value, and requests.exceptions.RequestException on a
+    connection failure, a timeout or an HTTP error status.
+    """
+    from urllib.parse import urljoin
+
+    # urljoin returns an absolute genomesFile URL unchanged and resolves a relative
+    # one against the directory that holds hub.txt.
+    genomeUrl = urljoin(hub, currentHub['genomesFile'].strip())
+    response = requests.get(genomeUrl, timeout=timeout)
+    response.raise_for_status()
+
+    dbList = []
+    for line in response.text.splitlines():
+        # Splitting on any whitespace covers tab- and space-separated lines alike,
+        # including indented ones and repeated separators.
+        words = line.split()
+        if not words or words[0] != 'genome':
+            continue
+        if len(words) < 2:
+            # Refuse to derive a count from a malformed file; the caller reports the
+            # hub as broken instead of updating the table.
+            raise ValueError("genome line without a value in "+genomeUrl)
+        dbList.append(words[1])
+
+    currentHub['dbList'] = dbList
+    currentHub['dbCount'] = len(dbList)
+    return currentHub
+
+def printHgsql(hub,varToEdit,newVarValue,hgsqlInput,testMode):
+    """
+    Build the hgsql command that fixes one hubPublic field, run it, and print it.
+
+    hub          -- hubUrl of the row to update
+    varToEdit    -- name of the column to set
+    newVarValue  -- new value for that column; converted with str()
+    hgsqlInput   -- [hostOption, database] pair produced by evaluateServer()
+    testMode     -- when true the command is only printed, never executed
+
+    Raises subprocess.CalledProcessError when hgsql exits with a non-zero status;
+    in that case the command is not printed.
+    """
+    def sqlQuote(value):
+        # Values come from hub.txt files on remote servers, so they must not be able
+        # to close the SQL string literal. Doubling backslashes assumes the server
+        # treats backslash as an escape character (the MySQL default) -- TODO confirm.
+        return "'"+str(value).replace("\\", "\\\\").replace("'", "''")+"'"
+
+    sql = "update hubPublic set "+varToEdit+" = "+sqlQuote(newVarValue)+ \
+          " where hubUrl = "+sqlQuote(hub)
+    if not testMode:
+        # Argument list without a shell: quotes, $ or backticks inside a label are
+        # passed to hgsql verbatim instead of being interpreted by /bin/sh.
+        subprocess.run(["hgsql"]+hgsqlInput[0].split()+["-e",sql,hgsqlInput[1]],
+                       check=True, stdout=subprocess.PIPE, universal_newlines=True)
+    # The printed line keeps the original layout for the log. It is not re-quoted
+    # for a shell, so check values containing " or $ before pasting it.
+    print("hgsql "+hgsqlInput[0]+"-e \""+sql+"\" "+hgsqlInput[1])
+
+def compareResults(hubPublicDic,currentHub,hub,hgsqlInput,testMode):
+    """
+    Compare the hubPublic values to the queried currentHub values and report.
+
+    hubPublicDic -- dict of hubPublic rows keyed by hubUrl
+    currentHub   -- dict of values read from the live hub; must hold shortLabel,
+                    longLabel, dbCount and dbList (a list of database names)
+    hub          -- hubUrl of the row being checked
+    hgsqlInput, testMode -- passed through to printHgsql()
+
+    For every field that differs, printHgsql() is called with the live value.
+    Raises KeyError when currentHub lacks one of the fields above.
+    """
+    tableRow = hubPublicDic[hub]
+
+    for label in ('shortLabel', 'longLabel'):
+        liveLabel = currentHub[label].strip()
+        if tableRow[label] != liveLabel:
+            printHgsql(hub,label,liveLabel,hgsqlInput,testMode)
+
+    if int(tableRow['dbCount']) != int(currentHub['dbCount']):
+        printHgsql(hub,'dbCount',currentHub['dbCount'],hgsqlInput,testMode)
+
+    # Split on commas and drop empty pieces rather than chopping the last character:
+    # that keeps the final name intact when the stored list has no trailing comma,
+    # and lets an empty stored list compare equal to an empty live list.
+    tableDbs = {db for db in tableRow['dbList'].split(',') if db}
+    if tableDbs != set(currentHub['dbList']):
+        # Each name is written with a trailing comma; an empty list is written as an
+        # empty string, not a lone ",".
+        newDbList = "".join(db+"," for db in currentHub['dbList'])
+        printHgsql(hub,'dbList',newDbList,hgsqlInput,testMode)
+    
+def hubPublicCompare(hubPublicDic,hgsqlInput,testMode):
+    """
+    Query hub.txt files and compare values to hubPublic values.
+
+    hubPublicDic is the dict of hubPublic rows keyed by hubUrl; hgsqlInput and
+    testMode are passed through to compareResults(). A failure while fetching,
+    parsing or updating one hub is reported and the loop moves on to the next hub.
+    """
+    for hub in hubPublicDic:
+        try:
+            currentHub = buildCurrentHubTxtDic(hub)
+            currentHub = queryHubTxt(currentHub,hub)
+            compareResults(hubPublicDic,currentHub,hub,hgsqlInput,testMode)
+        # Exception, not a bare except: Ctrl-C and sys.exit() must still be able to
+        # stop the run.
+        except Exception as err:
+            print("The following hub has an error or is not responsive: "+str(hub))
+            # The reason goes to stderr so the stdout report keeps its format.
+            print("    reason: "+type(err).__name__+": "+str(err), file=sys.stderr)
+            
+def main():
+    """Read the command line, load the hubPublic table and check every hub in it."""
+    args = parseArgs()
+    # evaluateServer() ends the program itself when the server name is not recognised.
+    connection = evaluateServer(args.server)
+    tableRows = buildPubHubDic(connection)
+    hubPublicCompare(tableRows, connection, args.testMode)
+
+# Run only when executed as a script, so that loading this file as a module
+# does not query the database.
+if __name__ == "__main__":
+    main()