b3068c5987f47d7ccc728cb23e1786bcc8d45bdb
steve
  Mon Nov 25 13:49:55 2013 -0800
Added new copyHgcentral script and added to makefile (redmine #6508)
diff --git src/utils/qa/copyHgcentral src/utils/qa/copyHgcentral
new file mode 100755
index 0000000..16a6201
--- /dev/null
+++ src/utils/qa/copyHgcentral
@@ -0,0 +1,440 @@
+#!/usr/bin/env python2.7
+
+##################################################################
+#
+#  11-08-13
+#  Steve Heitner
+#
+#  Copies entries from hgcentraltest -> hgcentralbeta -> hgcentral
+#
+##################################################################
+
+import sys
+import time
+import os.path
+import subprocess
+import pipes
+import re
+
+
+def parseCommandLine(args):
+    """This function parses the command line to ensure that there are either
+       3 or 5 arguments.  Test mode requires only 3 arguments, but will still
+       run with 5 arguments (it just ignores the last 2).  Execute mode
+       requires 5 arguments.  Any number of arguments other than 3 or 5 will
+       invoke the usage statement.  A return code of 0 will prevent the
+       script from going any further."""
+    if len(args) not in [4, 6]:
+        print "\n  copies entries from hgcentraltest -> hgcentralbeta\n" \
+              "              or from hgcentralbeta -> hgcentral\n" \
+              "\n  usage: copyHgcentral mode assembly table [origin] [destination]\n" \
+              "\n  - valid entries for mode are:\n" \
+              "    - test\n" \
+              "    - execute\n" \
+              "\n    - test mode will display the current status of hgcentraltest,\n" \
+              "      hgcentralbeta and hgcentral and will alert the user if any\n" \
+              "      differences exist\n" \
+              "\n    - when running in test mode, it is not necessary to specify\n" \
+              "      an origin or destination\n" \
+              "\n    - execute mode will copy entries from the origin to the destination\n" \
+              "      for the assembly and table specified\n" \
+              "\n    - test mode MUST be run before running in execute mode;\n" \
+              "      if the specified table has changed since test mode was\n" \
+              "      run, test mode must be run again\n" \
+              "\n  - assembly is the assembly whose entries you want to copy\n" \
+              "    (e.g., hg19, mm10, etc.)\n" \
+              "\n  - valid entries for table are:\n" \
+              "    - blatServers\n" \
+              "    - dbDb\n" \
+              "    - defaultDb\n" \
+              "    - genomeClade\n" \
+              "    - all (copies entries from all of the above tables)\n" \
+              "\n  - valid entries for origin/destination are:\n" \
+              "    - dev\n" \
+              "    - beta\n" \
+              "    - rr\n"
+        return 0
+    else:
+        return 1
+
+
+def errorChecker(args):
+    """This function parses the command line to find any invalid entries and
+       if it finds them, outputs the appropriate error message.  A return
+       code of 0 will prevent the script from going any further."""
+    if args[1] not in ["test", "execute"]:
+        print "\n *** Mode must be 'test' or 'execute' ***\n"
+        return 0
+    elif args[3] not in ["blatServers", "dbDb", "defaultDb", "genomeClade", "all"]:
+        print "\n *** Valid values for table are: blatServers, dbDb, defaultDb, genomeClade, " \
+              "all ***\n"
+        return 0
+    elif args[1] == "execute":
+        if len(args) != 6:
+            print "\n *** An origin and destination must be specified in execute mode ***\n"
+            return 0
+        elif args[4] not in ["dev", "beta"]:
+            print "\n *** The origin must be 'dev' or 'beta' ***\n"
+            return 0
+        elif args[4] == "dev":
+            if args[5] != "beta":
+                print "\n *** When the origin is 'dev', the destination must be 'beta' ***\n"
+                return 0
+            else:
+                return 1
+        elif args[4] == "beta":
+            if args[5] != "rr":
+                print "\n *** When the origin is 'beta', the destination must be 'rr' ***\n"
+                return 0
+            else:
+                return 1
+        else:
+            return 1
+    else:
+        return 1
+
+
+def callHgsql(database, command):
+    """ Run hgsql command using subprocess, return stdout data if no error."""
+    cmd = ["hgsql", database, "-Ne", command]
+    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    cmdout, cmderr = p.communicate()
+    if p.returncode != 0:
+        # keep command arguments nicely quoted
+        cmdstr = " ".join([pipes.quote(arg) for arg in cmd])
+        raise Exception("Error from: " + cmdstr + ": " + cmderr)
+    return cmdout
+
+
+def callHgsql2(database, command, server):
+    """ Run hgsql command using subprocess, return stdout data if no error."""
+    cmd = ["hgsql", database, "-Ne", command, "-h", server]
+    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    cmdout, cmderr = p.communicate()
+    if p.returncode != 0:
+        # keep command arguments nicely quoted
+        cmdstr = " ".join([pipes.quote(arg) for arg in cmd])
+        raise Exception("Error from: " + cmdstr + ": " + cmderr)
+    return cmdout
+
+
+def printOutput(tableName, dbName, queryResult, count):
+    if count == 1:
+        print "--------------------------------------------------\n" \
+              "--------------------------------------------------\n" \
+              "<<<", tableName, ">>>\n"
+    print dbName + "\n-------------\n" + queryResult + "\n"
+
+
+def createFile(fileName, queryResult):
+    f = open(fileName, 'w')
+    f.write(queryResult)
+    f.close
+
+
+def parseDbDb(dbDbOrigin, dbDbDestination, dbDbAssembly):
+    """This function parses dbDb data specifically.  It loads the contents
+       of dbDb into an array and compares the contents of the origin and
+       destination arrays.  When it finds differences, it saves the index
+       numbers where differences exist into a diff array.  When it is
+       finished comparing, it outputs the contents of the diff array."""
+    dbDbDev = callHgsql("hgcentraltest", "select * from dbDb where name='" + dbDbAssembly + "'")
+    dbDbDev = dbDbDev.rstrip()
+    dbDbDev = dbDbDev.split("\t")
+    dbDbBeta = callHgsql2("hgcentralbeta", "select * from dbDb where name='" + dbDbAssembly + "'",
+                          "mysqlbeta")
+    dbDbBeta = dbDbBeta.rstrip()
+    dbDbBeta = dbDbBeta.split("\t")
+    dbDbRr = callHgsql2("hgcentral", "select * from dbDb where name='" + dbDbAssembly + "'",
+                        "genome-centdb")
+    dbDbRr = dbDbRr.rstrip()
+    dbDbRr = dbDbRr.split("\t")
+    dbDbArray = ["name", "description", "nibPath", "organism", "defaultPos", "active", "orderKey",
+                 "genome", "scientificName", "htmlPath", "hgNearOk", "hgPbOk", "sourceName",
+                 "taxId"]
+    diffs = 0
+    diffArray = []
+    if dbDbOrigin == "dev":
+        dbDb1 = dbDbDev
+        dbDb2 = dbDbBeta
+    else:
+        dbDb1 = dbDbBeta
+        dbDb2 = dbDbRr
+    for i in range(0,13):
+        if dbDb1[i] != dbDb2[i]:
+            diffArray.append(i)
+            diffs += 1
+    print dbDbOrigin + ":"
+    for i in range(0,diffs):
+        print dbDbArray[diffArray[i]], "=", dbDb1[diffArray[i]]
+    print "\n" + dbDbDestination + ":"
+    for i in range(0,diffs):
+        print dbDbArray[diffArray[i]], "=", dbDb2[diffArray[i]]
+    print
+
+
+def runQuery(queryMode, queryTable, queryField, queryAssembly, queryOrder, assembly2, queryOrigin,
+             queryDestination):
+    """This is the main function.  In test mode, it queries all three servers
+       and tests for special cases.  It outputs the state of all three servers
+       and any relevant error messages.  In execute mode, it checks to make
+       sure test mode has been run first.  If test mode has not been run, it
+       forces the user to run test mode.  If the data has changed on any server
+       since test mode was run, it forces the user to run test mode again.  If
+       the data is identical between the origin and destination, it doesn't copy
+       anything.  If the data is different between the origin and destination,
+       it informs the user and makes the user respond before copying.  If the
+       destination server is blank, it copies the data from the origin to the
+       destination."""
+    fileDev = queryTable + "." + assembly2 + ".hgcentraltest"
+    fileBeta = queryTable + "." + assembly2 + ".hgcentralbeta"
+    fileRr = queryTable + "." + assembly2 + ".hgcentral"
+    if queryMode == "test":
+        queryDev = callHgsql("hgcentraltest", "select * from " + queryTable + " where " +
+                             queryField + "='" + queryAssembly + "' order by " + queryOrder)
+        queryDev = queryDev.rstrip()
+        createFile(fileDev, queryDev)
+        queryBeta = callHgsql2("hgcentralbeta", "select * from " + queryTable + " where " +
+                               queryField + "='" + queryAssembly + "' order by " +
+                               queryOrder, "mysqlbeta")
+        queryBeta = queryBeta.rstrip()
+        createFile(fileBeta, queryBeta)
+        queryRr = callHgsql2("hgcentral", "select * from " + queryTable + " where " + queryField +
+                             "='" + queryAssembly + "' order by " + queryOrder, "genome-centdb")
+        queryRr = queryRr.rstrip()
+        createFile(fileRr, queryRr)
+        if queryDev == "" and queryBeta == "" and queryRr == "":
+            print "--------------------------------------------------\n" \
+                  "--------------------------------------------------\n" \
+                  "<<<", queryTable, ">>>\n\n" \
+                  "*** There is no", queryTable, "data for", assembly2 + ".  Check to make sure " \
+                  "you typed the\nassembly name properly. ***\n\n"
+        else:
+            printOutput(queryTable, "hgcentraltest", queryDev, 1)
+            printOutput(queryTable, "hgcentralbeta", queryBeta, 2)
+            printOutput(queryTable, "hgcentral", queryRr, 3)
+            if queryDev != queryBeta:
+                print "*** There are", queryTable, "differences between dev and beta ***"
+                if queryTable == "dbDb" and queryDev != "" and queryBeta != "":
+                    parseDbDb("dev", "beta", assembly2)
+                else:
+                    print
+            else:
+                print "*** The", queryTable, "data on dev and beta is identical ***\n"
+            if queryBeta != queryRr:
+                print "*** There are", queryTable, "differences between beta and rr ***"
+                if queryTable == "dbDb" and queryBeta != "" and queryRr != "":
+                    parseDbDb("beta", "rr", assembly2)
+                else:
+                    print
+            else:
+                print "*** The", queryTable, "data on beta and rr is identical ***\n"
+            print
+    if queryMode == "execute":
+        if not os.path.isfile(fileDev) or not os.path.isfile(fileBeta) or not \
+                os.path.isfile(fileRr):
+            print "--------------------------------------------------\n" \
+                  "--------------------------------------------------\n" \
+                  "<<<", queryTable, ">>>\n\n" \
+                  "*** Test mode must be run before execute mode.  If test mode was already " \
+                  "run, it is\npossible that one of the output files was deleted.  Please " \
+                  "re-run test mode. ***\n\n"
+        else:
+            f = open(fileDev, 'r')
+            queryTestDev = f.read()
+            queryExecuteDev = callHgsql("hgcentraltest", "select * from " + queryTable +
+                                        " where " + queryField + "='" + queryAssembly +
+                                        "' order by " + queryOrder)
+            queryExecuteDev = queryExecuteDev.rstrip()
+            f = open(fileBeta, 'r')
+            queryTestBeta = f.read()
+            queryExecuteBeta = callHgsql2("hgcentralbeta", "select * from " + queryTable +
+                                          " where " + queryField + "='" + queryAssembly +
+                                          "' order by " + queryOrder, "mysqlbeta")
+            queryExecuteBeta = queryExecuteBeta.rstrip()
+            f = open(fileRr, 'r')
+            queryTestRr = f.read()
+            queryExecuteRr = callHgsql2("hgcentral", "select * from " + queryTable + " where " +
+                                        queryField + "='" + queryAssembly + "' order by " +
+                                        queryOrder, "genome-centdb")
+            queryExecuteRr = queryExecuteRr.rstrip()
+            f.close()
+            if queryTestDev != queryExecuteDev or queryTestBeta != queryExecuteBeta or \
+                    queryTestRr != queryExecuteRr:
+                print "--------------------------------------------------\n" \
+                      "--------------------------------------------------\n" \
+                      "<<<", queryTable, ">>>\n\n" \
+                      "*** The", queryTable, "data has changed since test mode was run.  " \
+                      "Please re-run test mode. ***\n"
+            else:
+                if queryOrigin == "dev":
+                    queryExecuteOrigin = queryExecuteDev
+                    queryExecuteDestination = queryExecuteBeta
+                else:
+                    queryExecuteOrigin = queryExecuteBeta
+                    queryExecuteDestination = queryExecuteRr
+                if queryExecuteOrigin == "":
+                    printOutput(queryTable, "hgcentraltest", queryExecuteDev, 1)
+                    printOutput(queryTable, "hgcentralbeta", queryExecuteBeta, 2)
+                    printOutput(queryTable, "hgcentral", queryExecuteRr, 3)
+                    print "\n*** There is no", queryTable, "data on", queryOrigin, \
+                          "to be copied ***\n\n"
+                elif queryExecuteOrigin == queryExecuteDestination:
+                    printOutput(queryTable, "hgcentraltest", queryExecuteDev, 1)
+                    printOutput(queryTable, "hgcentralbeta", queryExecuteBeta, 2)
+                    printOutput(queryTable, "hgcentral", queryExecuteRr, 3)
+                    print "\n*** The", queryTable, "data on", queryOrigin, "and", queryDestination, \
+                          "is identical.  Nothing was copied. ***\n"
+                else:
+                    proceed = 1
+                    eraseFirst = 0
+                    clade = ""
+                    cladeListBeta = ""
+                    if queryTable == "genomeClade":
+                        clade = callHgsql("hgcentraltest", "select clade from genomeClade where "
+                                          "genome='" + genomeClade + "' limit 1")
+                        clade = clade.rstrip()
+                        cladeListBeta = callHgsql2("hgcentralbeta", "select distinct(clade) from"
+                                       " genomeClade order by clade", "mysqlbeta")
+                        cladeListBeta = cladeListBeta.rstrip()
+                        cladeListBeta = cladeListBeta.split()
+                    if queryOrigin == "dev" and queryTable == "genomeClade" and \
+                            queryExecuteBeta == "" and clade not in cladeListBeta:
+                        proceed = 0
+                        printOutput(queryTable, "hgcentraltest", queryExecuteDev, 1)
+                        printOutput(queryTable, "hgcentralbeta", queryExecuteBeta, 2)
+                        printOutput(queryTable, "hgcentral", queryExecuteRr, 3)
+                        print "*** Copying aborted.  The", clade, "clade exists only on " \
+                              "dev.  Please consult with the\nappropriate engineer to find " \
+                              "the correct clade and priority value for", assembly2, "and\n" \
+                              "manually add this entry to the proper clade on beta. ***\n"
+# Uncomment the following line and delete the line below that once copying from beta -> rr is active
+#                    if queryExecuteDestination != "":
+                    if queryOrigin == "dev" and queryExecuteDestination != "":
+                        input = ""
+                        while input not in ["yes", "no"]:
+                            input = raw_input("The existing " + queryTable + " data on " +
+                                              queryDestination + " differs from the " +
+                                              queryTable + " data on " + queryOrigin +
+                                              ".  If you proceed,\nthis data will be overwritten.  "
+                                              "Are you sure you wish to proceed? (yes/no): ");
+                            if input not in ["yes", "no"]:
+                                print "\n*** Please respond with 'yes' or 'no' ***\n"
+                        if input == "yes":
+                            eraseFirst = 1
+                            print
+                        else:
+                            proceed = 0
+                            print
+                            printOutput(queryTable, "hgcentraltest", queryExecuteDev, 1)
+                            printOutput(queryTable, "hgcentralbeta", queryExecuteBeta, 2)
+                            printOutput(queryTable, "hgcentral", queryExecuteRr, 3)
+                            print "*** Copying aborted for", queryTable, "***\n"
+                    if proceed == 1:
+                        if queryDestination == "beta":
+                            if eraseFirst == 1:
+                                callHgsql2("hgcentralbeta", "delete from " + queryTable +
+                                           " where " + queryField + "='" + queryAssembly +
+                                           "'", "mysqlbeta")
+                            callHgsql2("hgcentralbeta", "load data local infile '" + fileDev +
+                                       "' into table " + queryTable, "mysqlbeta")
+                            queryExecuteBeta = callHgsql2("hgcentralbeta", "select * from " +
+                                                          queryTable + " where " + queryField +
+                                                          "='" + queryAssembly + "' order by " +
+                                                          queryOrder, "mysqlbeta")
+                            queryExecuteBeta = queryExecuteBeta.rstrip()
+# Delete the following 5 lines when copying from beta -> rr is active
+                            printOutput(queryTable, "hgcentraltest", queryExecuteDev, 1)
+                            printOutput(queryTable, "hgcentralbeta", queryExecuteBeta, 2)
+                            printOutput(queryTable, "hgcentral", queryExecuteRr, 3)
+                            print "***", queryTable, "successfully copied from", queryOrigin, "to", \
+                                  queryDestination, "***\n"
+                        else:
+# Delete the following 21 lines and uncomment the rest when copying from beta -> rr is active
+                            print "--------------------------------------------------\n" \
+                                  "--------------------------------------------------\n" \
+                                  "<<<", queryTable, ">>>\n\n" \
+                                  "\n *** Automatic copying to hgcentral is currently disabled " \
+                                  "***\n\n *** If you would like to manually copy", queryTable, \
+                                  "entries from beta to rr," \
+                                  "\n     run the following command: ***\n\n\n" \
+                                  "  hgsql -e \"load data local infile '" + fileBeta + "' into " \
+                                  "table", queryTable + "\" hgcentral -h genome-centdb\n\n"
+                            if queryExecuteRr != "":
+                                print " *** WARNING:", queryTable, "on hgcentral already " \
+                                      "contains data for", assembly2 + ".\n     If you run " \
+                                      "the above command without first deleting the existing " \
+                                      "\n     data from hgcentral, it will create duplicate " \
+                                      "entries. ***\n\n"
+                            if queryTable == "dbDb":
+                                print " *** If you manually copy dbDb from beta to rr, be sure to " \
+                                      "also set\n     active=0 with the following " \
+                                      "command: ***\n\n\n" \
+                                      "  hgsql -e \"update dbDb set active=0 where name='" + \
+                                      assembly2 + "'\" hgcentral -h " \
+                                      "genome-centdb\n\n\n"
+                            print " *** If you manually copy", queryTable, "from beta to rr, be " \
+                                  "sure to run test mode\n     again to verify your changes ***\n"
+#                            if eraseFirst == 1:
+#                                callHgsql2("hgcentral", "delete from " + queryTable + " where " +
+#                                           queryField + "='" + queryAssembly + "'", "genome-centdb")
+#                            callHgsql2("hgcentral", "load data local infile '" + fileBeta +
+#                                       "' into table " + queryTable, "genome-centdb")
+#                            if queryTable == "dbDb":
+#                                callHgsql2("hgcentral", "update dbDb set active=0 where name='" +
+#                                           queryAssembly + "'", "genome-centdb")
+#                            queryExecuteRr = callHgsql2("hgcentral", "select * from " +
+#                                                        queryTable + " where " + queryField +
+#                                                        "='" + queryAssembly + "' order by " +
+#                                                        queryOrder, "genome-centdb")
+#                            queryExecuteRr = queryExecuteRr.rstrip()
+#                        printOutput(queryTable, "hgcentraltest", queryExecuteDev, 1)
+#                        printOutput(queryTable, "hgcentralbeta", queryExecuteBeta, 2)
+#                        printOutput(queryTable, "hgcentral", queryExecuteRr, 3)
+#                        print "***", queryTable, "successfully copied from", queryOrigin, "to", \
+#                              queryDestination, "***\n"
+#                        if queryDestination == "rr" and queryTable == "dbDb":
+#                            print "*** active set to 0 on rr ***\n"
+            print
+
+
+def sqlRetrieve(retrieveAssembly, retrieveTable, retrieveField):
+    """Querying defaultDb and genomeClade relies on the genome name rather
+       than the assembly name, so this function is run to obtain the
+       appropriate values to plug into the runQuery function."""
+    assemblyChop = re.sub(r'\d', '', retrieveAssembly)
+    queryRetrieve = callHgsql("hgcentraltest", "select genome from " + retrieveTable + " where " +
+                              retrieveField + " like '" + assemblyChop + "%' limit 1")
+    return queryRetrieve
+
+
+"""This is the beginning of the script main body"""
+valid = parseCommandLine(sys.argv)
+if valid == 1:
+    valid = errorChecker(sys.argv)
+    if valid == 1:
+        mode = sys.argv[1]
+        assembly = sys.argv[2]
+        table = sys.argv[3]
+        origin = ""
+        destination = ""
+        if len(sys.argv) == 6:
+            origin = sys.argv[4]
+            destination = sys.argv[5]
+        print
+        if table in ["blatServers", "all"]:
+            runQuery(mode, "blatServers", "db", assembly, "port", assembly, origin,
+                     destination)
+        if table in ["dbDb", "all"]:
+            runQuery(mode, "dbDb", "name", assembly, "name", assembly, origin,
+                     destination)
+        if table in ["defaultDb", "all"]:
+            genome = sqlRetrieve(assembly, "defaultDb", "name")
+            genome = genome.rstrip()
+            runQuery(mode, "defaultDb", "genome", genome, "genome", assembly, origin,
+                     destination)
+        if table in ["genomeClade", "all"]:
+            genomeClade = sqlRetrieve(assembly, "defaultDb", "name")
+            genomeClade = genomeClade.rstrip()
+            runQuery(mode, "genomeClade", "genome", genomeClade, "genome", assembly, origin,
+                     destination)