0fffa3c31de4845a9bd3f06c0992f971e4d8a7a3
angie
  Fri Oct 28 15:08:06 2022 -0700
Performance improvements for trees with millions of sequences:
* Use @yceh's usher-sampled-server if configured; it preloads protobufs and can start placing sequences immediately using usher-sampled, a faster version of usher
* Use usher-sampled instead of usher if server is not configured but usher-sampled is available
* Load sample metadata file in a pthread while usher(-sampled(-server)) or matUtils is running
* Skip checking for sample name clashes in uploaded fasta when using usher-sampled(-server)'s new --no-ignore-prefix option (but look for the prefix when parsing results)
* Avoid parsing the protobuf and traversing the big tree unless absolutely necessary
** Subtrees from usher/matUtils have not included condensed nodes in a long time; remove lots of condensedNodes/summarization code from phyloPlace.c, runUsher.c, writeCustomTracks.c
** Use subtrees instead of big tree when possible (in findNearestNeighbor, treeToBaseAlleles, uploadedSamplesTree)
** Skip the informativeBases stuff that inhibits masking of sites from Problematic Sites set when the tree was built with an earlier version -- that pretty much never applies anymore now that only daily-updated trees are offered, not a range from old to new.
** Allow config.ra to specify a flat file of sample names (needed for searching user's uploaded names/IDs before calling matUtils) instead of getting names from the big tree

diff --git src/hg/hgPhyloPlace/hgPhyloPlace.c src/hg/hgPhyloPlace/hgPhyloPlace.c
index bbf9f63..d78765c 100644
--- src/hg/hgPhyloPlace/hgPhyloPlace.c
+++ src/hg/hgPhyloPlace/hgPhyloPlace.c
@@ -1,52 +1,60 @@
 /* hgPhyloPlace - Upload SARS-CoV-2 or MPXV sequence for placement in phylo tree. */
 
-/* Copyright (C) 2020 The Regents of the University of California */
+/* Copyright (C) 2020-2022 The Regents of the University of California */
 
 #include "common.h"
 #include "botDelay.h"
 #include "cart.h"
 #include "cgiApoptosis.h"
 #include "cheapcgi.h"
 #include "hCommon.h"
 #include "hash.h"
+#include "hgConfig.h"
+#include "htmshell.h"
 #include "hui.h"
 #include "jsHelper.h"
 #include "knetUdc.h"
 #include "linefile.h"
+#include "md5.h"
 #include "net.h"
 #include "options.h"
 #include "phyloPlace.h"
 #include "portable.h"
 #include "trackLayout.h"
 #include "udc.h"
 #include "web.h"
+#include "wikiLink.h"
 
 /* Global Variables */
 struct cart *cart = NULL;      // CGI and other variables
 struct hash *oldVars = NULL;   // Old contents of cart before it was updated by CGI
 boolean measureTiming = FALSE; // Print out how long things take
 char *leftLabelWidthForLongNames = "55";// Leave plenty of room for tree and long virus strain names
 
 /* for botDelay call, 10 second for warning, 20 second for immediate exit */
 #define delayFraction   0.25
 static boolean issueBotWarning = FALSE;
 static long enteredMainTime = 0;
 
 #define seqFileVar "sarsCoV2File"
 #define pastedIdVar "namesOrIds"
 #define remoteFileVar "remoteFile"
+#define serverCommandVar "hgpp_serverCommand"
+#define serverCommentVar "hgpp_serverComment"
+#define serverPlainVar "hgpp_serverPlain"
+#define serverSaltyVar "hgpp_serverSalty"
 
 static struct lineFile *lineFileFromFileInput(struct cart *cart, char *fileVar)
 /* Return a lineFile on data from an uploaded file with cart variable name fileVar.
  * If the file is binary, attempt to decompress it.  Return NULL if no data are found
  * or if there is a problem decompressing binary data.  If retFileName is not NULL */
 {
 struct lineFile *lf = NULL;
 // Depending on whether the file is plain text or binary, different cart variables are present.
 char *filePlainContents = cartOptionalString(cart, fileVar);
 char cartVar[2048];
 safef(cartVar, sizeof cartVar, "%s__binary", fileVar);
 char *fileBinaryCoords = cartOptionalString(cart, cartVar);
 // Also get the file name for error reporting.
 safef(cartVar, sizeof cartVar, "%s__filename", fileVar);
 char *fileName = cartOptionalString(cart, cartVar);
@@ -486,30 +494,164 @@
     }
 else
     {
     warn("Unable to read your uploaded data - please choose a file and try again, or click the "
          ""try example" button.");
     // Let the user try again:
     puts("  </div>");
     puts("</form>");
     inputForm(db);
     }
 puts("</div>\n");
 
 newPageEndStuff();
 }
 
+static boolean serverAuthOk(char *plain, char *salty)
+/* Return TRUE if salty equals the md5 hex of "<salt>-<md5 hex of plain>", where <salt> is the
+ * CFG_LOGIN_COOKIE_SALT config setting, or the empty string when that setting is absent. */
+{
+char *cookieSalt = cfgOption(CFG_LOGIN_COOKIE_SALT);
+if (cookieSalt == NULL)
+    cookieSalt = "";
+struct dyString *dy = dyStringCreate("%s-%s", cookieSalt, md5HexForString(plain));
+char *expected = md5HexForString(dy->string);
+dyStringFree(&dy);
+boolean matches = sameOk(salty, expected);
+return matches;
+}
+
+INLINE void maybeComment(char *comment)
+/* End the current stderr log line; a nonempty comment is first appended as ": <comment>". */
+{
+if (isNotEmpty(comment))
+    fprintf(stderr, ": %s", comment);
+fputs("\n", stderr);
+}
+
+#define CONTENT_TYPE "Content-Type: text/plain\n\n"
+
+static void sendServerCommand(char *db)
+/* Admin interface for the usher server, behind minimal auth (to prevent DoS).  If server mode is
+ * configured for db, run command start, reload, stop, "thread N" or "timeout N"; else errAbort. */
+{
+pushWarnHandler(htmlVaBadRequestAbort);
+pushAbortHandler(htmlVaBadRequestAbort);
+char *plain = cgiOptionalString(serverPlainVar);
+char *salty = cgiOptionalString(serverSaltyVar);
+if (isNotEmpty(plain) && isNotEmpty(salty) && serverAuthOk(plain, salty))
+    {
+    if (serverIsConfigured(db))
+        {
+        char *command = cgiString(serverCommandVar);
+        char *comment = cgiOptionalString(serverCommentVar);
+        struct tempName tnCheckServer;
+        trashDirFile(&tnCheckServer, "ct", "usher_check_server", ".txt");
+        FILE *errFile = mustOpen(tnCheckServer.forCgi, "w");
+        boolean serverUp = serverIsRunning(db, errFile);
+        carefulClose(&errFile);
+        if (sameString(command, "start"))
+            {
+            // This one is really a command for the CGI not the server manager fifo (because the
+            // server is not yet running and needs to be started at this point), but uses the
+            // same CGI interface.
+            struct treeChoices *treeChoices = loadTreeChoices(db);
+            if (treeChoices != NULL)
+                {
+                if (serverUp)
+                    errAbort("Server is already running for db %s, see %s",
+                             db, tnCheckServer.forCgi);
+                struct tempName tnServerStartup;
+                trashDirFile(&tnServerStartup, "ct", "usher_server_startup", ".txt");
+                errFile = mustOpen(tnServerStartup.forCgi, "w");
+                fprintf(stderr, "Usher server start for %s", db);
+                maybeComment(comment);
+                boolean success = startServer(db, treeChoices, errFile);
+                carefulClose(&errFile);
+                if (success)
+                    {
+                    fprintf(stderr, "Spawned usher server background process, details in %s\n",
+                            tnServerStartup.forCgi);
+                    printf(CONTENT_TYPE"Started server for %s\n", db);
+                    }
+                else
+                    errAbort("Unable to spawn usher server background process, details in %s",
+                             tnServerStartup.forCgi);
+                }
+            else
+                errAbort("No treeChoices for db=%s", db);
+            }
+        else if (serverUp)
+            {
+            if (sameString(command, "reload"))
+                {
+                struct treeChoices *treeChoices = loadTreeChoices(db);
+                fprintf(stderr, "Usher server reload for %s", db);
+                maybeComment(comment);
+                serverReloadProtobufs(db, treeChoices);
+                printf(CONTENT_TYPE"Sent reload command for %s\n", db);
+                }
+            else if (sameString(command, "stop"))
+                {
+                fprintf(stderr, "Usher server stop for %s", db);
+                maybeComment(comment);
+                serverStop(db);
+                printf(CONTENT_TYPE"Sent stop command for %s\n", db);
+                }
+            else
+                {
+                char commandCopy[16];  // remaining commands are "<word> <positive int>"
+                safecpy(commandCopy, sizeof commandCopy, command);
+                char *words[3];
+                int wordCount = chopLine(commandCopy, words);
+                int val;
+                if (wordCount == 2 && (val = atol(words[1])) > 0)  // words[1] read only if present
+                    {
+                    if (sameString(words[0], "thread"))
+                        {
+                        fprintf(stderr, "Usher server thread count set to %d", val);
+                        maybeComment(comment);
+                        serverSetThreadCount(db, val);
+                        printf(CONTENT_TYPE"Sent thread %d command for %s\n", val, db);
+                        }
+                    else if (sameString(words[0], "timeout"))
+                        {
+                        fprintf(stderr, "Usher server timeout set to %d", val);
+                        maybeComment(comment);
+                        serverSetTimeout(db, val);
+                        printf(CONTENT_TYPE"Sent timeout %d command for %s\n", val, db);
+                        }
+                    else
+                        errAbort("Unrecognized command '%s'", command);
+                    }
+                else
+                    errAbort("Unrecognized command '%s'", command);
+                }
+            }
+        else
+            errAbort("Server for %s is down (see %s), cannot send command '%s'",
+                     db, tnCheckServer.forCgi, command);
+        }
+    else
+        errAbort("Usher server mode not configured for db=%s", db);
+    }
+else
+    errAbort("Bad request");
+popWarnHandler();
+popAbortHandler();
+}
+
 static void doMiddle(struct cart *theCart)
 /* Set up globals and make web page */
 {
 cart = theCart;
 char *db = NULL, *genome = NULL;
 // Get the current db from the cart
 getDbAndGenome(cart, &db, &genome, oldVars);
 
 int timeout = cartUsualInt(cart, "udcTimeout", 300);
 if (udcCacheTimeout() < timeout)
     udcSetCacheTimeout(timeout);
 knetUdcInstall();
 
 measureTiming = cartUsualBoolean(cart, "measureTiming", measureTiming);
 
@@ -527,30 +669,34 @@
     char *url = cgiString(remoteFileVar);
     struct lineFile *lf = netLineFileOpen(url);
     resultsPage(db, lf);
     }
 else if (isNotEmpty(trimSpaces(cgiOptionalString(pastedIdVar))))
     {
     char *pastedIds = cgiString(pastedIdVar);
     struct lineFile *lf = lineFileOnString("pasted names/IDs", TRUE, pastedIds);
     resultsPage(db, lf);
     }
 else if (cgiOptionalString(seqFileVar) || cgiOptionalString(seqFileVar "__filename"))
     {
     struct lineFile *lf = lineFileFromFileInput(cart, seqFileVar);
     resultsPage(db, lf);
     }
+else if (isNotEmpty(cgiOptionalString(serverCommandVar)))
+    {
+    sendServerCommand(db);
+    }
 else
     mainPage(db);
 }
 
 #define LD_LIBRARY_PATH "LD_LIBRARY_PATH"
 
 static void addLdLibraryPath()
 /* usher requires a tbb lib that is not in the yum package tbb-devel, so for now
  * I'm adding the .so files to hgPhyloPlaceData.  Set environment variable LD_LIBRARY_PATH
  * to pick them up from there. */
 {
 char *oldValue = getenv(LD_LIBRARY_PATH);
 struct dyString *dy = dyStringNew(0);
 if (startsWith("/", PHYLOPLACE_DATA_DIR))
     dyStringAppend(dy, PHYLOPLACE_DATA_DIR);
@@ -559,27 +705,29 @@
     char cwd[4096];
     getcwd(cwd, sizeof cwd);
     dyStringPrintf(dy, "%s/%s", cwd, PHYLOPLACE_DATA_DIR);
     }
 if (isNotEmpty(oldValue))
     dyStringPrintf(dy, ":%s", oldValue);
 setenv(LD_LIBRARY_PATH, dyStringCannibalize(&dy), TRUE);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 /* Null terminated list of CGI Variables we don't want to save to cart */
 char *excludeVars[] = {"submit", "Submit",
                        seqFileVar, seqFileVar "__binary", seqFileVar "__filename",
-                       pastedIdVar,
+                       pastedIdVar, remoteFileVar,
+                       serverCommandVar, serverCommentVar, serverPlainVar, serverSaltyVar,
                        NULL};
 enteredMainTime = clock1000();
 issueBotWarning = earlyBotCheck(enteredMainTime, "hgPhyloPlace", delayFraction, 0, 0, "html");
 
 cgiSpoof(&argc, argv);
 oldVars = hashNew(10);
 addLdLibraryPath();
+// Hand off to doMiddle through the cart shell, passing along excludeVars and oldVars.
 cartEmptyShellNoContent(doMiddle, hUserCookie(), excludeVars, oldVars);
 cgiExitTime("hgPhyloPlace", enteredMainTime);
 return 0;
 }