0fffa3c31de4845a9bd3f06c0992f971e4d8a7a3
angie
Fri Oct 28 15:08:06 2022 -0700
Performance improvements for trees with millions of sequences:
* Use @yceh's usher-sampled-server if configured; it preloads protobufs and can start placing sequences immediately using usher-sampled, a faster version of usher
* Use usher-sampled instead of usher if server is not configured but usher-sampled is available
* Load sample metadata file in a pthread while usher(-sampled(-server)) or matUtils is running
* Skip checking for sample name clashes in uploaded fasta when using usher-sampled(-server)'s new --no-ignore-prefix option (but look for the prefix when parsing results)
* Avoid parsing the protobuf and traversing the big tree unless absolutely necessary
** Subtrees from usher/matUtils have not included condensed nodes in a long time; remove lots of condensedNodes/summarization code from phyloPlace.c, runUsher.c, writeCustomTracks.c
** Use subtrees instead of big tree when possible (in findNearestNeighbor, treeToBaseAlleles, uploadedSamplesTree)
** Skip the informativeBases stuff that inhibits masking of sites from Problematic Sites set when the tree was built with an earlier version -- that pretty much never applies anymore now that only daily-updated trees are offered, not a range from old to new.
** Allow config.ra to specify a flat file of sample names (needed for searching user's uploaded names/IDs before calling matUtils) instead of getting names from the big tree

diff --git src/hg/hgPhyloPlace/hgPhyloPlace.c src/hg/hgPhyloPlace/hgPhyloPlace.c
index bbf9f63..d78765c 100644
--- src/hg/hgPhyloPlace/hgPhyloPlace.c
+++ src/hg/hgPhyloPlace/hgPhyloPlace.c
@@ -1,52 +1,60 @@
 /* hgPhyloPlace - Upload SARS-CoV-2 or MPXV sequence for placement in phylo tree. */
 
-/* Copyright (C) 2020 The Regents of the University of California */
+/* Copyright (C) 2020-2022 The Regents of the University of California */
 
 #include "common.h"
 #include "botDelay.h"
 #include "cart.h"
 #include "cgiApoptosis.h"
 #include "cheapcgi.h"
 #include "hCommon.h"
 #include "hash.h"
+#include "hgConfig.h"
+#include "htmshell.h"
 #include "hui.h"
 #include "jsHelper.h"
 #include "knetUdc.h"
 #include "linefile.h"
+#include "md5.h"
 #include "net.h"
 #include "options.h"
 #include "phyloPlace.h"
 #include "portable.h"
 #include "trackLayout.h"
 #include "udc.h"
 #include "web.h"
+#include "wikiLink.h"
 
 /* Global Variables */
 struct cart *cart = NULL; // CGI and other variables
 struct hash *oldVars = NULL; // Old contents of cart before it was updated by CGI
 boolean measureTiming = FALSE; // Print out how long things take
 char *leftLabelWidthForLongNames = "55";// Leave plenty of room for tree and long virus strain names
 
 /* for botDelay call, 10 second for warning, 20 second for immediate exit */
 #define delayFraction 0.25
 static boolean issueBotWarning = FALSE;
 static long enteredMainTime = 0;
 
 #define seqFileVar "sarsCoV2File"
 #define pastedIdVar "namesOrIds"
 #define remoteFileVar "remoteFile"
+#define serverCommandVar "hgpp_serverCommand"
+#define serverCommentVar "hgpp_serverComment"
+#define serverPlainVar "hgpp_serverPlain"
+#define serverSaltyVar "hgpp_serverSalty"
 
 static struct lineFile *lineFileFromFileInput(struct cart *cart, char *fileVar)
 /* Return a lineFile on data from an uploaded file with cart variable name fileVar.
  * If the file is binary, attempt to decompress it. Return NULL if no data are found
  * or if there is a problem decompressing binary data. If retFileName is not NULL */
 {
 struct lineFile *lf = NULL;
 // Depending on whether the file is plain text or binary, different cart variables are present.
 char *filePlainContents = cartOptionalString(cart, fileVar);
 char cartVar[2048];
 safef(cartVar, sizeof cartVar, "%s__binary", fileVar);
 char *fileBinaryCoords = cartOptionalString(cart, cartVar);
 // Also get the file name for error reporting.
 safef(cartVar, sizeof cartVar, "%s__filename", fileVar);
 char *fileName = cartOptionalString(cart, cartVar);
@@ -486,30 +494,183 @@
     }
 else
     {
     warn("Unable to read your uploaded data - please choose a file and try again, or click the "
          "&quot;try example&quot; button.");
     // Let the user try again:
     puts(" </div>");
     puts("</form>");
     inputForm(db);
     }
 puts("</div>\n");
 
 newPageEndStuff();
 }
 
+static boolean serverAuthOk(char *plain, char *salty)
+/* Return TRUE if salty matches the salted hash of plain, i.e. the md5 hex of
+ * "<salt>-<md5 hex of plain>", where <salt> is the CFG_LOGIN_COOKIE_SALT config option
+ * or the empty string when that option is not set. */
+{
+char *salt = cfgOption(CFG_LOGIN_COOKIE_SALT);
+if (! salt)
+    salt = "";
+char *plainMd5 = md5HexForString(plain);
+struct dyString *dySalted = dyStringCreate("%s-%s", salt, plainMd5);
+char *rightSalty = md5HexForString(dySalted->string);
+boolean ok = sameOk(salty, rightSalty);
+dyStringFree(&dySalted);
+// md5HexForString hands back heap-allocated strings; release them instead of leaking.
+freeMem(rightSalty);
+freeMem(plainMd5);
+return ok;
+}
+
+INLINE void maybeComment(char *comment)
+/* If comment is nonempty, append ": <comment>" to stderr.  Then print a newline regardless
+ * of comment, ending the log line that the caller started on stderr. */
+{
+if (isNotEmpty(comment))
+    fprintf(stderr, ": %s", comment);
+fputc('\n', stderr);
+}
+
+#define CONTENT_TYPE "Content-Type: text/plain\n\n"
+
+static void sendServerCommand(char *db)
+/* If a recognized server command is requested (with minimal auth to prevent DoS), and usher server
+ * is configured, then send the command to the usher server's manager fifo.
+ * Recognized values of the hgpp_serverCommand CGI variable: "start", "reload", "stop",
+ * "thread <N>" and "timeout <N>" with N a positive integer.  On success a text/plain
+ * confirmation is printed; every failure is reported with errAbort while
+ * htmlVaBadRequestAbort is installed as the warn and abort handler. */
+{
+pushWarnHandler(htmlVaBadRequestAbort);
+pushAbortHandler(htmlVaBadRequestAbort);
+char *plain = cgiOptionalString(serverPlainVar);
+char *salty = cgiOptionalString(serverSaltyVar);
+if (isNotEmpty(plain) && isNotEmpty(salty) && serverAuthOk(plain, salty))
+    {
+    if (serverIsConfigured(db))
+        {
+        char *command = cgiString(serverCommandVar);
+        char *comment = cgiOptionalString(serverCommentVar);
+        struct tempName tnCheckServer;
+        trashDirFile(&tnCheckServer, "ct", "usher_check_server", ".txt");
+        FILE *errFile = mustOpen(tnCheckServer.forCgi, "w");
+        boolean serverUp = serverIsRunning(db, errFile);
+        carefulClose(&errFile);
+        if (sameString(command, "start"))
+            {
+            // This one is really a command for the CGI not the server manager fifo (because the
+            // server is not yet running and needs to be started at this point), but uses the
+            // same CGI interface.
+            struct treeChoices *treeChoices = loadTreeChoices(db);
+            if (treeChoices != NULL)
+                {
+                if (serverUp)
+                    errAbort("Server is already running for db %s, see %s",
+                             db, tnCheckServer.forCgi);
+                struct tempName tnServerStartup;
+                trashDirFile(&tnServerStartup, "ct", "usher_server_startup", ".txt");
+                errFile = mustOpen(tnServerStartup.forCgi, "w");
+                fprintf(stderr, "Usher server start for %s", db);
+                maybeComment(comment);
+                boolean success = startServer(db, treeChoices, errFile);
+                carefulClose(&errFile);
+                if (success)
+                    {
+                    fprintf(stderr, "Spawned usher server background process, details in %s\n",
+                            tnServerStartup.forCgi);
+                    printf(CONTENT_TYPE"Started server for %s\n", db);
+                    }
+                else
+                    errAbort("Unable to spawn usher server background process, details in %s",
+                             tnServerStartup.forCgi);
+                }
+            else
+                errAbort("No treeChoices for db=%s", db);
+            }
+        else if (serverUp)
+            {
+            if (sameString(command, "reload"))
+                {
+                struct treeChoices *treeChoices = loadTreeChoices(db);
+                // Same guard as the "start" branch: nothing to reload without tree choices.
+                if (treeChoices == NULL)
+                    errAbort("No treeChoices for db=%s", db);
+                fprintf(stderr, "Usher server reload for %s", db);
+                maybeComment(comment);
+                serverReloadProtobufs(db, treeChoices);
+                printf(CONTENT_TYPE"Sent reload command for %s\n", db);
+                }
+            else if (sameString(command, "stop"))
+                {
+                fprintf(stderr, "Usher server stop for %s", db);
+                maybeComment(comment);
+                serverStop(db);
+                printf(CONTENT_TYPE"Sent stop command for %s\n", db);
+                }
+            else
+                {
+                char commandCopy[16];
+                safecpy(commandCopy, sizeof commandCopy, command);
+                char *words[3];
+                int wordCount = chopLine(commandCopy, words);
+                // Accept only "<word> <positive decimal int>": unlike atol, strtol lets us
+                // reject trailing junk ("4x"), and the round-trip check rejects values
+                // that do not fit in an int.
+                char *numEnd = NULL;
+                long num = (wordCount == 2) ? strtol(words[1], &numEnd, 10) : 0;
+                int val = (int)num;
+                if (wordCount == 2 && numEnd != words[1] && *numEnd == '\0' &&
+                    num > 0 && (long)val == num)
+                    {
+                    if (sameString(words[0], "thread"))
+                        {
+                        fprintf(stderr, "Usher server thread count set to %d", val);
+                        maybeComment(comment);
+                        serverSetThreadCount(db, val);
+                        printf(CONTENT_TYPE"Sent thread %d command for %s\n", val, db);
+                        }
+                    else if (sameString(words[0], "timeout"))
+                        {
+                        fprintf(stderr, "Usher server timeout set to %d", val);
+                        maybeComment(comment);
+                        serverSetTimeout(db, val);
+                        printf(CONTENT_TYPE"Sent timeout %d command for %s\n", val, db);
+                        }
+                    else
+                        errAbort("Unrecognized command '%s'", command);
+                    }
+                else
+                    errAbort("Unrecognized command '%s'", command);
+                }
+            }
+        else
+            errAbort("Server for %s is down (see %s), cannot send command '%s'",
+                     db, tnCheckServer.forCgi, command);
+        }
+    else
+        errAbort("Usher server mode not configured for db=%s", db);
+    }
+else
+    errAbort("Bad request");
+popWarnHandler();
+popAbortHandler();
+}
+
 static void doMiddle(struct cart *theCart)
 /* Set up globals and make web page */
 {
 cart = theCart;
 char *db = NULL, *genome = NULL;
 // Get the current db from the cart
 getDbAndGenome(cart, &db, &genome, oldVars);
 
 int timeout = cartUsualInt(cart, "udcTimeout", 300);
 if (udcCacheTimeout() < timeout)
     udcSetCacheTimeout(timeout);
 knetUdcInstall();
 
 measureTiming = cartUsualBoolean(cart, "measureTiming", measureTiming);
 
@@ -527,30 +688,34 @@
     char *url = cgiString(remoteFileVar);
     struct lineFile *lf = netLineFileOpen(url);
     resultsPage(db, lf);
     }
 else if (isNotEmpty(trimSpaces(cgiOptionalString(pastedIdVar))))
     {
     char *pastedIds = cgiString(pastedIdVar);
     struct lineFile *lf = lineFileOnString("pasted names/IDs", TRUE, pastedIds);
     resultsPage(db, lf);
     }
 else if (cgiOptionalString(seqFileVar) || cgiOptionalString(seqFileVar "__filename"))
     {
     struct lineFile *lf = lineFileFromFileInput(cart, seqFileVar);
     resultsPage(db, lf);
     }
+else if (isNotEmpty(cgiOptionalString(serverCommandVar)))
+    {
+    sendServerCommand(db);
+    }
 else
     mainPage(db);
 }
 
 #define LD_LIBRARY_PATH "LD_LIBRARY_PATH"
 
 static void addLdLibraryPath()
 /* usher requires a tbb lib that is not in the yum package tbb-devel, so for now
  * I'm adding the .so files to hgPhyloPlaceData. Set environment variable LD_LIBRARY_PATH
  * to pick them up from there. */
 {
 char *oldValue = getenv(LD_LIBRARY_PATH);
 struct dyString *dy = dyStringNew(0);
 if (startsWith("/", PHYLOPLACE_DATA_DIR))
     dyStringAppend(dy, PHYLOPLACE_DATA_DIR);
@@ -559,27 +724,29 @@
     char cwd[4096];
     getcwd(cwd, sizeof cwd);
     dyStringPrintf(dy, "%s/%s", cwd, PHYLOPLACE_DATA_DIR);
     }
 if (isNotEmpty(oldValue))
     dyStringPrintf(dy, ":%s", oldValue);
 setenv(LD_LIBRARY_PATH, dyStringCannibalize(&dy), TRUE);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 /* Null terminated list of CGI Variables we don't want to save to cart */
 char *excludeVars[] = {"submit", "Submit",
                        seqFileVar, seqFileVar "__binary", seqFileVar "__filename",
-                       pastedIdVar,
+                       pastedIdVar, remoteFileVar,
+                       serverCommandVar, serverCommentVar, serverPlainVar, serverSaltyVar,
                        NULL};
 enteredMainTime = clock1000();
 issueBotWarning = earlyBotCheck(enteredMainTime, "hgPhyloPlace", delayFraction, 0, 0, "html");
 
 cgiSpoof(&argc, argv);
 oldVars = hashNew(10);
 addLdLibraryPath();
+
 cartEmptyShellNoContent(doMiddle, hUserCookie(), excludeVars, oldVars);
 cgiExitTime("hgPhyloPlace", enteredMainTime);
 return 0;
 }