d060b70a11099fb693b769c781796b6b577715fb angie Fri Nov 1 12:47:25 2024 -0700 If sessionDataDir is in hg.conf, make a new UI option to request that the Auspice JSON output files don't expire after two days, using sessionDataDir/hgPhyloPlace/ for storage. diff --git src/hg/hgPhyloPlace/hgPhyloPlace.c src/hg/hgPhyloPlace/hgPhyloPlace.c index b8a242d..242ea18 100644 --- src/hg/hgPhyloPlace/hgPhyloPlace.c +++ src/hg/hgPhyloPlace/hgPhyloPlace.c @@ -1,756 +1,764 @@ /* hgPhyloPlace - Upload SARS-CoV-2 or MPXV sequence for placement in phylo tree. */ /* Copyright (C) 2020-2024 The Regents of the University of California */ #include "common.h" #include "botDelay.h" #include "cart.h" #include "cgiApoptosis.h" #include "cheapcgi.h" #include "hCommon.h" #include "hash.h" #include "hgConfig.h" #include "htmshell.h" #include "hui.h" #include "jsHelper.h" #include "knetUdc.h" #include "linefile.h" #include "md5.h" #include "net.h" #include "options.h" #include "phyloPlace.h" #include "portable.h" #include "trackLayout.h" #include "udc.h" #include "web.h" #include "wikiLink.h" /* Global Variables */ struct cart *cart = NULL; // CGI and other variables struct hash *oldVars = NULL; // Old contents of cart before it was updated by CGI boolean measureTiming = FALSE; // Print out how long things take /* for botDelay call, 10 second for warning, 20 second for immediate exit */ #define delayFraction 0.25 static boolean issueBotWarning = FALSE; static long enteredMainTime = 0; #define orgVar "hgpp_org" #define seqFileVar "sarsCoV2File" #define pastedIdVar "namesOrIds" #define remoteFileVar "remoteFile" #define serverCommandVar "hgpp_serverCommand" #define serverCommentVar "hgpp_serverComment" #define serverPlainVar "hgpp_serverPlain" #define serverSaltyVar "hgpp_serverSalty" static struct lineFile *lineFileFromFileInput(struct cart *cart, char *fileVar) /* Return a lineFile on data from an uploaded file with cart variable name fileVar. * If the file is binary, attempt to decompress it. 
Return NULL if no data are found * or if there is a problem decompressing binary data. */ { struct lineFile *lf = NULL; // Depending on whether the file is plain text or binary, different cart variables are present. char *filePlainContents = cartOptionalString(cart, fileVar); char cartVar[2048]; safef(cartVar, sizeof cartVar, "%s__binary", fileVar); char *fileBinaryCoords = cartOptionalString(cart, cartVar); // Also get the file name for error reporting. safef(cartVar, sizeof cartVar, "%s__filename", fileVar); char *fileName = cartOptionalString(cart, cartVar); if (fileName == NULL) fileName = ""; if (isNotEmpty(filePlainContents)) { lf = lineFileOnString(fileName, TRUE, cloneString(trimSpaces(filePlainContents))); } else if (isNotEmpty(fileBinaryCoords)) { fprintf(stderr, "%s=%s fileBinaryCoords=%s\n", cartVar, fileName, fileBinaryCoords); char *binInfo = cloneString(fileBinaryCoords); char *words[2]; char *mem; unsigned long size; chopByWhite(binInfo, words, ArraySize(words)); mem = (char *)sqlUnsignedLong(words[0]); size = sqlUnsignedLong(words[1]); lf = lineFileDecompressMem(TRUE, mem, size); } return lf; } static void selectOrg(char **pOrg, char **pLabel) /* Search for config files in hgPhyloPlaceData. If there is more than one * supported organism, then make a menu / select input for supported organisms; * reload the page on change. 
*/ { struct slPair *orgLabelList = phyloPlaceOrgList(cart); if (orgLabelList == NULL) errAbort("Sorry, this server is not configured to perform phylogenetic placement."); if (!slPairFind(orgLabelList, *pOrg)) { *pOrg = cloneString(orgLabelList->name); } *pLabel = phyloPlaceOrgSetting(*pOrg, "name"); if (isEmpty(*pLabel)) *pLabel = *pOrg; char *selectVar = orgVar; int orgCount = slCount(orgLabelList); if (orgCount > 1) { char *labels[orgCount]; char *values[orgCount]; struct slPair *orgLabel; int i; for (orgLabel = orgLabelList, i = 0; i < orgCount; orgLabel = orgLabel->next, i++) { values[i] = orgLabel->name; labels[i] = orgLabel->val; } struct dyString *dy = jsOnChangeStart(); jsDropDownCarryOver(dy, selectVar); char *js = jsOnChangeEnd(&dy); puts("

Choose your pathogen: "); cgiMakeDropListFull(selectVar, labels, values, orgCount, *pOrg, "change", js); puts("

"); } else cgiMakeHiddenVar(selectVar, *pOrg); slPairFreeList(&orgLabelList); } static void newPageStartStuff() { // Copied these from hgGtexTrackSettings.c which says "// NOTE: This will likely go to web.c". puts(""); puts(""); //#*** TODO: move this out to a CSS (hardcoding for now because we're doing a standalone push //#*** independent of the release cycle). puts("\n" ); // Container for bootstrap grid layout puts( "
\n"); } static void newPageEndStuff() { puts( "
"); jsIncludeFile("utils.js", NULL); webIncludeFile("inc/gbFooter.html"); webEndJWest(); } #define CHECK_FILE_OR_PASTE_INPUT_JS(fileVarName, pasteVarName) \ "{ var $fileInput = $('input[name="fileVarName"]');" \ " var $pasteInput = $('textarea[name="pasteVarName"]');" \ " if ($fileInput && $fileInput[0] && $fileInput[0].files && !$fileInput[0].files.length &&" \ " $pasteInput && !$pasteInput.val()) {" \ " alert('Please either choose a file or paste in sequence names/IDs first, ' +" \ " 'and then click the upload button.');" \ " return false; " \ " } else if ($fileInput && $fileInput[0] && $fileInput[0].files && " \ " !!$fileInput[0].files.length &&" \ " $pasteInput && !!$pasteInput.val()) {" \ " alert('Sorry, unable to process both a file and pasted-in sequence names/IDs at the ' +" \ " 'same time. Please clear one or the other and then click the upload button.');" \ " return false; " \ " } else { loadingImage.run(); return true; } }" static void inputForm(char *org) /* Ask the user for FASTA or VCF. */ { printf("
\n\n", "hgPhyloPlace"); cartSaveSession(cart); puts("
"); puts("
"); puts("
" "Place your sequences in a global phylogenetic tree
"); char *label = NULL; selectOrg(&org, &label); printf("

Select your FASTA, VCF or list of sequence names/IDs: "); printf("", seqFileVar, seqFileVar); printf("

or paste in sequence names/IDs:
\n"); cgiMakeTextArea(pastedIdVar, "", 10, 70); if (phyloPlaceOrgSetting(org, "nextcladeIndex") == NULL) { // This is not a multi-reference organism, this is an old-style single-reference setup for // which the user can directly choose the tree (i.e. SARS-CoV-2). struct treeChoices *treeChoices = loadTreeChoices(org, org); puts("

"); printf("Phylogenetic tree version: "); char *phyloPlaceTree = cartOptionalString(cart, "phyloPlaceTree"); cgiMakeDropListWithVals("phyloPlaceTree", treeChoices->descriptions, treeChoices->protobufFiles, treeChoices->count, phyloPlaceTree); } puts("

"); printf("Number of samples per subtree showing sample placement: "); int subtreeSize = cartUsualInt(cart, "subtreeSize", 50); struct dyString *dy = dyStringCreate("Number of samples in subtree showing neighborhood of " "placement (max: %d", MAX_SUBTREE_SIZE); if (microbeTraceHost() != NULL) dyStringPrintf(dy, "; max for MicrobeTrace: %d)", MAX_MICROBETRACE_SUBTREE_SIZE); else dyStringAppend(dy, ")"); cgiMakeIntVarWithLimits("subtreeSize", subtreeSize, dy->string, 5, 10, MAX_SUBTREE_SIZE); puts("

"); +char *sessionDataDir = cfgOption("sessionDataDir"); +if (isNotEmpty(sessionDataDir)) + { + puts("Prevent subtree Auspice JSON files from expiring after two days: "); + boolean subtreePersist = cartUsualBoolean(cart, "subtreePersist", FALSE); + cgiMakeCheckBox("subtreePersist", subtreePersist); + puts("

"); + } cgiMakeOnClickSubmitButton(CHECK_FILE_OR_PASTE_INPUT_JS(seqFileVar, pastedIdVar), "submit", "Upload"); char *exampleFile = phyloPlaceOrgSettingPath(org, "exampleFile"); if (isNotEmpty(exampleFile)) { puts("  "); cgiMakeOnClickSubmitButton("{ loadingImage.run(); return true; }", "exampleButton", "Upload Example File"); if (sameString(org, "wuhCor1")) { puts("  "); puts("More example files"); } } puts("

"); // Add a loading image to reassure people that we're working on it when they upload a big file printf("
\n"); printf("
\n"); jsInline("$(document).ready(function() {\n" " loadingImage.init($('#loadingImg'), $('#loadingMsg'), " "'

Uploading and processing your sequences " "may take some time. Please leave this window open while we work on your sequences.

');" "});\n"); puts("
"); puts("
"); puts("
"); puts("
"); puts("

More information

"); printf("

Upload your %s sequence (FASTA or VCF file) to find the most similar\n" "complete, high-coverage samples from \n", label); if (sameString(org, "wuhCor1")) { puts("GISAID\n" "or from public sequence databases (INSDC: GenBank/ENA/DDBJ accessed using " "NCBI Virus,\n" "COG-UK and the\n" "China National Center for Bioinformation), " "and your sequence's placement in the phylogenetic tree generated by the\n" "sarscov2phylo\n" "pipeline.\n"); } else { //#*** TODO get NCBI Virus link that is not hardcoded to MPXV puts("public sequence databases (INSDC: GenBank/ENA/DDBJ accessed using " "NCBI Virus)\n" "and your sequence's placement in a global phylogenetic tree.\n" ); } puts("Placement is performed by\n" "" "Ultrafast Sample placement on Existing tRee (UShER) " "(" "Turakhia et al.). UShER also generates local subtrees to show samples " "in the context of the most closely related sequences. The subtrees can be visualized " "as Genome Browser custom tracks and/or using " "Nextstrain's interactive display " "which supports " "drag-and-drop of local metadata that remains on your computer.\n"); if (microbeTraceHost()) printf("If the subtree size is set to %d or smaller, then subtrees can also be visualized in " "MicrobeTrace, " "a network visualization tool that integrates and overlays genomic, laboratory, and " "epidemiologic data and offers multiple visualization options of your combined data.\n", MAX_MICROBETRACE_SUBTREE_SIZE); puts("

"); if (sameString(org, "wuhCor1")) { puts("

\n" "GISAID data displayed in the Genome Browser are subject to GISAID's\n" "" "Terms and Conditions.\n" "SARS-CoV-2 genome sequences and metadata are available for download from\n" "GISAID EpiCoV™.\n" "

"); puts("

\n" "COVID-19 Pandemic Resources at UCSC

\n"); } puts("
"); puts("
"); puts("
"); puts("
"); puts("

Privacy and sharing

"); puts("

Please do not upload " "Protected Health Information (PHI).

\n" "If even virus sequence files must remain local on your computer, then you can try " "ShUShER " "which runs entirely in your web browser so that no files leave your computer." "

\n" "

We do not store your information " "(aside from the information necessary to display results)\n" "and will not share it with others unless you choose to share your Genome Browser view.

\n" "

In order to enable rapid progress in pandemic research and genomic contact tracing,\n" "please share your sequences by submitting them to an " "INSDC member institution\n" "(NCBI,\n" "EMBL-EBI\n" "or DDBJ)\n"); if (sameString(org, "wuhCor1")) puts("and GISAID\n"); puts(".

\n"); puts("
"); puts("
"); puts("
"); puts(""); puts("
"); puts("
"); } static void mainPage(char *org) { // Start web page with new-style header webStartGbNoBanner(cart, org, "UShER: Upload"); jsInit(); jsIncludeFile("jquery.js", NULL); jsIncludeFile("ajax.js", NULL); newPageStartStuff(); // Hidden form for reloading page when hpp_org select is changed static char *saveVars[] = { orgVar }; jsCreateHiddenForm(cart, cgiScriptName(), saveVars, ArraySize(saveVars)); puts("
" "
\n" "
UShER: Ultrafast Sample placement on Existing tRee
\n" "
\n" "
\n" "
\n" "
\n"); if (hgPhyloPlaceEnabled()) { inputForm(org); } else { puts("
"); puts(" Sorry, this server is not configured to perform phylogenetic placement."); puts("
"); } puts("
\n"); newPageEndStuff(); } static void resultsPage(char *db, char *org, struct lineFile *lf) /* QC the user's uploaded sequence(s) or VCF; if input looks valid then run usher * and display results. */ { // If org is a real database or hub then set db to org. if (hDbExists(org)) db = org; else { // Not a db -- see if it's a hub that is already connected: struct trackHubGenome *hubGenome = trackHubGetGenomeUndecorated(org); if (hubGenome != NULL) db = org; // Otherwise we're counting on the config to specify a .2bit file and we won't make CTs. } webStartGbNoBanner(cart, db, "UShER: Results"); jsIncludeFile("jquery.js", NULL); jsIncludeFile("ajax.js", NULL); newPageStartStuff(); if (issueBotWarning) { char *ip = getenv("REMOTE_ADDR"); botDelayMessage(ip, botDelayMillis); } // Allow 15 minutes for big sets of sequences lazarusLives(15 * 60); puts("
" "
\n" "
UShER: Ultrafast Sample placement on Existing tRee
\n" "
\n" "
\n" "
\n" "
\n"); puts("
"); fflush(stdout); if (lf != NULL) { // Use trackLayout to get hgTracks parameters relevant to displaying trees: struct trackLayout tl; trackLayoutInit(&tl, cart); // Do our best to place the user's samples, make custom tracks if successful: char *phyloPlaceTree = cartOptionalString(cart, "phyloPlaceTree"); int subtreeSize = cartUsualInt(cart, "subtreeSize", 50); boolean success = phyloPlaceSamples(lf, db, org, phyloPlaceTree, measureTiming, subtreeSize, &tl, cart); if (! success) { puts("

"); puts("
"); // Let the user upload something else and try again: inputForm(org); } } else { warn("Unable to read your uploaded data - please choose a file and try again, or click the " ""try example" button."); // Let the user try again: puts("
"); inputForm(org); } puts("\n"); newPageEndStuff(); } static boolean serverAuthOk(char *plain, char *salty) /* Construct a salted hash of plain and compare it to salty. */ { char *salt = cfgOption(CFG_LOGIN_COOKIE_SALT); if (! salt) salt = ""; char *plainMd5 = md5HexForString(plain); struct dyString *dySalted = dyStringCreate("%s-%s", salt, plainMd5); char *rightSalty = md5HexForString(dySalted->string); boolean ok = sameOk(salty, rightSalty); dyStringFree(&dySalted); return ok; } INLINE void maybeComment(char *comment) /* If comment is nonempty, append it to stderr. Then print a newline regardless of comment. */ { if (isNotEmpty(comment)) fprintf(stderr, ": %s", comment); fputc('\n', stderr); } #define CONTENT_TYPE "Content-Type: text/plain\n\n" static void sendServerCommand(char *org) /* If a recognized server command is requested (with minimal auth to prevent DoS), and usher server * is configured, then send the command to the usher server's manager fifo. */ { pushWarnHandler(htmlVaBadRequestAbort); pushAbortHandler(htmlVaBadRequestAbort); char *plain = cgiOptionalString(serverPlainVar); char *salty = cgiOptionalString(serverSaltyVar); if (isNotEmpty(plain) && isNotEmpty(salty) && serverAuthOk(plain, salty)) { if (serverIsConfigured(org)) { char *command = cgiString(serverCommandVar); char *comment = cgiOptionalString(serverCommentVar); struct tempName tnCheckServer; trashDirFile(&tnCheckServer, "ct", "usher_check_server", ".txt"); FILE *errFile = mustOpen(tnCheckServer.forCgi, "w"); boolean serverUp = serverIsRunning(org, errFile); carefulClose(&errFile); if (sameString(command, "start")) { // This one is really a command for the CGI not the server manager fifo (because the // server is not yet running and needs to be started at this point), but uses the // same CGI interface. //#*** TODO implement this at the org level, descending into ref subdirs. For now //#*** this is working because only SARS-CoV-2 has a server and org==ref for it. 
struct treeChoices *treeChoices = loadTreeChoices(org, org); if (treeChoices != NULL) { if (serverUp) errAbort("Server is already running for org %s, see %s", org, tnCheckServer.forCgi); struct tempName tnServerStartup; trashDirFile(&tnServerStartup, "ct", "usher_server_startup", ".txt"); errFile = mustOpen(tnServerStartup.forCgi, "w"); fprintf(stderr, "Usher server start for %s", org); maybeComment(comment); boolean success = startServer(org, treeChoices, errFile); carefulClose(&errFile); if (success) { fprintf(stderr, "Spawned usher server background process, details in %s", tnServerStartup.forCgi); printf(CONTENT_TYPE"Started server for %s\n", org); } else errAbort("Unable to spawn usher server background process, details in %s", tnServerStartup.forCgi); } else errAbort("No treeChoices for org=%s", org); } else if (serverUp) { if (sameString(command, "reload")) { struct treeChoices *treeChoices = loadTreeChoices(org, org); fprintf(stderr, "Usher server reload for %s", org); maybeComment(comment); serverReloadProtobufs(org, treeChoices); printf(CONTENT_TYPE"Sent reload command for %s\n", org); } else if (sameString(command, "stop")) { fprintf(stderr, "Usher server stop for %s", org); maybeComment(comment); serverStop(org); printf(CONTENT_TYPE"Sent stop command for %s\n", org); } else { char commandCopy[16]; safecpy(commandCopy, sizeof commandCopy, command); char *words[3]; int wordCount = chopLine(commandCopy, words); int val; if (wordCount == 2 && (val = atol(words[1])) > 0) { if (sameString(words[0], "thread")) { fprintf(stderr, "Usher server thread count set to %d", val); maybeComment(comment); serverSetThreadCount(org, val); printf(CONTENT_TYPE"Sent thread %d command for %s\n", val, org); } else if (sameString(words[0], "timeout")) { fprintf(stderr, "Usher server timeout set to %d", val); maybeComment(comment); serverSetTimeout(org, val); printf(CONTENT_TYPE"Sent timeout %d command for %s\n", val, org); } else errAbort("Unrecognized command '%s'", command); } 
else errAbort("Unrecognized command '%s'", command); } } else errAbort("Server for %s is down (see %s), cannot send command '%s'", org, tnCheckServer.forCgi, command); } else errAbort("Usher server mode not configured for org=%s", org); } else errAbort("Bad request"); popWarnHandler(); popAbortHandler(); } static void doMiddle(struct cart *theCart) /* Set up globals and make web page */ { cart = theCart; char *db = NULL, *genome = NULL; // Get the current db from the cart getDbAndGenome(cart, &db, &genome, oldVars); // The currently selected organism may or may not be a db/hub. char *org = cartOptionalString(cart, orgVar); if (isEmpty(org)) { // If orgVar is not found but old cart var is set, use it and then remove it to tidy up. org = cartOptionalString(cart, "hpp_ref"); if (isNotEmpty(org)) cartRemove(cart, "hpp_ref"); } if (isEmpty(org)) { // Default to db org = cloneString(db); } int timeout = cartUsualInt(cart, "udcTimeout", 300); if (udcCacheTimeout() < timeout) udcSetCacheTimeout(timeout); knetUdcInstall(); measureTiming = cartUsualBoolean(cart, "measureTiming", measureTiming); char *submitLabel = cgiOptionalString("submit"); char *newExampleButton = cgiOptionalString("exampleButton"); if ((submitLabel && sameString(submitLabel, "try example")) || (newExampleButton && sameString(newExampleButton, "Upload Example File"))) { char *exampleFile = phyloPlaceOrgSettingPath(org, "exampleFile"); struct lineFile *lf = lineFileOpen(exampleFile, TRUE); resultsPage(db, org, lf); } else if (cgiOptionalString(remoteFileVar)) { char *url = cgiString(remoteFileVar); struct lineFile *lf = netLineFileOpen(url); resultsPage(db, org, lf); } else if (isNotEmpty(trimSpaces(cgiOptionalString(pastedIdVar)))) { char *pastedIds = cgiString(pastedIdVar); struct lineFile *lf = lineFileOnString("pasted names/IDs", TRUE, pastedIds); resultsPage(db, org, lf); } else if (cgiOptionalString(seqFileVar) || cgiOptionalString(seqFileVar "__filename")) { struct lineFile *lf = 
lineFileFromFileInput(cart, seqFileVar); resultsPage(db, org, lf); } else if (isNotEmpty(cgiOptionalString(serverCommandVar))) { sendServerCommand(org); } else mainPage(org); } #define LD_LIBRARY_PATH "LD_LIBRARY_PATH" static void addLdLibraryPath() /* usher requires a tbb lib that is not in the yum package tbb-devel, so for now * I'm adding the .so files to hgPhyloPlaceData. Set environment variable LD_LIBRARY_PATH * to pick them up from there. */ { char *oldValue = getenv(LD_LIBRARY_PATH); struct dyString *dy = dyStringNew(0); if (startsWith("/", PHYLOPLACE_DATA_DIR)) dyStringAppend(dy, PHYLOPLACE_DATA_DIR); else { char cwd[4096]; getcwd(cwd, sizeof cwd); dyStringPrintf(dy, "%s/%s", cwd, PHYLOPLACE_DATA_DIR); } if (isNotEmpty(oldValue)) dyStringPrintf(dy, ":%s", oldValue); setenv(LD_LIBRARY_PATH, dyStringCannibalize(&dy), TRUE); } int main(int argc, char *argv[]) /* Process command line. */ { /* Null terminated list of CGI Variables we don't want to save to cart */ char *excludeVars[] = {"submit", "Submit", seqFileVar, seqFileVar "__binary", seqFileVar "__filename", pastedIdVar, remoteFileVar, serverCommandVar, serverCommentVar, serverPlainVar, serverSaltyVar, NULL}; enteredMainTime = clock1000(); issueBotWarning = earlyBotCheck(enteredMainTime, "hgPhyloPlace", delayFraction, 0, 0, "html"); cgiSpoof(&argc, argv); oldVars = hashNew(10); addLdLibraryPath(); cartEmptyShellNoContent(doMiddle, hUserCookie(), excludeVars, oldVars); cgiExitTime("hgPhyloPlace", enteredMainTime); return 0; }