7e58340888377874edaad1dbc5174e20295f890c angie Mon Feb 22 14:17:33 2021 -0800 Support upload of more sequences, add TSV file summarizing sample variants and placements. Requested by Joe de Risi (UCSF). Increase timeout to 10 minutes; make TSV with each sample's ID, nuc muts, AA muts, imputed bases and path from root to sample. Also use Yatish's new -K subtree algorithm in usher: one subtree encompassing all uploaded samples, plus the specified number of samples randomly selected from the rest of the tree. Don't show every single sample name in the title because there can be 1000 samples in the same subtree now. :) diff --git src/hg/hgPhyloPlace/hgPhyloPlace.c src/hg/hgPhyloPlace/hgPhyloPlace.c index d2485e7..1c5c27c 100644 --- src/hg/hgPhyloPlace/hgPhyloPlace.c +++ src/hg/hgPhyloPlace/hgPhyloPlace.c @@ -1,450 +1,466 @@ /* hgPhyloPlace - Upload SARS-CoV-2 sequence for placement in phylo tree. */ /* Copyright (C) 2020 The Regents of the University of California */ #include "common.h" #include "botDelay.h" #include "cart.h" +#include "cgiApoptosis.h" #include "cheapcgi.h" #include "hCommon.h" #include "hash.h" #include "hui.h" #include "jsHelper.h" #include "knetUdc.h" #include "linefile.h" #include "net.h" #include "options.h" #include "phyloPlace.h" #include "portable.h" #include "trackLayout.h" #include "udc.h" #include "web.h" /* Global Variables */ struct cart *cart = NULL; // CGI and other variables struct hash *oldVars = NULL; // Old contents of cart before it was updated by CGI boolean measureTiming = FALSE; // Print out how long things take char *leftLabelWidthForLongNames = "55";// Leave plenty of room for tree and long virus strain names #define seqFileVar "sarsCoV2File" #define remoteFileVar "remoteFile" static struct lineFile *lineFileFromFileInput(struct cart *cart, char *fileVar) /* Return a lineFile on data from an uploaded file with cart variable name fileVar. * If the file is binary, attempt to decompress it. Return NULL if no data are found * or if there is a problem decompressing binary data. If retFileName is not NULL */ { struct lineFile *lf = NULL; // Depending on whether the file is plain text or binary, different cart variables are present. char *filePlainContents = cartOptionalString(cart, fileVar); char cartVar[2048]; safef(cartVar, sizeof cartVar, "%s__binary", fileVar); char *fileBinaryCoords = cartOptionalString(cart, cartVar); // Also get the file name for error reporting. safef(cartVar, sizeof cartVar, "%s__filename", fileVar); char *fileName = cartOptionalString(cart, cartVar); if (fileName == NULL) fileName = ""; if (isNotEmpty(filePlainContents)) { lf = lineFileOnString(fileName, TRUE, cloneString(trimSpaces(filePlainContents))); } else if (isNotEmpty(fileBinaryCoords)) { fprintf(stderr, "%s=%s fileBinaryCoords=%s\n", cartVar, fileName, fileBinaryCoords); char *binInfo = cloneString(fileBinaryCoords); char *words[2]; char *mem; unsigned long size; chopByWhite(binInfo, words, ArraySize(words)); mem = (char *)sqlUnsignedLong(words[0]); size = sqlUnsignedLong(words[1]); lf = lineFileDecompressMem(TRUE, mem, size); } return lf; } static void newPageStartStuff() { // Copied these from hgGtexTrackSettings.c which says "// NOTE: This will likely go to web.c". puts(""); puts(""); //#*** TODO: move this out to a CSS (hardcoding for now because we're doing a standalone push //#*** independent of the release cycle). puts("\n" ); // Container for bootstrap grid layout puts( "
\n"); } static void newPageEndStuff() { puts( "
"); jsIncludeFile("utils.js", NULL); webIncludeFile("inc/gbFooter.html"); webEndJWest(); } #define CHECK_FILE_INPUT_JS "{ var $fileInput = $('input[name="seqFileVar"]'); " \ "if ($fileInput && $fileInput[0] && $fileInput[0].files && !$fileInput[0].files.length) {" \ " alert('Please choose a file first, then click the upload button.');" \ " return false; " \ - "} else { return true; } }" + "} else { loadingImage.run(); return true; } }" static void inputForm() /* Ask the user for FASTA or VCF. */ { printf("
\n\n", "hgPhyloPlace"); cartSaveSession(cart); char *db = "wuhCor1"; cgiMakeHiddenVar("db", db); puts("
"); puts("
"); puts("

Upload your SARS-CoV-2 sequence (FASTA or VCF file) to find the most similar\n" "complete, high-coverage samples from \n" "GISAID\n" "or from public sequence databases (" "NCBI Virus / GenBank,\n" "COG-UK and the\n" "China National Center for Bioinformation), " "and your sequence's placement in the phylogenetic tree generated by the\n" "sarscov2phylo\n" "pipeline.\n" "Placement is performed by\n" "" "Ultrafast Sample placement on Existing tRee (UShER) " "(" "Turakhia et al.). UShER also generates local subtrees to show samples " "in the context of the most closely related sequences. The subtrees can be visualized " "as Genome Browser custom tracks and/or using " "Nextstrain's interactive display " "which supports " "drag-and-drop of local metadata that remains on your computer.

\n"); puts("

Note: " "Please do not upload any files that contain " "Protected Health Information (PHI) " "to UCSC.

\n" "

We do not store your information " "(aside from the information necessary to display results)\n" "and will not share it with others unless you choose to share your Genome Browser view.

\n" "

In order to enable rapid progress in SARS-CoV-2 research and genomic contact tracing,\n" "please share your SARS-CoV-2 sequences by submitting them to an " "INSDC member institution\n" "(NCBI,\n" "EMBL-EBI\n" "or DDBJ)\n" "and GISAID.\n" "

\n"); puts("
"); puts("
"); puts("
"); printf("

Select your FASTA or VCF file: "); printf("", seqFileVar, seqFileVar); struct treeChoices *treeChoices = loadTreeChoices(db); if (treeChoices) { puts("

"); printf("Phylogenetic tree version: "); char *phyloPlaceTree = cartOptionalString(cart, "phyloPlaceTree"); cgiMakeDropListWithVals("phyloPlaceTree", treeChoices->descriptions, treeChoices->protobufFiles, treeChoices->count, phyloPlaceTree); } puts("

"); printf("Number of samples per subtree showing sample placement: "); int subtreeSize = cartUsualInt(cart, "subtreeSize", 50); cgiMakeIntVarWithLimits("subtreeSize", subtreeSize, "Number of samples in subtree showing neighborhood of placement", - 5, 10, 1000); + 5, 10, 2000); puts("

"); cgiMakeOnClickSubmitButton(CHECK_FILE_INPUT_JS, "submit", "upload"); puts("

"); +// Add a loading image to reassure people that we're working on it when they upload a big file +printf("
\n"); +printf("
\n"); +jsInline("$(document).ready(function() {\n" + " loadingImage.init($('#loadingImg'), $('#loadingMsg'), " + "'

Uploading and processing your sequences " + "may take some time. Please leave this window open while we work on your sequences.

');" + "});\n"); + puts("
"); puts("
"); } static void exampleForm() /* Let the user try Russ's example. */ { printf("
\n\n", "hgPhyloPlace"); cartSaveSession(cart); cgiMakeHiddenVar("db", "wuhCor1"); puts("
"); puts("If you don't have a local file, you can try an " "example: "); cgiMakeButton("submit", "try example"); puts("
"); puts("
"); } static void linkToLandingPage() /* David asked for a link back to our covid19 landing page. */ { puts("
"); puts("
"); puts("

"); puts("

\n" "COVID-19 Pandemic Resources at UCSC

\n"); puts("
"); puts("
"); } static void gisaidFooter() /* GISAID wants this on all pages that have anything to do with GISAID samples. */ { puts("
"); puts("
"); puts("

"); puts("

\n" "GISAID data displayed in the Genome Browser are subject to GISAID's\n" "" "Terms and Conditions.\n" "SARS-CoV-2 genome sequences and metadata are available for download from\n" "GISAID EpiCoV™.\n" "

"); puts("
"); puts("
"); } static void mainPage(char *db) { // Start web page with new-style header webStartGbNoBanner(cart, db, "UShER: Upload"); +jsIncludeFile("jquery.js", NULL); +jsIncludeFile("ajax.js", NULL); newPageStartStuff(); puts("
" "
\n" "
UShER: Ultrafast Sample placement on Existing tRee
\n" "
\n" "
\n" "
\n" "
\n"); if (hgPhyloPlaceEnabled()) { inputForm(); exampleForm(); linkToLandingPage(); gisaidFooter(); } else { puts("
"); puts(" Sorry, this server is not configured to perform phylogenetic placement."); puts("
"); } puts("
\n"); newPageEndStuff(); } static void resultsPage(char *db, struct lineFile *lf) /* QC the user's uploaded sequence(s) or VCF; if input looks valid then run usher * and display results. */ { webStartGbNoBanner(cart, db, "UShER: Results"); newPageStartStuff(); hgBotDelay(); +// Allow 10 minutes for big sets of sequences +lazarusLives(10 * 60); + puts("
" "
\n" "
UShER: Ultrafast Sample placement on Existing tRee
\n" "
\n" "
\n" "
\n" "
\n"); // Form submits subtree custom tracks to hgTracks printf("
\n\n", hgTracksName(), cartUsualString(cart, "formMethod", "POST")); cartSaveSession(cart); puts("
"); +fflush(stdout); if (lf != NULL) { // Use trackLayout to get hgTracks parameters relevant to displaying trees: struct trackLayout tl; trackLayoutInit(&tl, cart); // Do our best to place the user's samples, make custom tracks if successful: char *phyloPlaceTree = cartOptionalString(cart, "phyloPlaceTree"); int subtreeSize = cartUsualInt(cart, "subtreeSize", 50); char *ctFile = phyloPlaceSamples(lf, db, phyloPlaceTree, measureTiming, subtreeSize, tl.fontHeight); if (ctFile) { cgiMakeHiddenVar(CT_CUSTOM_TEXT_VAR, ctFile); if (tl.leftLabelWidthChars < 0 || tl.leftLabelWidthChars == leftLabelWidthDefaultChars) cgiMakeHiddenVar(leftLabelWidthVar, leftLabelWidthForLongNames); cgiMakeButton("submit", "view in Genome Browser"); puts("
"); puts("
"); } else { puts("
"); puts(""); // Let the user upload something else and try again: inputForm(); } } else { warn("Unable to read your uploaded data - please choose a file and try again, or click the " ""try example" button."); // Let the user try again: puts(" "); puts(""); inputForm(); exampleForm(); } puts("\n"); linkToLandingPage(); gisaidFooter(); newPageEndStuff(); } static void doMiddle(struct cart *theCart) /* Set up globals and make web page */ { cart = theCart; char *db = NULL, *genome = NULL, *clade = NULL; getDbGenomeClade(cart, &db, &genome, &clade, oldVars); int timeout = cartUsualInt(cart, "udcTimeout", 300); if (udcCacheTimeout() < timeout) udcSetCacheTimeout(timeout); knetUdcInstall(); measureTiming = cartUsualBoolean(cart, "measureTiming", measureTiming); char *submitLabel = cgiOptionalString("submit"); if (submitLabel && sameString(submitLabel, "try example")) { char *exampleFile = phyloPlaceDbSettingPath(db, "exampleFile"); struct lineFile *lf = lineFileOpen(exampleFile, TRUE); resultsPage(db, lf); } else if (cgiOptionalString(remoteFileVar)) { char *url = cgiString(remoteFileVar); struct lineFile *lf = netLineFileOpen(url); resultsPage(db, lf); } else if (cgiOptionalString(seqFileVar) || cgiOptionalString(seqFileVar "__filename")) { struct lineFile *lf = lineFileFromFileInput(cart, seqFileVar); resultsPage(db, lf); } else mainPage(db); } #define LD_LIBRARY_PATH "LD_LIBRARY_PATH" static void addLdLibraryPath() /* usher requires a tbb lib that is not in the yum package tbb-devel, so for now * I'm adding the .so files to hgPhyloPlaceData. Set environment variable LD_LIBRARY_PATH * to pick them up from there. */ { char *oldValue = getenv(LD_LIBRARY_PATH); struct dyString *dy = dyStringNew(0); if (startsWith("/", PHYLOPLACE_DATA_DIR)) dyStringAppend(dy, PHYLOPLACE_DATA_DIR); else { char cwd[4096]; getcwd(cwd, sizeof cwd); dyStringPrintf(dy, "%s/%s", cwd, PHYLOPLACE_DATA_DIR); } if (isNotEmpty(oldValue)) dyStringPrintf(dy, ":%s", oldValue); setenv(LD_LIBRARY_PATH, dyStringCannibalize(&dy), TRUE); } int main(int argc, char *argv[]) /* Process command line. */ { /* Null terminated list of CGI Variables we don't want to save to cart */ char *excludeVars[] = {"submit", "Submit", seqFileVar, seqFileVar "__binary", seqFileVar "__filename", NULL}; long enteredMainTime = clock1000(); cgiSpoof(&argc, argv); oldVars = hashNew(10); addLdLibraryPath(); cartEmptyShellNoContent(doMiddle, hUserCookie(), excludeVars, oldVars); cgiExitTime("hgPhyloPlace", enteredMainTime); return 0; }