e14c2761ca01d8175578df87bd5311b5c0442bfb angie Thu Nov 19 13:04:01 2020 -0800 New CGI hgPhyloPlace: place SARS-CoV-2 genomes in fasta or VCF on phylogenetic tree using Yatish Turakhia's UShER program; add subtree custom tracks and Nextstrain linkouts to visualize results. refs #25943 diff --git src/hg/hgPhyloPlace/hgPhyloPlace.c src/hg/hgPhyloPlace/hgPhyloPlace.c new file mode 100644 index 0000000..bbe1904 --- /dev/null +++ src/hg/hgPhyloPlace/hgPhyloPlace.c @@ -0,0 +1,408 @@ +/* hgPhyloPlace - Upload SARS-CoV-2 sequence for placement in phylo tree. */ + +/* Copyright (C) 2020 The Regents of the University of California */ + +#include "common.h" +#include "botDelay.h" +#include "cart.h" +#include "cheapcgi.h" +#include "hCommon.h" +#include "hash.h" +#include "hui.h" +#include "jsHelper.h" +#include "knetUdc.h" +#include "linefile.h" +#include "net.h" +#include "options.h" +#include "phyloPlace.h" +#include "portable.h" +#include "trackLayout.h" +#include "udc.h" +#include "web.h" + +/* Global Variables */ +struct cart *cart = NULL; // CGI and other variables +struct hash *oldVars = NULL; // Old contents of cart before it was updated by CGI +boolean measureTiming = FALSE; // Print out how long things take +char *leftLabelWidthForLongNames = "55";// Leave plenty of room for tree and long virus strain names + +#define seqFileVar "sarsCoV2File" +#define remoteFileVar "remoteFile" + +static struct lineFile *lineFileFromFileInput(struct cart *cart, char *fileVar) +/* Return a lineFile on data from an uploaded file with cart variable name fileVar. + * If the file is binary, attempt to decompress it. Return NULL if no data are found + * or if there is a problem decompressing binary data. If retFileName is not NULL */ +{ +struct lineFile *lf = NULL; +// Depending on whether the file is plain text or binary, different cart variables are present. +char *filePlainContents = cartOptionalString(cart, fileVar); +char cartVar[2048]; +safef(cartVar, sizeof cartVar, "%s__binary", fileVar); +char *fileBinaryCoords = cartOptionalString(cart, cartVar); +// Also get the file name for error reporting. +safef(cartVar, sizeof cartVar, "%s__filename", fileVar); +char *fileName = cartOptionalString(cart, cartVar); +if (fileName == NULL) + fileName = "<uploaded data>"; +if (isNotEmpty(filePlainContents)) + { + lf = lineFileOnString(fileName, TRUE, cloneString(trimSpaces(filePlainContents))); + } +else if (isNotEmpty(fileBinaryCoords)) + { + fprintf(stderr, "%s=%s fileBinaryCoords=%s\n", cartVar, fileName, fileBinaryCoords); + char *binInfo = cloneString(fileBinaryCoords); + char *words[2]; + char *mem; + unsigned long size; + chopByWhite(binInfo, words, ArraySize(words)); + mem = (char *)sqlUnsignedLong(words[0]); + size = sqlUnsignedLong(words[1]); + lf = lineFileDecompressMem(TRUE, mem, size); + } +return lf; +} + +static void newPageStartStuff() +{ +// Copied these from hgGtexTrackSettings.c which says "// NOTE: This will likely go to web.c". +puts("<link rel='stylesheet' href='../style/gb.css'>"); +puts("<link rel='stylesheet' href='../style/hgGtexTrackSettings.css'>"); + +//#*** TODO: move this out to a CSS (hardcoding for now because we're doing a standalone push +//#*** independent of the release cycle). +puts("<style>\n" +"#warnBox {\n" +" border: 3px ridge DarkRed;\n" +" width:640px;\n" +" padding:10px; \n" +" margin:10px;\n" +" text-align:left;\n" +"}\n" +"\n" +"#warnHead {\n" +" color: DarkRed;\n" +"}\n" +".readableWidth {\n" +" max-width: 70em;\n" +"}\n" +"table.seqSummary, table.seqSummary th, table.seqSummary td {\n" +" border: 1px gray solid;\n" +" padding: 5px;\n" +"}\n" +".tooltip {\n" +" position: relative;\n" +" display: inline-block;\n" +" border-bottom: 1px dotted black;\n" +"}\n" +"\n" +".tooltip .tooltiptext {\n" +" visibility: hidden;\n" +" background-color: lightgray;\n" +" text-align: center;\n" +" position: absolute;\n" +" z-index: 1;\n" +" opacity: 0;\n" +" width: 220px;\n" +" padding: 5px;\n" +" left: 105%;\n" +" transition: opacity .6s;\n" +" line-height: 1em;\n" +"}\n" +"\n" +".tooltip:hover .tooltiptext {\n" +" visibility: visible;\n" +" opacity: .9;\n" +"}\n" +"td.qcExcellent {\n" +" background-color: #44ff44;\n" +"}\n" +"td.qcGood {\n" +" background-color: #88ff88;\n" +"}\n" +"td.qcMeh {\n" +" background-color: #ffcc44;\n" +"}\n" +"td.qcBad {\n" +" background-color: #ff8888;\n" +"}\n" +"td.qcFail {\n" +" background-color: #ff6666;\n" +"}\n" +"</style>\n" + ); + + + +// Container for bootstrap grid layout +puts( +"<div class='container-fluid'>\n"); +} + +static void newPageEndStuff() +{ +puts( +"</div>"); +jsIncludeFile("utils.js", NULL); +webIncludeFile("inc/gbFooter.html"); +webEndJWest(); +} + +#define CHECK_FILE_INPUT_JS "{ var $fileInput = $('input[name="seqFileVar"]'); " \ + "if ($fileInput && $fileInput[0] && $fileInput[0].files && !$fileInput[0].files.length) {" \ + " alert('Please choose a file first, then click the upload button.');" \ + " return false; " \ + "} else { return true; } }" + +static void inputForm() +/* Ask the user for FASTA or VCF. */ +{ +printf("<form action='%s' name='mainForm' method=POST enctype='multipart/form-data'>\n\n", + "hgPhyloPlace"); +cartSaveSession(cart); +cgiMakeHiddenVar("db", "wuhCor1"); +puts(" <div class='gbControl col-md-12'>"); +puts("<div class='readableWidth'>"); +puts("<p>Upload your SARS-CoV-2 sequence (FASTA or VCF file) to find the most similar\n" + "complete, high-coverage samples from \n" + "<a href='https://www.gisaid.org/' target='_blank'>GISAID</a>\n" + "and your sequence's placement in the phylogenetic tree generated by the\n" + "<a href='https://github.com/roblanf/sarscov2phylo' target='_blank'>sarscov2phylo</a>\n" + "pipeline.\n" + "Placement is performed by\n" + "<a href='https://github.com/yatisht/usher' target=_blank>" + "Ultrafast Sample placement on Existing tRee (UShER)</a> " + "(<a href='https://www.biorxiv.org/content/10.1101/2020.09.26.314971v1' target=_blank>" + "Turakhia <em>et al.</em></a>).</p>\n"); +puts("<p><b>Note:</b> " + "Please do not upload any files that contain " + "<a href='https://en.wikipedia.org/wiki/Protected_health_information#United_States' " + "target=_blank>Protected Health Information (PHI)</a> " + "to UCSC.</p>\n" + "<p>We do not store your information " + "(aside from the information necessary to display results)\n" + "and will not share it with others unless you choose to share your Genome Browser view.</p>\n" + "<p>In order to enable rapid progress in SARS-CoV-2 research and genomic contact tracing,\n" + "please share your SARS-CoV-2 sequences by submitting them to an " + "<a href='https://ncbiinsights.ncbi.nlm.nih.gov/2020/08/17/insdc-covid-data-sharing/' " + "target=_blank>INSDC</a> member institution\n" + "(<a href='https://submit.ncbi.nlm.nih.gov/sarscov2/' target=_blank>NCBI</a> in the U.S.,\n" + "<a href='https://www.covid19dataportal.org/submit-data' target=_blank>EMBL-EBI</a> in Europe\n" + "and <a href='https://www.ddbj.nig.ac.jp/ddbj/websub.html' target=_blank>DDBJ</a> in Japan)\n" + "and <a href='https://www.gisaid.org/' target=_blank>GISAID</a>.\n" + "</p>\n"); +puts("</div>"); +puts(" </div>"); +puts(" <div class='gbControl col-md-12'>"); +printf("<input type='file' id='%s' name='%s' " + "accept='.fa, .fasta, .vcf, .vcf.gz, .fa.gz, .fasta.gz'>", + seqFileVar, seqFileVar); +printf("Number of samples per subtree showing sample placement: "); +int subtreeSize = cartUsualInt(cart, "subtreeSize", 50); +cgiMakeIntVarWithLimits("subtreeSize", subtreeSize, + "Number of samples in subtree showing neighborhood of placement", + 5, 10, 1000); +cgiMakeOnClickSubmitButton(CHECK_FILE_INPUT_JS, "submit", "upload"); +puts(" </div>"); +puts("</form>"); +} + +static void exampleForm() +/* Let the user try Russ's example. */ +{ +printf("<form action='%s' name='exampleForm' method=POST>\n\n", + "hgPhyloPlace"); +cartSaveSession(cart); +cgiMakeHiddenVar("db", "wuhCor1"); +puts(" <div class='gbControl col-md-12'>"); +puts("If you don't have a local file, you can try an " + "<a href='https://github.com/russcd/USHER_DEMO/' target=_blank>example</a>: "); +cgiMakeButton("submit", "try example"); +puts(" </div>"); +puts("</form>"); +} + +static void gisaidFooter() +/* GISAID wants this on all pages that have anything to do with GISAID samples. */ +{ +puts("<div class='gbControl col-md-12'>"); +puts("<div class='readableWidth'>"); +puts("<p></p>"); +puts("<p>\n" + "GISAID data displayed in the Genome Browser are subject to GISAID's\n" + "<a href='https://www.gisaid.org/registration/terms-of-use/' target=_blank>" + "Terms and Conditions</a>.\n" + "SARS-CoV-2 genome sequences and metadata are available for download from\n" + "<a href='https://gisaid.org' target=_blank>GISAID</a> EpiCoV™.\n" + "</p>"); +puts("</div>"); +puts("</div>"); +} + +static void mainPage(char *db) +{ +// Start web page with new-style header +webStartGbNoBanner(cart, db, "UShER: Upload"); +newPageStartStuff(); + +puts("<div class='row'>" + " <div class='row gbSectionBanner'>\n" + " <div class='col-md-11'>UShER: Ultrafast Sample placement on Existing tRee</div>\n" + " <div class='col-md-1'></div>\n" + " </div>\n" + "</div>\n" + "<div class='row'>\n"); +if (hgPhyloPlaceEnabled()) + { + inputForm(); + exampleForm(); + gisaidFooter(); + } +else + { + puts(" <div class='gbControl col-md-12'>"); + puts(" Sorry, this server is not configured to perform phylogenetic placement."); + puts(" </div>"); + } +puts("</div>\n"); + +newPageEndStuff(); +} + +static void resultsPage(char *db, struct lineFile *lf) +/* QC the user's uploaded sequence(s) or VCF; if input looks valid then run usher + * and display results. */ +{ +webStartGbNoBanner(cart, db, "UShER: Results"); +newPageStartStuff(); + +hgBotDelay(); + +puts("<div class='row'>" + " <div class='row gbSectionBanner'>\n" + " <div class='col-md-11'>UShER: Ultrafast Sample placement on Existing tRee</div>\n" + " <div class='col-md-1'></div>\n" + " </div>\n" + "</div>\n" + "<div class='row'>\n"); +// Form submits subtree custom tracks to hgTracks +printf("<form action='%s' name='resultsForm' method=%s>\n\n", + hgTracksName(), cartUsualString(cart, "formMethod", "POST")); +cartSaveSession(cart); +puts(" <div class='gbControl col-md-12'>"); + +if (lf != NULL) + { + // Use trackLayout to get hgTracks parameters relevant to displaying trees: + struct trackLayout tl; + trackLayoutInit(&tl, cart); + // Do our best to place the user's samples, make custom tracks if successful: + int subtreeSize = cartUsualInt(cart, "subtreeSize", 50); + char *ctFile = phyloPlaceSamples(lf, db, measureTiming, subtreeSize, tl.fontHeight); + if (ctFile) + { + cgiMakeHiddenVar(CT_CUSTOM_TEXT_VAR, ctFile); + if (tl.leftLabelWidthChars < 0 || tl.leftLabelWidthChars == leftLabelWidthDefaultChars) + cgiMakeHiddenVar(leftLabelWidthVar, leftLabelWidthForLongNames); + cgiMakeButton("submit", "view in Genome Browser"); + puts(" </div>"); + puts("</form>"); + } + else + { + puts(" </div>"); + puts("</form>"); + // Let the user upload something else and try again: + inputForm(); + } + } +else + { + warn("Unable to read your uploaded data - please choose a file and try again, or click the " + ""try example" button."); + // Let the user try again: + puts(" </div>"); + puts("</form>"); + inputForm(); + exampleForm(); + } +puts("</div>\n"); + +gisaidFooter(); +newPageEndStuff(); +} + +static void doMiddle(struct cart *theCart) +/* Set up globals and make web page */ +{ +cart = theCart; +char *db = NULL, *genome = NULL, *clade = NULL; +getDbGenomeClade(cart, &db, &genome, &clade, oldVars); + +int timeout = cartUsualInt(cart, "udcTimeout", 300); +if (udcCacheTimeout() < timeout) + udcSetCacheTimeout(timeout); +knetUdcInstall(); + +measureTiming = cartUsualBoolean(cart, "measureTiming", measureTiming); + +char *submitLabel = cgiOptionalString("submit"); +if (submitLabel && sameString(submitLabel, "try example")) + { + char *exampleFile = phyloPlaceDbSettingPath(db, "exampleFile"); + struct lineFile *lf = lineFileOpen(exampleFile, TRUE); + resultsPage(db, lf); + } +else if (cgiOptionalString(remoteFileVar)) + { + char *url = cgiString(remoteFileVar); + struct lineFile *lf = netLineFileOpen(url); + resultsPage(db, lf); + } +else if (cgiOptionalString(seqFileVar) || cgiOptionalString(seqFileVar "__filename")) + { + struct lineFile *lf = lineFileFromFileInput(cart, seqFileVar); + resultsPage(db, lf); + } +else + mainPage(db); +} + +#define LD_LIBRARY_PATH "LD_LIBRARY_PATH" + +static void addLdLibraryPath() +/* usher requires a tbb lib that is not in the yum package tbb-devel, so for now + * I'm adding the .so files to hgPhyloPlaceData. Set environment variable LD_LIBRARY_PATH + * to pick them up from there. */ +{ +char *oldValue = getenv(LD_LIBRARY_PATH); +struct dyString *dy = dyStringNew(0); +if (startsWith("/", PHYLOPLACE_DATA_DIR)) + dyStringAppend(dy, PHYLOPLACE_DATA_DIR); +else + { + char cwd[4096]; + getcwd(cwd, sizeof cwd); + dyStringPrintf(dy, "%s/%s", cwd, PHYLOPLACE_DATA_DIR); + } +if (isNotEmpty(oldValue)) + dyStringPrintf(dy, ":%s", oldValue); +setenv(LD_LIBRARY_PATH, dyStringCannibalize(&dy), TRUE); +} + +int main(int argc, char *argv[]) +/* Process command line. */ +{ +/* Null terminated list of CGI Variables we don't want to save to cart */ +char *excludeVars[] = {"submit", "Submit", + seqFileVar, seqFileVar "__binary", seqFileVar "__filename", + NULL}; +long enteredMainTime = clock1000(); +cgiSpoof(&argc, argv); +oldVars = hashNew(10); +addLdLibraryPath(); +cartEmptyShellNoContent(doMiddle, hUserCookie(), excludeVars, oldVars); +cgiExitTime("hgPhyloPlace", enteredMainTime); +return 0; +}