afafa0301ea4b14fbe1fbd5aa379c5351ecd640d
angie
  Tue Aug 2 19:41:44 2022 -0700
Add support for non-wuhCor1 genomes (e.g. monkeypox GenArk hub).
* Search in hgPhyloPlaceData for config.ra files, taking assembly name (minus hub prefix) from directory name.
* Add a menu input to the main page for switching between supported genomes if there is more than one.
* Replace hardcoded values or global vars with dnaSeq attributes, assembly metadata queries or new config.ra settings.
* Separate out SARS-CoV-2-specific help text like GISAID/CNCB descriptions.
* Support metadata columns for GenBank-specific stuff & Nextstrain lineages (for MPXV).
* Also a little refactoring in runUsher in preparation for supporting usher server mode: parse new placement info file so we don't have to parse that data from usher stderr output.

TODO: update Nextstrain/Auspice JSON output to use appropriate metadata columns and support monkeypox genes.

diff --git src/hg/hgPhyloPlace/hgPhyloPlace.c src/hg/hgPhyloPlace/hgPhyloPlace.c
index 2dc6bba..64b1e8e 100644
--- src/hg/hgPhyloPlace/hgPhyloPlace.c
+++ src/hg/hgPhyloPlace/hgPhyloPlace.c
@@ -1,16 +1,16 @@
-/* hgPhyloPlace - Upload SARS-CoV-2 sequence for placement in phylo tree. */
+/* hgPhyloPlace - Upload SARS-CoV-2 or MPXV sequence for placement in phylo tree. */
 
 /* Copyright (C) 2020 The Regents of the University of California */
 
 #include "common.h"
 #include "botDelay.h"
 #include "cart.h"
 #include "cgiApoptosis.h"
 #include "cheapcgi.h"
 #include "hCommon.h"
 #include "hash.h"
 #include "hui.h"
 #include "jsHelper.h"
 #include "knetUdc.h"
 #include "linefile.h"
 #include "net.h"
@@ -59,30 +59,78 @@
 else if (isNotEmpty(fileBinaryCoords))
     {
     fprintf(stderr, "%s=%s fileBinaryCoords=%s\n", cartVar, fileName, fileBinaryCoords);
     char *binInfo = cloneString(fileBinaryCoords);
     char *words[2];
     char *mem;
     unsigned long size;
     chopByWhite(binInfo, words, ArraySize(words));
     mem = (char *)sqlUnsignedLong(words[0]);
     size = sqlUnsignedLong(words[1]);
     lf = lineFileDecompressMem(TRUE, mem, size);
     }
 return lf;
 }
 
+static char *labelForDb(char *db)
+/* Return a display label for db: a clone of hMPXV when trackHubSkipHubName(db) is the accession
+ * GCF_014621545.1 (the assembly hub name is just the accession); otherwise hGenome(db). */
+{
+char *label = NULL;
+if (sameString(trackHubSkipHubName(db), "GCF_014621545.1"))  // presumably strips the hub prefix
+    label = cloneString("hMPXV");
+else
+    label = hGenome(db);
+return label;
+}
+
+static void selectDb(char **pDb, char **pLabel)
+/* Get the supported assemblies from phyloPlaceDbList(); errAbort if there are none.  If *pDb is
+ * not in that list, replace it with the first entry.  Set *pLabel to labelForDb(*pDb).  If more
+ * than one assembly is supported, print a menu / select input for db; reload the page on change. */
+{
+struct slName *supportedDbs = phyloPlaceDbList();
+if (supportedDbs == NULL)
+    errAbort("Sorry, this server is not configured to perform phylogenetic placement.");
+if (!slNameInList(supportedDbs, *pDb))
+    {
+    *pDb = cloneString(supportedDbs->name);  // clone because supportedDbs is freed below
+    }
+*pLabel = labelForDb(*pDb);
+int supportedDbCount = slCount(supportedDbs);
+if (supportedDbCount > 1)
+    {
+    char *labels[supportedDbCount];
+    char *values[supportedDbCount];
+    struct slName *sDb;
+    int i;
+    for (sDb = supportedDbs, i = 0;  i < supportedDbCount;  sDb = sDb->next, i++)
+        {
+        values[i] = sDb->name;  // borrowed from supportedDbs; only used before the list is freed
+        labels[i] = labelForDb(values[i]);
+        }
+    char *selectVar = "db";
+    struct dyString *dy = jsOnChangeStart();
+    jsDropDownCarryOver(dy, selectVar);
+    char *js = jsOnChangeEnd(&dy);
+    puts("<p>Choose your pathogen: ");
+    cgiMakeDropListFull(selectVar, labels, values, supportedDbCount, *pDb, "change", js);
+    puts("</p>");
+    }
+slNameFreeList(&supportedDbs);
+}
+
 static void newPageStartStuff()
 {
 // Copied these from hgGtexTrackSettings.c which says "// NOTE: This will likely go to web.c".
 puts("<link rel='stylesheet' href='../style/gb.css'>");
 puts("<link rel='stylesheet' href='../style/hgGtexTrackSettings.css'>");
 
 //#*** TODO: move this out to a CSS (hardcoding for now because we're doing a standalone push
 //#*** independent of the release cycle).
 puts("<style>\n"
 "#warnBox {\n"
 "    border: 3px ridge DarkRed;\n"
 "    width:640px;\n"
 "    padding:10px; \n"
 "    margin:10px;\n"
 "    text-align:left;\n"
@@ -171,178 +219,208 @@
     "{ var $fileInput = $('input[name="fileVarName"]');" \
     "  var $pasteInput = $('textarea[name="pasteVarName"]');" \
     "  if ($fileInput && $fileInput[0] && $fileInput[0].files && !$fileInput[0].files.length &&" \
     "      $pasteInput && !$pasteInput.val()) {" \
     "     alert('Please either choose a file or paste in sequence names/IDs first, ' +" \
     "           'and then click the upload button.');" \
     "     return false; " \
     "   } else if ($fileInput && $fileInput[0] && $fileInput[0].files && " \
     "              !!$fileInput[0].files.length &&" \
     "              $pasteInput && !!$pasteInput.val()) {" \
     "     alert('Sorry, unable to process both a file and pasted-in sequence names/IDs at the ' +" \
     "            'same time.  Please clear one or the other and then click the upload button.');" \
     "     return false; " \
     "   } else { loadingImage.run(); return true; } }"
 
-static void inputForm()
+static void inputForm(char *db)
 /* Ask the user for FASTA or VCF. */
 {
 printf("<form action='%s' name='mainForm' method=POST enctype='multipart/form-data'>\n\n",
        "hgPhyloPlace");
 cartSaveSession(cart);
-char *db = "wuhCor1";
-cgiMakeHiddenVar("db", db);
 puts("<div class='readableWidth'>");
 puts("  <div class='gbControl col-md-12'>");
 puts("<div style='font-size: 20px; font-weight: 500; margin-top: 15px; margin-bottom: 10px;'>"
-     "Place your SARS-CoV-2 sequences in a global phylogenetic tree</div>");
+     "Place your sequences in a global phylogenetic tree</div>");
+// If db is not a supported db then switch to the default supported db, and if multiple dbs are
+// supported then make a menu so the user can select.
+char *label = NULL;
+selectDb(&db, &label);
 printf("<p>Select your FASTA, VCF or list of sequence names/IDs: ");
 printf("<input type='file' id='%s' name='%s'>",
        seqFileVar, seqFileVar);
 printf("</p><p>or paste in sequence names/IDs:<br>\n");
 cgiMakeTextArea(pastedIdVar, "", 10, 70);
 struct treeChoices *treeChoices = loadTreeChoices(db);
 if (treeChoices)
     {
     puts("</p><p>");
     printf("Phylogenetic tree version: ");
     char *phyloPlaceTree = cartOptionalString(cart, "phyloPlaceTree");
     cgiMakeDropListWithVals("phyloPlaceTree", treeChoices->descriptions, treeChoices->protobufFiles,
                             treeChoices->count, phyloPlaceTree);
     }
 puts("</p><p>");
 printf("Number of samples per subtree showing sample placement: ");
 int subtreeSize = cartUsualInt(cart, "subtreeSize", 50);
 cgiMakeIntVarWithLimits("subtreeSize", subtreeSize,
                         "Number of samples in subtree showing neighborhood of placement",
                         5, 10, 5000);
 puts("</p><p>");
 cgiMakeOnClickSubmitButton(CHECK_FILE_OR_PASTE_INPUT_JS(seqFileVar, pastedIdVar),
                            "submit", "Upload");
+char *exampleFile = phyloPlaceDbSettingPath(db, "exampleFile");
+if (isNotEmpty(exampleFile))
+    {
     puts("&nbsp;&nbsp;");
     cgiMakeOnClickSubmitButton("{ loadingImage.run(); return true; }",
                                "exampleButton", "Upload Example File");
+    if (sameString(db, "wuhCor1"))
+        {
         puts("&nbsp;&nbsp;");
         puts("<a href='https://github.com/russcd/USHER_DEMO/' target=_blank>More example files</a>");
+        }
+    }
 puts("</p>");
 // Add a loading image to reassure people that we're working on it when they upload a big file
 printf("<div><img id='loadingImg' src='../images/loading.gif' />\n");
 printf("<span id='loadingMsg'></span></div>\n");
 jsInline("$(document).ready(function() {\n"
          "    loadingImage.init($('#loadingImg'), $('#loadingMsg'), "
          "'<p style=\"color: red; font-style: italic;\">Uploading and processing your sequences "
          "may take some time. Please leave this window open while we work on your sequences.</p>');"
          "});\n");
 
 puts("  </div>");
 puts("</div>");
 puts("<div class='readableWidth'>");
 puts("  <div class='gbControl col-md-12'>");
 puts("<h2>More information</h2>");
-puts("<p>Upload your SARS-CoV-2 sequence (FASTA or VCF file) to find the most similar\n"
-     "complete, high-coverage samples from \n"
-     "<a href='https://www.gisaid.org/' target='_blank'>GISAID</a>\n"
-     "or from public sequence databases ("
+printf("<p>Upload your %s sequence (FASTA or VCF file) to find the most similar\n"
+       "complete, high-coverage samples from \n", label);
+if (sameString(db, "wuhCor1"))
+    {
+    puts("<a href='https://www.gisaid.org/' target='_blank'>GISAID</a>\n"
+         "or from public sequence databases (INSDC: GenBank/ENA/DDBJ accessed using "
          "<a href='https://www.ncbi.nlm.nih.gov/labs/virus/vssi/#/virus?SeqType_s=Nucleotide&VirusLineage_ss=SARS-CoV-2,%20taxid:2697049' "
-     "target=_blank>NCBI Virus / GenBank</a>,\n"
+         "target=_blank>NCBI Virus</a>,\n"
          "<a href='https://www.cogconsortium.uk/data/' target=_blank>COG-UK</a> and the\n"
          "<a href='https://bigd.big.ac.cn/ncov/release_genome' "
          "target=_blank>China National Center for Bioinformation</a>), "
          "and your sequence's placement in the phylogenetic tree generated by the\n"
          "<a href='https://github.com/roblanf/sarscov2phylo' target='_blank'>sarscov2phylo</a>\n"
-     "pipeline.\n"
-     "Placement is performed by\n"
+         "pipeline.\n");
+    }
+else
+    {
+    //#*** TODO get NCBI link from db not hardcoded
+    puts("public sequence databases (INSDC: GenBank/ENA/DDBJ accessed using "
+         "<a href='https://www.ncbi.nlm.nih.gov/labs/virus/vssi/#/virus?SeqType_s=Nucleotide&VirusLineage_ss=Monkeypox%20virus%20(monkeypox),%20taxid:10244' "
+         "target=_blank>NCBI Virus</a>)\n"
+         "and your sequence's placement in a global phylogenetic tree.\n"
+         );
+    }
+puts("Placement is performed by\n"
      "<a href='https://github.com/yatisht/usher' target=_blank>"
      "Ultrafast Sample placement on Existing tRee (UShER)</a> "
      "(<a href='https://www.nature.com/articles/s41588-021-00862-7' target=_blank>"
      "Turakhia <em>et al.</em></a>).  UShER also generates local subtrees to show samples "
      "in the context of the most closely related sequences.  The subtrees can be visualized "
      "as Genome Browser custom tracks and/or using "
      "<a href='https://nextstrain.org' target=_blank>Nextstrain</a>'s interactive display "
      "which supports "
      "<a href='"NEXTSTRAIN_DRAG_DROP_DOC"' "
      "target=_blank>drag-and-drop</a> of local metadata that remains on your computer.</p>\n");
+if (sameString(db, "wuhCor1"))
+    {
     puts("<p>\n"
          "GISAID data displayed in the Genome Browser are subject to GISAID's\n"
          "<a href='https://www.gisaid.org/registration/terms-of-use/' target=_blank>"
          "Terms and Conditions</a>.\n"
          "SARS-CoV-2 genome sequences and metadata are available for download from\n"
          "<a href='https://gisaid.org' target=_blank>GISAID</a> EpiCoV&trade;.\n"
          "</p>");
     puts("<p>\n"
          "<a href='/covid19.html'>COVID-19 Pandemic Resources at UCSC</a></p>\n");
+    }
 puts("</div>");
 puts("</div>");
 puts("<div class='readableWidth'>");
 puts("  <div class='gbControl col-md-12'>");
 puts("<h2>Privacy and sharing</h2>");
 puts("<h3>Please do not upload "
      "<a href='https://en.wikipedia.org/wiki/Protected_health_information#United_States' "
      "target=_blank>Protected Health Information (PHI)</a>.</h3>\n"
      "If even virus sequence files must remain local on your computer, then you can try "
      "<a href='https://shusher.gi.ucsc.edu/' target=_blank>ShUShER</a> "
      "which runs entirely in your web browser so that no files leave your computer."
      "</p>\n"
      "<p>We do not store your information "
      "(aside from the information necessary to display results)\n"
      "and will not share it with others unless you choose to share your Genome Browser view.</p>\n"
-     "<p>In order to enable rapid progress in SARS-CoV-2 research and genomic contact tracing,\n"
-     "please share your SARS-CoV-2 sequences by submitting them to an "
+     "<p>In order to enable rapid progress in pandemic research and genomic contact tracing,\n"
+     "please share your sequences by submitting them to an "
      "<a href='https://ncbiinsights.ncbi.nlm.nih.gov/2020/08/17/insdc-covid-data-sharing/' "
      "target=_blank>INSDC</a> member institution\n"
      "(<a href='https://submit.ncbi.nlm.nih.gov/sarscov2/' target=_blank>NCBI</a>,\n"
      "<a href='https://www.covid19dataportal.org/submit-data' target=_blank>EMBL-EBI</a>\n"
-     "or <a href='https://www.ddbj.nig.ac.jp/ddbj/websub.html' target=_blank>DDBJ</a>)\n"
-     "and <a href='https://www.gisaid.org/' target=_blank>GISAID</a>.\n"
-     "</p>\n");
+     "or <a href='https://www.ddbj.nig.ac.jp/ddbj/websub.html' target=_blank>DDBJ</a>)\n");
+if (sameString(db, "wuhCor1"))
+    puts("and <a href='https://www.gisaid.org/' target=_blank>GISAID</a>\n");
+puts(".</p>\n");
 puts("</div>");
 puts("  </div>");
 puts("<div class='readableWidth'>");
 puts("<div class='gbControl col-md-12'>");
 puts("<h2>Tutorial</h2>");
 puts("<iframe width='267' height='150' src='https://www.youtube.com/embed/humQ1NyZOUM' "
      "frameborder='0' allow='accelerometer; autoplay; clipboard-write; encrypted-media; "
      "gyroscope; picture-in-picture' allowfullscreen></iframe>\n"
      "<h3><a href='https://www.cdc.gov/amd/pdf/slidesets/ToolkitModule_3.3-508C.pdf' "
      "target=_blank>Slides for tutorial</a></h3>\n"
      "<h3><a href='https://www.cdc.gov/amd/training/covid-19-gen-epi-toolkit.html' target=_blank>"
      "More tutorials from CDC COVID-19 Genomic Epidemiology Toolkit</a></h3>\n"
      "</p>"
      );
 puts("</div>");
 puts("</div>");
 puts("</form>");
 }
 
 static void mainPage(char *db)
 {
 // Start web page with new-style header
 webStartGbNoBanner(cart, db, "UShER: Upload");
+jsInit();
 jsIncludeFile("jquery.js", NULL);
 jsIncludeFile("ajax.js", NULL);
 newPageStartStuff();
 
+// Hidden form for reloading page when db select is changed
+static char *saveVars[] = { "db" };
+jsCreateHiddenForm(cart, cgiScriptName(), saveVars, ArraySize(saveVars));
+
 puts("<div class='row'>"
      "  <div class='row gbSectionBannerLarge'>\n"
      "    <div class='col-md-11'>UShER: Ultrafast Sample placement on Existing tRee</div>\n"
      "    <div class='col-md-1'></div>\n"
      "  </div>\n"
      "</div>\n"
      "<div class='row'>\n");
 if (hgPhyloPlaceEnabled())
     {
-    inputForm();
+    inputForm(db);
     }
 else
     {
     puts("  <div class='gbControl col-md-12'>");
     puts("  Sorry, this server is not configured to perform phylogenetic placement.");
     puts("  </div>");
     }
 puts("</div>\n");
 
 newPageEndStuff();
 }
 
 static void resultsPage(char *db, struct lineFile *lf)
 /* QC the user's uploaded sequence(s) or VCF; if input looks valid then run usher
  * and display results. */
@@ -389,53 +467,54 @@
     if (ctFile)
         {
         cgiMakeHiddenVar(CT_CUSTOM_TEXT_VAR, ctFile);
         if (tl.leftLabelWidthChars < 0 || tl.leftLabelWidthChars == leftLabelWidthDefaultChars)
             cgiMakeHiddenVar(leftLabelWidthVar, leftLabelWidthForLongNames);
         cgiMakeButton("submit", "view in Genome Browser");
         puts("  </div>");
         puts("</form>");
         }
     else if (! success)
         {
         puts("<p></p>");
         puts("  </div>");
         puts("</form>");
         // Let the user upload something else and try again:
-        inputForm();
+        inputForm(db);
         }
     }
 else
     {
     warn("Unable to read your uploaded data - please choose a file and try again, or click the "
          "&quot;try example&quot; button.");
     // Let the user try again:
     puts("  </div>");
     puts("</form>");
-    inputForm();
+    inputForm(db);
     }
 puts("</div>\n");
 
 newPageEndStuff();
 }
 
 static void doMiddle(struct cart *theCart)
 /* Set up globals and make web page */
 {
 cart = theCart;
-char *db = NULL, *genome = NULL, *clade = NULL;
-getDbGenomeClade(cart, &db, &genome, &clade, oldVars);
+char *db = NULL, *genome = NULL;
+// Get the current db from the cart
+getDbAndGenome(cart, &db, &genome, oldVars);
 
 int timeout = cartUsualInt(cart, "udcTimeout", 300);
 if (udcCacheTimeout() < timeout)
     udcSetCacheTimeout(timeout);
 knetUdcInstall();
 
 measureTiming = cartUsualBoolean(cart, "measureTiming", measureTiming);
 
 char *submitLabel = cgiOptionalString("submit");
 char *newExampleButton = cgiOptionalString("exampleButton");
 if ((submitLabel && sameString(submitLabel, "try example")) ||
     (newExampleButton && sameString(newExampleButton, "Upload Example File")))
     {
     char *exampleFile = phyloPlaceDbSettingPath(db, "exampleFile");
     struct lineFile *lf = lineFileOpen(exampleFile, TRUE);