9c4b7225d91f5180fcddbecd843b71c4e6503688
angie
  Tue Mar 30 10:39:22 2021 -0700
Add subtree JSON URLs to single-subtree JSON, using Auspice's new url attribute.  Allow up to 1000 VCF samples and extend timeout to 15 minutes.

diff --git src/hg/hgPhyloPlace/phyloPlace.c src/hg/hgPhyloPlace/phyloPlace.c
index 80ed662..0583fc3 100644
--- src/hg/hgPhyloPlace/phyloPlace.c
+++ src/hg/hgPhyloPlace/phyloPlace.c
@@ -23,31 +23,31 @@
 #include "pipeline.h"
 #include "psl.h"
 #include "ra.h"
 #include "regexHelper.h"
 #include "trashDir.h"
 #include "vcf.h"
 
 // Globals:
 static boolean measureTiming = FALSE;
 
 // wuhCor1-specific:
 char *chrom = "NC_045512v2";
 int chromSize = 29903;
 
 // Parameter constants:
-int maxGenotypes = 100;        // Upper limit on number of samples user can upload at once.
+int maxGenotypes = 1000;        // Upper limit on number of samples user can upload at once.
 boolean showBestNodePaths = FALSE;
 boolean showParsimonyScore = FALSE;
 
 
 char *phyloPlaceDbSetting(char *db, char *settingName)
 /* Return a setting from hgPhyloPlaceData/<db>/config.ra or NULL if not found. */
 {
 static struct hash *configHash = NULL;
 static char *configDb = NULL;
 if (!sameOk(db, configDb))
     {
     char configFile[1024];
     safef(configFile, sizeof configFile, PHYLOPLACE_DATA_DIR "/%s/config.ra", db);
     if (fileExists(configFile))
         {
@@ -633,30 +633,32 @@
             headerWordCount = chopString(headerLine, "\t", NULL, 0);
             AllocArray(headerWords, headerWordCount);
             chopString(headerLine, "\t", headerWords, headerWordCount);
             }
         else
             errAbort("Missing header line from metadataFile %s", metadataFile);
         }
     int strainIx = stringArrayIx("strain", headerWords, headerWordCount);
     int epiIdIx = stringArrayIx("gisaid_epi_isl", headerWords, headerWordCount);
     int genbankIx = stringArrayIx("genbank_accession", headerWords, headerWordCount);
     int dateIx = stringArrayIx("date", headerWords, headerWordCount);
     int authorIx = stringArrayIx("authors", headerWords, headerWordCount);
     int nCladeIx = stringArrayIx("Nextstrain_clade", headerWords, headerWordCount);
     int gCladeIx = stringArrayIx("GISAID_clade", headerWords, headerWordCount);
     int lineageIx = stringArrayIx("pangolin_lineage", headerWords, headerWordCount);
+    if (lineageIx < 0)
+        lineageIx = stringArrayIx("pango_lineage", headerWords, headerWordCount);
     int countryIx = stringArrayIx("country", headerWords, headerWordCount);
     int divisionIx = stringArrayIx("division", headerWords, headerWordCount);
     int locationIx = stringArrayIx("location", headerWords, headerWordCount);
     int countryExpIx = stringArrayIx("country_exposure", headerWords, headerWordCount);
     int divExpIx = stringArrayIx("division_exposure", headerWords, headerWordCount);
     int origLabIx = stringArrayIx("originating_lab", headerWords, headerWordCount);
     int subLabIx = stringArrayIx("submitting_lab", headerWords, headerWordCount);
     int regionIx = stringArrayIx("region", headerWords, headerWordCount);
     while (lineFileNext(lf, &line, NULL))
         {
         char *words[headerWordCount];
         int wordCount = chopTabs(line, words);
         lineFileExpectWords(lf, headerWordCount, wordCount);
         struct sampleMetadata *met;
         AllocVar(met);
@@ -2175,57 +2177,64 @@
         warn("Sorry, can't recognize your uploaded data as FASTA or VCF.\n");
     }
 lineFileClose(&lf);
 if (vcfTn)
     {
     fflush(stdout);
     int seqCount = slCount(seqInfoList);
     // Don't make smaller subtrees when a large number of sequences are uploaded.
     if (seqCount > MAX_SEQ_DETAILS)
         subtreeSize = 0;
     struct usherResults *results = runUsher(usherPath, usherAssignmentsPath, vcfTn->forCgi,
                                             subtreeSize, sampleIds, bigTree->condensedNodes,
                                             &startTime);
     if (results->singleSubtreeInfo)
         {
+        puts("<p></p>");
         readQcThresholds(db);
         int subtreeCount = slCount(results->subtreeInfoList);
         // Sort subtrees by number of user samples (largest first).
         slSort(&results->subtreeInfoList, subTreeInfoUserSampleCmp);
         // Make Nextstrain/auspice JSON file for each subtree.
         char *bigGenePredFile = phyloPlaceDbSettingPath(db, "bigGenePredFile");
         struct geneInfo *geneInfoList = getGeneInfoList(bigGenePredFile, refGenome);
         struct seqWindow *gSeqWin = chromSeqWindowNew(db, chrom, 0, chromSize);
         struct hash *sampleMetadata = getSampleMetadata(metadataFile);
-        struct tempName *singleSubtreeJsonTn;
-        AllocVar(singleSubtreeJsonTn);
-        trashDirFile(singleSubtreeJsonTn, "ct", "singleSubtreeAuspice", ".json");
-        treeToAuspiceJson(results->singleSubtreeInfo, db, geneInfoList, gSeqWin, sampleMetadata,
-                          singleSubtreeJsonTn->forCgi, source);
+        struct hash *sampleUrls = hashNew(0);
         struct tempName *jsonTns[subtreeCount];
         struct subtreeInfo *ti;
         int ix;
         for (ix = 0, ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, ix++)
             {
             AllocVar(jsonTns[ix]);
             char subtreeName[512];
             safef(subtreeName, sizeof(subtreeName), "subtreeAuspice%d", ix+1);
             trashDirFile(jsonTns[ix], "ct", subtreeName, ".json");
-            treeToAuspiceJson(ti, db, geneInfoList, gSeqWin, sampleMetadata, jsonTns[ix]->forCgi,
-                              source);
+            treeToAuspiceJson(ti, db, geneInfoList, gSeqWin, sampleMetadata, NULL,
+                              jsonTns[ix]->forCgi, source);
+            // Add a link for every sample to this subtree, so the single-subtree JSON can
+            // link to subtree JSONs
+            char *subtreeUrl = nextstrainUrlFromTn(jsonTns[ix]);
+            struct slName *sample;
+            for (sample = ti->subtreeUserSampleIds;  sample != NULL;  sample = sample->next)
+                hashAdd(sampleUrls, sample->name, subtreeUrl);
             }
-        puts("<p></p>");
+        struct tempName *singleSubtreeJsonTn;
+        AllocVar(singleSubtreeJsonTn);
+        trashDirFile(singleSubtreeJsonTn, "ct", "singleSubtreeAuspice", ".json");
+        treeToAuspiceJson(results->singleSubtreeInfo, db, geneInfoList, gSeqWin, sampleMetadata,
+                          sampleUrls, singleSubtreeJsonTn->forCgi, source);
         struct subtreeInfo *subtreeInfoForButtons = results->subtreeInfoList;
         if (seqCount > MAX_SEQ_DETAILS || subtreeCount > MAX_SUBTREE_BUTTONS)
             subtreeInfoForButtons = NULL;
         makeButtonRow(singleSubtreeJsonTn, jsonTns, subtreeInfoForButtons, subtreeSize, isFasta);
         printf("<p>If you have metadata you wish to display, click a 'view subtree in "
                "Nextstrain' button, and then you can drag on a CSV file to "
                "<a href='"NEXTSTRAIN_DRAG_DROP_DOC"' target=_blank>add it to the tree view</a>."
                "</p>\n");
 
         // Make custom tracks for uploaded samples and subtree(s).
         struct phyloTree *sampleTree = NULL;
         struct tempName *ctTn = writeCustomTracks(vcfTn, results, sampleIds, bigTree->tree,
                                                   source, fontHeight, &sampleTree, &startTime);
 
         // Make a sample summary TSV file and accumulate S gene changes