src/hg/hgPhyloPlace/phyloPlace.c eeac40956b3dd6611f58aeb847ff5c404d3ce883

eeac40956b3dd6611f58aeb847ff5c404d3ce883
angie
  Fri Dec 1 09:05:41 2023 -0800
Support trees whose reference/root is not from a db or hub, but rather a custom .2bit file.
This means that the selected pathogen may or may not also be a db/hub, so the selection interacts with the db cart variable but does not always match it.
Also, in phyloPlace.c, update RSV metadata column headers (RGCC lineages replace Ramaekers 2020 clades).

diff --git src/hg/hgPhyloPlace/phyloPlace.c src/hg/hgPhyloPlace/phyloPlace.c
index 5b12724..116b270 100644
--- src/hg/hgPhyloPlace/phyloPlace.c
+++ src/hg/hgPhyloPlace/phyloPlace.c
@@ -26,59 +26,61 @@
 #include "pipeline.h"
 #include "psl.h"
 #include "pthreadWrap.h"
 #include "ra.h"
 #include "regexHelper.h"
 #include "trackHub.h"
 #include "trashDir.h"
 #include "vcf.h"
 
 // Globals:
 static boolean measureTiming = FALSE;
 
 // Parameter constants:
 int maxGenotypes = 1000;        // Upper limit on number of samples user can upload at once.
 boolean showParsimonyScore = FALSE;
+int minSamplesForOwnTree = 3;  // If user uploads at least this many samples, show tree for them.
 
 
 struct slName *phyloPlaceDbList(struct cart *cart)
-/* Each subdirectory of PHYLOPLACE_DATA_DIR that contains a config.ra file is a supported db
- * or track hub name (without the hub_number_ prefix).  Return a list of them, or NULL if none
- * are found. */
+/* Each subdirectory of PHYLOPLACE_DATA_DIR that contains a config.ra file is a supported data
+ * source and might also be a db or track hub name (without the hub_number_ prefix).  Return a
+ * list of them, or NULL if none are found. */
 {
 struct slName *dbList = NULL;
 // I was hoping the pattern would be wildMatch'd only against filenames so I could use "config.ra",
 // but both directories and files must match the pattern so must use "*".
 struct slName *dataDirPaths = pathsInDirAndSubdirs(PHYLOPLACE_DATA_DIR, "*");
 struct slName *path;
 for (path = dataDirPaths;  path != NULL;  path = path->next)
     {
     if (endsWith(path->name, "/config.ra"))
         {
         char dir[PATH_LEN], name[FILENAME_LEN], extension[FILEEXT_LEN];
         splitPath(path->name, dir, name, extension);
         if (endsWith(dir, "/"))
             dir[strlen(dir)-1] = '\0';
         char *db = strrchr(dir, '/');
         if (db == NULL)
             db = dir;
         else
             db++;
         if (hDbExists(db))
             slNameAddHead(&dbList, db);
         else
             {
+            // Not a db -- see if it's a hub that is already connected:
             struct trackHubGenome *hubGenome = trackHubGetGenomeUndecorated(db);
             if (hubGenome != NULL)
                 slNameAddHead(&dbList, hubGenome->name);
             else
                 {
                 // Not connected to session currently.  If the name looks like an NCBI Assembly ID
                 // then try connecting to the corresponding UCSC assembly hub.
                 regmatch_t substrs[5];
                 if (regexMatchSubstr(db, "^(GC[AF])_([0-9]{3})([0-9]{3})([0-9]{3})\\.[0-9]$",
                                      substrs, ArraySize(substrs)))
                     {
                     char gcPrefix[4], first3[4], mid3[4], last3[4];
                     regexSubstringCopy(db, substrs[1], gcPrefix, sizeof gcPrefix);
                     regexSubstringCopy(db, substrs[2], first3, sizeof first3);
                     regexSubstringCopy(db, substrs[3], mid3, sizeof mid3);
@@ -87,30 +89,35 @@
                                                          "%s/%s/hub.txt",
                                                          gcPrefix, first3, mid3, last3, db);
                     // Use cart variables to pretend user clicked to connect to this hub.
                     cartSetString(cart, hgHubDataText, dy->string);
                     cartSetString(cart, hgHubGenome, db);
                     struct errCatch *errCatch = errCatchNew();
                     char *hubDb = NULL;
                     if (errCatchStart(errCatch))
                         {
                         hubDb = hubConnectLoadHubs(cart);
                         }
                     errCatchEnd(errCatch);
                     if (hubDb != NULL)
                         slNameAddHead(&dbList, hubDb);
                     }
+                else
+                    {
+                    // Doesn't appear to be a hub; count on its config to specify a .2bit file.
+                    slNameAddHead(&dbList, db);
+                    }
                 }
             }
         }
     }
 // Reverse alphabetical sort to put wuhCor1/SARS-CoV-2 first
 slNameSort(&dbList);
 slReverse(&dbList);
 return dbList;
 }
 
 char *phyloPlaceDbSetting(char *db, char *settingName)
 /* Return a setting from hgPhyloPlaceData/<db>/config.ra or NULL if not found. */
 {
 static struct hash *configHash = NULL;
 static char *configDb = NULL;
@@ -775,34 +782,34 @@
         lineageIx = stringArrayIx("pango_lineage", headerWords, headerWordCount);
     int countryIx = stringArrayIx("country", headerWords, headerWordCount);
     int divisionIx = stringArrayIx("division", headerWords, headerWordCount);
     int locationIx = stringArrayIx("location", headerWords, headerWordCount);
     int countryExpIx = stringArrayIx("country_exposure", headerWords, headerWordCount);
     int divExpIx = stringArrayIx("division_exposure", headerWords, headerWordCount);
     int origLabIx = stringArrayIx("originating_lab", headerWords, headerWordCount);
     int subLabIx = stringArrayIx("submitting_lab", headerWords, headerWordCount);
     int regionIx = stringArrayIx("region", headerWords, headerWordCount);
     int nCladeUsherIx = stringArrayIx("Nextstrain_clade_usher", headerWords, headerWordCount);
     int lineageUsherIx = stringArrayIx("pango_lineage_usher", headerWords, headerWordCount);
     int authorsIx = stringArrayIx("authors", headerWords, headerWordCount);
     int pubsIx = stringArrayIx("publications", headerWords, headerWordCount);
     int nLineageIx = stringArrayIx("Nextstrain_lineage", headerWords, headerWordCount);
     int gnCladeIx = stringArrayIx("goya_nextclade", headerWords, headerWordCount);
-    int rnCladeIx = stringArrayIx("ramaekers_nextclade", headerWords, headerWordCount);
+    int rnCladeIx = stringArrayIx("GCC_nextclade", headerWords, headerWordCount);
     int guCladeIx = stringArrayIx("goya_usher", headerWords, headerWordCount);
-    int ruCladeIx = stringArrayIx("ramaekers_usher", headerWords, headerWordCount);
-    int rtCladeIx = stringArrayIx("ramaekers_tableS1", headerWords, headerWordCount);
+    int ruCladeIx = stringArrayIx("GCC_usher", headerWords, headerWordCount);
+    int rtCladeIx = stringArrayIx("GCC_assigned_2023-11", headerWords, headerWordCount);
     while (lineFileNext(lf, &line, NULL))
         {
         char *words[headerWordCount];
         int wordCount = chopTabs(line, words);
         lineFileExpectWords(lf, headerWordCount, wordCount);
         struct sampleMetadata *met;
         AllocVar(met);
         if (strainIx >= 0)
             met->strain = cloneString(words[strainIx]);
         if (epiIdIx >= 0)
             met->epiId = cloneString(words[epiIdIx]);
         if (genbankIx >= 0 && !sameString("?", words[genbankIx]))
             met->gbAcc = cloneString(words[genbankIx]);
         if (dateIx >= 0)
             met->date = cloneString(words[dateIx]);
@@ -828,41 +835,41 @@
             met->origLab = cloneString(words[origLabIx]);
         if (subLabIx >= 0)
             met->subLab = cloneString(words[subLabIx]);
         if (regionIx >= 0)
             met->region = cloneString(words[regionIx]);
         if (nCladeUsherIx >= 0)
             met->nCladeUsher = cloneString(words[nCladeUsherIx]);
         if (lineageUsherIx >= 0)
             met->lineageUsher = cloneString(words[lineageUsherIx]);
         if (authorsIx >= 0)
             met->authors = cloneString(words[authorsIx]);
         if (pubsIx >= 0)
             met->pubs = cloneString(words[pubsIx]);
         if (nLineageIx >= 0)
             met->nLineage = cloneString(words[nLineageIx]);
-        // For RSV, use lineage for Ramaekers clades and nClade for Goya clades.
+        // For RSV, use lineage for GCC clades and nClade for Goya clades.
         // This is getting ugly and we really should specify metadata columns in config.ra files.
         if (gnCladeIx >= 0)
             met->nClade = cloneString(words[gnCladeIx]);
         if (rnCladeIx >= 0)
             met->lineage = cloneString(words[rnCladeIx]);
         if (guCladeIx >= 0)
             met->nCladeUsher = cloneString(words[guCladeIx]);
         if (ruCladeIx >= 0)
             met->lineageUsher = cloneString(words[ruCladeIx]);
-        // Uglier still, use gClade to store Ramaekers Table S1 designations because it's left over.
+        // Uglier still, use gClade to store GCC designations because it's left over.
         if (rtCladeIx >= 0)
             met->gClade = cloneString(words[rtCladeIx]);
         // If epiId and/or genbank ID is included, we'll probably be using that to look up items.
         if (epiIdIx >= 0 && !isEmpty(words[epiIdIx]))
             hashAdd(sampleMetadata, words[epiIdIx], met);
         if (genbankIx >= 0 && !isEmpty(words[genbankIx]) && !sameString("?", words[genbankIx]))
             {
             if (strchr(words[genbankIx], '.'))
                 {
                 // Index by versionless accession
                 char copy[strlen(words[genbankIx])+1];
                 safecpy(copy, sizeof copy, words[genbankIx]);
                 char *dot = strchr(copy, '.');
                 *dot = '\0';
                 hashAdd(sampleMetadata, copy, met);
@@ -1565,36 +1572,32 @@
              "Goya 2020</a> clade"
              TOOLTIP("The clade described in "
                      "<a href='https://doi.org/10.1111/irv.12715' target=_blank>Goya et al. 2020, "
                      "&quot;Toward unified molecular surveillance of RSV: A proposal for "
                      "genotype definition&quot;</a> "
                      "assigned by placement in the tree"));
     else
         puts("</th>\n<th>Nextstrain lineage"
              TOOLTIP("The Nextstrain lineage assigned by "
                      "placement in the tree"));
     }
 if (gotLineages)
     {
     if (isRsv)
          puts("</th>\n<th><a href='https://doi.org/10.1093/ve/veaa052' target=_blank>"
-              "Ramaekers 2020</a> clade"
-             TOOLTIP("The clade described in "
-                     "<a href='https://doi.org/10.1093/ve/veaa052' target=_blank>"
-                     "Ramaekers et al. 2020, "
-                     "&quot;Towards a unified classification for human respiratory syncytial virus "
-                     "genotypes&quot;</a> "
+              "RGCC 2023</a> clade"
+             TOOLTIP("The RSV Genotyping Consensus Consortium clade (manuscript in preparation) "
                      "assigned by placement in the tree"));
    else
         puts("</th>\n<th>Pango lineage"
              TOOLTIP("The <a href='https://cov-lineages.org/' "
                      "target=_blank>Pango lineage</a> assigned to the sample by UShER"));
     }
 puts("</th>\n<th>Neighboring sample in tree"
      TOOLTIP("A sample already in the tree that is a child of the node at which the uploaded "
              "sample was placed, to give an example of a closely related sample")
      "</th>\n<th>Lineage of neighbor");
 if (sameString(db, "wuhCor1"))
     puts(TOOLTIP("The <a href='https://cov-lineages.org/' target=_blank>"
                  "Pango lineage</a> assigned by pangolin "
                  "to the nearest neighboring sample already in the tree"));
 else
@@ -3034,89 +3037,126 @@
 struct hash *sampleMetadata = getSampleMetadata(metadataFile);
 reportTiming(&startTime, "read sample metadata (in a pthread)");
 return sampleMetadata;
 }
 
 static pthread_t *mayStartLoaderPthread(char *filename, void *(*workerFunction)(void *))
 /* Fork off a child process that parses a file and returns the resulting data structure. */
 {
 pthread_t *pt;
 AllocVar(pt);
 if (! pthreadMayCreate(pt, NULL, workerFunction, filename))
     pt = NULL;
 return pt;
 }
 
-char *phyloPlaceSamples(struct lineFile *lf, char *db, char *defaultProtobuf,
+static struct dnaSeq *getChromSeq(char *db, char *refName)
+/* Return the reference sequence for refName.  If refName's config names a twoBitFile that
+ * exists, read the sequence from it (the configured chrom, else the file's first sequence) and
+ * lower-case it; otherwise refName must also be db, and hdb lib functions are used.
+ * errAborts if neither source is available or if the .2bit file contains no sequences. */
+{
+char *twoBitName = phyloPlaceDbSettingPath(refName, "twoBitFile");
+char *chrom = phyloPlaceDbSetting(refName, "chrom");
+if (isEmpty(twoBitName) || !fileExists(twoBitName))
+    {
+    if (!sameString(db, refName))
+        errAbort("No twoBitFile or db/hub found for %s", refName);
+    if (isEmpty(chrom))
+        chrom = hDefaultChrom(db);
+    return hChromSeq(db, chrom, 0, hChromSize(db, chrom));
+    }
+struct slName *seqNames = twoBitSeqNames(twoBitName);
+if (seqNames == NULL)
+    errAbort("twoBitFile %s for %s contains no sequences", twoBitName, refName);
+if (isEmpty(chrom))
+    chrom = cloneString(seqNames->name);
+struct twoBitFile *tbf = twoBitOpen(twoBitName);
+struct dnaSeq *seq = twoBitReadSeqFrag(tbf, chrom, 0, 0);
+// Convert to lower case so genoFind doesn't index it as containing no tiles.
+tolowers(seq->dna);
+twoBitClose(&tbf);
+slNameFreeList(&seqNames);
+return seq;
+}
+
+static struct phyloTree *uploadedSamplesTree(char *singleSubtreeFile, struct slName *sampleIds)
+/* Return the tree in singleSubtreeFile pruned down to the uploaded samples (sampleIds), or NULL
+ * if fewer than minSamplesForOwnTree samples were uploaded -- in that case the file is not
+ * even opened. */
+{
+// Too few uploaded samples to make a meaningful tree of their own.
+int uploadedCount = slCount(sampleIds);
+if (uploadedCount < minSamplesForOwnTree)
+    return NULL;
+// Read the single subtree, then keep only nodes with leaf descendants in sampleIds.
+struct phyloTree *wholeSubtree = phyloOpenTree(singleSubtreeFile);
+return phyloPruneToIds(wholeSubtree, sampleIds);
+}
+
+char *phyloPlaceSamples(struct lineFile *lf, char *db, char *refName, char *defaultProtobuf,
                         boolean doMeasureTiming, int subtreeSize, int fontHeight,
                         boolean *retSuccess)
 /* Given a lineFile that contains either FASTA, VCF, or a list of sequence names/ids:
  * If FASTA/VCF, then prepare VCF for usher; if that goes well then run usher, report results,
  * make custom track files and return the top-level custom track file.
  * If list of seq names/ids, then attempt to find their full names in the protobuf, run matUtils
  * to make subtrees, show subtree results, and return NULL.  Set retSuccess to TRUE if we were
  * able to get at least some results for the user's input. */
 {
 char *ctFile = NULL;
 if (retSuccess)
     *retSuccess = FALSE;
 measureTiming = doMeasureTiming;
 int startTime = clock1000();
 struct tempName *vcfTn = NULL;
 struct slName *sampleIds = NULL;
 char *usherPath = getUsherPath(TRUE);
 char *protobufPath = NULL;
 char *source = NULL;
 char *metadataFile = NULL;
 char *aliasFile = NULL;
 char *sampleNameFile = NULL;
-struct treeChoices *treeChoices = loadTreeChoices(db);
-getProtobufMetadataSource(db, treeChoices, defaultProtobuf,
+struct treeChoices *treeChoices = loadTreeChoices(refName);
+getProtobufMetadataSource(refName, treeChoices, defaultProtobuf,
                           &protobufPath, &metadataFile, &source, &aliasFile, &sampleNameFile);
 reportTiming(&startTime, "start up and find the tree etc. files");
 struct mutationAnnotatedTree *bigTree = NULL;
 lineFileCarefulNewlines(lf);
-char *chrom = hDefaultChrom(db);
-//#*** Hack for influenza
-if (stringIn("GCF_000865085.1", db))
-    chrom = "NC_007366.1";
-else if (stringIn("GCF_001343785.1", db))
-    chrom = "NC_026433.1";
-int chromSize = hChromSize(db, chrom);
-struct slName **maskSites = getProblematicSites(db, chrom, chromSize);
+struct dnaSeq *refGenome = getChromSeq(db, refName);
+struct slName **maskSites = getProblematicSites(refName, refGenome->name, refGenome->size);
 //#*** TODO: add CGI param option for this almost-never-needed tweak:
 if (0)
     {
     bigTree = parseParsimonyProtobuf(protobufPath);
     reportTiming(&startTime, "parse protobuf file (at startup, for excluding informativeBases "
                  "from maskSites)");
-    informativeBasesFromTree(bigTree->tree, maskSites, chromSize);
+    informativeBasesFromTree(bigTree->tree, maskSites, refGenome->size);
     reportTiming(&startTime, "remove any informative bases in tree from maskSites");
     }
-struct dnaSeq *refGenome = hChromSeq(db, chrom, 0, chromSize);
 boolean isFasta = FALSE;
 boolean subtreesOnly = FALSE;
 struct seqInfo *seqInfoList = NULL;
 if (lfLooksLikeFasta(lf))
     {
     struct slPair *failedSeqs;
     struct slPair *failedPsls;
     struct hash *treeNames = NULL;
     // We need to check uploaded names in fasta only for original usher, not usher-sampled(-server).
-    if (!serverIsConfigured(db) && !endsWith(usherPath, "-sampled"))
+    if (!serverIsConfigured(refName) && !endsWith(usherPath, "-sampled"))
         treeNames = getTreeNames(sampleNameFile, protobufPath, &bigTree, FALSE, &startTime);
-    vcfTn = vcfFromFasta(lf, db, refGenome, maskSites, treeNames,
+    vcfTn = vcfFromFasta(lf, refName, refGenome, maskSites, treeNames,
                          &sampleIds, &seqInfoList, &failedSeqs, &failedPsls, &startTime);
     if (failedSeqs)
         {
         puts("<p>");
         struct slPair *fail;
         for (fail = failedSeqs;  fail != NULL;  fail = fail->next)
             printf("%s<br>\n", fail->name);
         puts("</p>");
         }
     if (failedPsls)
         {
         puts("<p>");
         struct slPair *fail;
         for (fail = failedPsls;  fail != NULL;  fail = fail->next)
             printf("%s<br>\n", fail->name);
@@ -3142,166 +3182,171 @@
     reportTiming(&startTime, "look up uploaded sample names");
     }
 lineFileClose(&lf);
 if (sampleIds == NULL)
     {
     return ctFile;
     }
 
 // Kick off child thread to load metadata simultaneously with running usher or matUtils.
 pthread_t *metadataPthread = mayStartLoaderPthread(metadataFile, loadMetadataWorker);
 
 struct usherResults *results = NULL;
 if (vcfTn)
     {
     fflush(stdout);
-    results = runUsher(db, usherPath, protobufPath, vcfTn->forCgi, subtreeSize, &sampleIds,
+    results = runUsher(refName, usherPath, protobufPath, vcfTn->forCgi, subtreeSize, &sampleIds,
                        treeChoices, &startTime);
     }
 else if (subtreesOnly)
     {
     char *matUtilsPath = getMatUtilsPath(TRUE);
-    results = runMatUtilsExtractSubtrees(db, matUtilsPath, protobufPath, subtreeSize,
+    results = runMatUtilsExtractSubtrees(refName, matUtilsPath, protobufPath, subtreeSize,
                                          sampleIds, treeChoices, &startTime);
     }
 
 struct hash *sampleMetadata = NULL;
 if (metadataPthread)
     {
     pthreadJoin(metadataPthread, (void **)(&sampleMetadata));
     reportTiming(&startTime, "wait for sample metadata loading thread to finish");
     }
 else
     {
     // We really need metadata -- load it the slow way.
     sampleMetadata = getSampleMetadata(metadataFile);
     reportTiming(&startTime, "load sample metadata without pthread");
     }
 
 if (results && results->singleSubtreeInfo)
     {
     if (retSuccess)
         *retSuccess = TRUE;
     puts("<p></p>");
-    readQcThresholds(db);
+    readQcThresholds(refName);
     int subtreeCount = slCount(results->subtreeInfoList);
     // Sort subtrees by number of user samples (largest first).
     slSort(&results->subtreeInfoList, subTreeInfoUserSampleCmp);
     // Make Nextstrain/auspice JSON file for each subtree.
-    char *bigGenePredFile = phyloPlaceDbSettingPath(db, "bigGenePredFile");
+    char *bigGenePredFile = phyloPlaceDbSettingPath(refName, "bigGenePredFile");
     struct geneInfo *geneInfoList = getGeneInfoList(bigGenePredFile, refGenome);
-    struct seqWindow *gSeqWin = memSeqWindowNew(chrom, refGenome->dna);
+    struct seqWindow *gSeqWin = memSeqWindowNew(refGenome->name, refGenome->dna);
     struct hash *sampleUrls = hashNew(0);
     struct tempName *jsonTns[subtreeCount];
     struct subtreeInfo *ti;
     int ix;
     for (ix = 0, ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, ix++)
         {
         AllocVar(jsonTns[ix]);
         char subtreeName[512];
         safef(subtreeName, sizeof(subtreeName), "subtreeAuspice%d", ix+1);
         trashDirFile(jsonTns[ix], "ct", subtreeName, ".json");
-        treeToAuspiceJson(ti, db, geneInfoList, gSeqWin, sampleMetadata, NULL,
+        treeToAuspiceJson(ti, refName, geneInfoList, gSeqWin, sampleMetadata, NULL,
                           results->samplePlacements, jsonTns[ix]->forCgi, source);
         // Add a link for every sample to this subtree, so the single-subtree JSON can
         // link to subtree JSONs
         char *subtreeUrl = nextstrainUrlFromTn(jsonTns[ix]);
         struct slName *sample;
         for (sample = ti->subtreeUserSampleIds;  sample != NULL;  sample = sample->next)
             hashAdd(sampleUrls, sample->name, subtreeUrl);
         }
     struct tempName *singleSubtreeJsonTn;
     AllocVar(singleSubtreeJsonTn);
     trashDirFile(singleSubtreeJsonTn, "ct", "singleSubtreeAuspice", ".json");
-    treeToAuspiceJson(results->singleSubtreeInfo, db, geneInfoList, gSeqWin, sampleMetadata,
+    treeToAuspiceJson(results->singleSubtreeInfo, refName, geneInfoList, gSeqWin, sampleMetadata,
                       sampleUrls, results->samplePlacements, singleSubtreeJsonTn->forCgi, source);
     reportTiming(&startTime, "make Auspice JSON");
     struct subtreeInfo *subtreeInfoForButtons = results->subtreeInfoList;
     if (subtreeCount > MAX_SUBTREE_BUTTONS)
         subtreeInfoForButtons = NULL;
+    boolean canDoCustomTracks = (!subtreesOnly && sameString(db, refName));
     makeButtonRow(singleSubtreeJsonTn, jsonTns, subtreeInfoForButtons, subtreeSize, isFasta,
-                  !subtreesOnly);
+                  canDoCustomTracks);
     printf("<p>If you have metadata you wish to display, click a 'view subtree in "
            "Nextstrain' button, and then you can drag on a CSV file to "
            "<a href='"NEXTSTRAIN_DRAG_DROP_DOC"' target=_blank>add it to the tree view</a>."
            "</p>\n");
     puts("<p><em>Note: "
          "The Nextstrain subtree views, and Download files below, are temporary files and will "
          "expire within two days.  "
          "Please download the Nextstrain subtree JSON files if you will want to view them "
          "again in the future.  The JSON files can be drag-dropped onto "
          "<a href='https://auspice.us/' target=_blank>https://auspice.us/</a>."
          "</em></p>");
 
     struct tempName *tsvTn = NULL, *sTsvTn = NULL;
     struct tempName *zipTn = makeSubtreeZipFile(results, jsonTns, singleSubtreeJsonTn,
                                                 &startTime);
     struct tempName *ctTn = NULL;
     if (subtreesOnly)
         {
-        summarizeSubtrees(sampleIds, results, sampleMetadata, jsonTns, db, subtreeSize);
+        summarizeSubtrees(sampleIds, results, sampleMetadata, jsonTns, refName, subtreeSize);
         reportTiming(&startTime, "describe subtrees");
         }
     else
         {
         findNearestNeighbors(results, sampleMetadata);
         reportTiming(&startTime, "find nearest neighbors");
 
+        char *singleSubtreeFile = results->singleSubtreeInfo->subtreeTn->forCgi;
+        struct phyloTree *sampleTree = uploadedSamplesTree(singleSubtreeFile, sampleIds);
+        if (sameString(db, refName))
+            {
             // Make custom tracks for uploaded samples and subtree(s).
-        struct phyloTree *sampleTree = NULL;
             ctTn = writeCustomTracks(db, vcfTn, results, sampleIds, source, fontHeight,
-                                 &sampleTree, &startTime);
+                                     sampleTree, &startTime);
+            }
 
         // Make a sample summary TSV file and accumulate S gene changes
         struct hash *seqInfoHash = hashFromSeqInfoListAndIds(seqInfoList, sampleIds);
         addSampleMutsFromSeqInfo(results->samplePlacements, seqInfoHash);
         struct hash *spikeChanges = hashNew(0);
         tsvTn = writeTsvSummary(results, sampleTree, sampleIds, seqInfoHash,
                                 geneInfoList, gSeqWin, spikeChanges, &startTime);
         sTsvTn = writeSpikeChangeSummary(spikeChanges, slCount(sampleIds));
         downloadsRow(results->bigTreePlusTn->forHtml, tsvTn->forHtml, sTsvTn->forHtml,
                      zipTn->forHtml);
 
         int seqCount = slCount(seqInfoList);
         if (seqCount <= MAX_SEQ_DETAILS)
             {
-            char *refAcc = cloneString(chrom);
+            char *refAcc = cloneString(refGenome->name);
             if (regexMatch(refAcc, "v[0-9]+$"))
                 {
                 char *v = strrchr(refAcc, 'v');
                 assert(v != NULL);
                 *v = '.';
                 }
-            summarizeSequences(seqInfoList, isFasta, results, jsonTns, refAcc, db, subtreeSize);
+            summarizeSequences(seqInfoList, isFasta, results, jsonTns, refAcc, refName, subtreeSize);
             reportTiming(&startTime, "write summary table (including reading in lineages)");
             for (ix = 0, ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, ix++)
                 {
                 int subtreeUserSampleCount = slCount(ti->subtreeUserSampleIds);
                 printf("<h3>Subtree %d: ", ix+1);
                 if (subtreeUserSampleCount > 1)
                     printf("%d related samples", subtreeUserSampleCount);
                 else if (subtreeCount > 1)
                     printf("Unrelated sample");
                 printf("</h3>\n");
                 makeNextstrainButtonN("viewNextstrainSub", ix, subtreeUserSampleCount, subtreeSize,
                                       jsonTns);
                 puts("<br>");
                 // Make a sub-subtree with only user samples for display:
                 struct phyloTree *subtree = phyloOpenTree(ti->subtreeTn->forCgi);
                 subtree = phyloPruneToIds(subtree, ti->subtreeUserSampleIds);
                 describeSamplePlacements(ti->subtreeUserSampleIds, results->samplePlacements,
-                                         subtree, sampleMetadata, source, refAcc, db);
+                                         subtree, sampleMetadata, source, refAcc, refName);
                 }
             reportTiming(&startTime, "describe placements");
             }
         else
             printf("<p>(Skipping details; "
                    "you uploaded %d sequences, and details are shown only when "
                    "you upload at most %d sequences.)</p>\n",
                    seqCount, MAX_SEQ_DETAILS);
         }
 
     puts("<h3>Downloads</h3>");
     if (! subtreesOnly)
         {
         puts("<ul>");
         // Offer big tree w/new samples for download
@@ -3332,28 +3377,28 @@
         puts(" (Newick file)</a>");
         printf("<li><a href='%s' download>Auspice JSON for subtree with %s",
                jsonTns[ix]->forHtml, ti->subtreeUserSampleIds->name);
         if (subtreeUserSampleCount > 10)
             printf(" and %d other samples", subtreeUserSampleCount - 1);
         else
             {
             struct slName *sln;
             for (sln = ti->subtreeUserSampleIds->next;  sln != NULL;  sln = sln->next)
                 printf(", %s", sln->name);
             }
         puts(" (JSON file)</a>");
         }
     puts("</ul>");
 
-    if (!subtreesOnly)
+    if (ctTn != NULL)
         {
         // Notify in opposite order of custom track creation.
         puts("<h3>Custom tracks for viewing in the Genome Browser</h3>");
         printf("<p>Added custom track of uploaded samples.</p>\n");
         if (subtreeCount > 0 && subtreeCount <= MAX_SUBTREE_CTS)
             printf("<p>Added %d subtree custom track%s.</p>\n",
                    subtreeCount, (subtreeCount > 1 ? "s" : ""));
         ctFile = urlFromTn(ctTn);
         }
     }
 return ctFile;
 }