src/hg/hgPhyloPlace/phyloPlace.c afafa0301ea4b14fbe1fbd5aa379c5351ecd640d

afafa0301ea4b14fbe1fbd5aa379c5351ecd640d
angie
  Tue Aug 2 19:41:44 2022 -0700
Add support for non-wuhCor1 genomes (e.g. monkeypox GenArk hub).
* Search in hgPhyloPlaceData for config.ra files, taking assembly name (minus hub prefix) from directory name.
* Add a menu input to the main page for switching between supported genomes if there are more than one.
* Replace hardcoded values or global vars with dnaSeq attributes, assembly metadata queries or new config.ra settings.
* Separate out SARS-CoV-2-specific help text like GISAID/CNCB descriptions.
* Support metadata columns for GenBank-specific stuff & Nextstrain lineages (for MPXV).
* also a little refactoring in runUsher in preparation for supporting usher server mode: parse new placement info file so we don't have to parse that data form usher stderr output.

TODO: update Nextstrain/Auspice JSON output to use appropriate metadata columns and support monkeypox genes.

diff --git src/hg/hgPhyloPlace/phyloPlace.c src/hg/hgPhyloPlace/phyloPlace.c
index ef70bd4..c7ebb67 100644
--- src/hg/hgPhyloPlace/phyloPlace.c
+++ src/hg/hgPhyloPlace/phyloPlace.c
@@ -12,73 +12,139 @@
 #include "hash.h"
 #include "hgConfig.h"
 #include "htmshell.h"
 #include "hui.h"
 #include "iupac.h"
 #include "jsHelper.h"
 #include "linefile.h"
 #include "obscure.h"
 #include "parsimonyProto.h"
 #include "phyloPlace.h"
 #include "phyloTree.h"
 #include "pipeline.h"
 #include "psl.h"
 #include "ra.h"
 #include "regexHelper.h"
+#include "trackHub.h"
 #include "trashDir.h"
 #include "vcf.h"
 
 // Globals:
 static boolean measureTiming = FALSE;
 
-// wuhCor1-specific:
-char *chrom = "NC_045512v2";
-int chromSize = 29903;
-
 // Parameter constants:
 int maxGenotypes = 1000;        // Upper limit on number of samples user can upload at once.
 boolean showParsimonyScore = FALSE;
 
 
+struct slName *phyloPlaceDbList()
+/* Each subdirectory of PHYLOPLACE_DATA_DIR that contains a config.ra file is a supported db
+ * or track hub name (without the hub_number_ prefix).  Return a list of them, or NULL if none
+ * are found. */
+{
+struct slName *dbList = NULL;
+// I was hoping the pattern would be wildMatch'd only against filenames so I could use "config.ra",
+// but both directories and files must match the pattern so must use "*".
+struct slName *dataDirPaths = pathsInDirAndSubdirs(PHYLOPLACE_DATA_DIR, "*");
+struct slName *path;
+for (path = dataDirPaths;  path != NULL;  path = path->next)
+    {
+    if (endsWith(path->name, "/config.ra"))
+        {
+        char dir[PATH_LEN], name[FILENAME_LEN], extension[FILEEXT_LEN];
+        splitPath(path->name, dir, name, extension);
+        if (endsWith(dir, "/"))
+            dir[strlen(dir)-1] = '\0';
+        char *db = strrchr(dir, '/');
+        if (db == NULL)
+            db = dir;
+        else
+            db++;
+        if (hDbExists(db))
+            slNameAddHead(&dbList, db);
+        else
+            {
+            struct trackHubGenome *hubGenome = trackHubGetGenomeUndecorated(db);
+            if (hubGenome != NULL)
+                slNameAddHead(&dbList, hubGenome->name);
+            else
+                {
+                // Not connected to session currently.  If the name looks like an NCBI Assembly ID
+                // then try connecting to the corresponding UCSC assembly hub.
+                regmatch_t substrs[5];
+                if (regexMatchSubstr(db, "^(GC[AF])_([0-9]{3})([0-9]{3})([0-9]{3})\\.[0-9]$",
+                                     substrs, ArraySize(substrs)))
+                    {
+                    char gcPrefix[4], first3[4], mid3[4], last3[4];
+                    regexSubstringCopy(db, substrs[1], gcPrefix, sizeof gcPrefix);
+                    regexSubstringCopy(db, substrs[2], first3, sizeof first3);
+                    regexSubstringCopy(db, substrs[3], mid3, sizeof mid3);
+                    regexSubstringCopy(db, substrs[4], last3, sizeof last3);
+                    struct dyString *dy = dyStringCreate("https://hgdownload.soe.ucsc.edu/hubs/%s/%s/%s/"
+                                                         "%s/%s/hub.txt",
+                                                         gcPrefix, first3, mid3, last3, db);
+                    struct errCatch *errCatch = errCatchNew();
+                    struct trackHub *hub = NULL;
+                    if (errCatchStart(errCatch))
+                        {
+                        hub = trackHubOpen(dy->string, db);
+                        }
+                    errCatchEnd(errCatch);
+                    if (hub != NULL && hub->genomeList != NULL &&
+                        endsWith(hub->genomeList->name, db))
+                        slNameAddHead(&dbList, hub->genomeList->name);
+                    }
+                }
+            }
+        }
+    }
+// Reverse alphabetical sort to put wuhCor1/SARS-CoV-2 first
+slNameSort(&dbList);
+slReverse(&dbList);
+return dbList;
+}
+
 char *phyloPlaceDbSetting(char *db, char *settingName)
 /* Return a setting from hgPhyloPlaceData/<db>/config.ra or NULL if not found. */
 {
 static struct hash *configHash = NULL;
 static char *configDb = NULL;
-if (!sameOk(db, configDb))
+char *dbBase = trackHubSkipHubName(db);
+if (!sameOk(dbBase, configDb))
     {
     char configFile[1024];
-    safef(configFile, sizeof configFile, PHYLOPLACE_DATA_DIR "/%s/config.ra", db);
+    safef(configFile, sizeof configFile, PHYLOPLACE_DATA_DIR "/%s/config.ra", dbBase);
     if (fileExists(configFile))
         {
         configHash = raReadSingle(configFile);
-        configDb = cloneString(db);
+        configDb = cloneString(dbBase);
         }
     }
-if (sameOk(db, configDb))
+if (sameOk(dbBase, configDb))
     return cloneString(hashFindVal(configHash, settingName));
 return NULL;
 }
 
 char *phyloPlaceDbSettingPath(char *db, char *settingName)
 /* Return path to a file named by a setting from hgPhyloPlaceData/<db>/config.ra,
  * or NULL if not found.  (Append hgPhyloPlaceData/<db>/ to the beginning of relative path) */
 {
 char *fileName = phyloPlaceDbSetting(db, settingName);
 if (isNotEmpty(fileName) && fileName[0] != '/' && !fileExists(fileName))
     {
-    struct dyString *dy = dyStringCreate(PHYLOPLACE_DATA_DIR "/%s/%s", db, fileName);
+    struct dyString *dy = dyStringCreate(PHYLOPLACE_DATA_DIR "/%s/%s",
+                                         trackHubSkipHubName(db), fileName);
     if (fileExists(dy->string))
         return dyStringCannibalize(&dy);
     else
         return NULL;
     }
 return fileName;
 }
 
 char *getUsherPath(boolean abortIfNotFound)
 /* Return hgPhyloPlaceData/usher if it exists, else NULL.  Do not free the returned value. */
 {
 char *usherPath = PHYLOPLACE_DATA_DIR "/usher";
 if (fileExists(usherPath))
     return usherPath;
 else if (abortIfNotFound)
@@ -94,52 +160,54 @@
     return matUtilsPath;
 else if (abortIfNotFound)
     errAbort("Missing matUtils executable (expected to be at %s)", matUtilsPath);
 return NULL;
 }
 
 char *getUsherAssignmentsPath(char *db, boolean abortIfNotFound)
 /* If <db>/config.ra specifies the file for use by usher --load-assignments and the file exists,
  * return the path, else NULL.  Do not free the returned value. */
 {
 char *usherAssignmentsPath = phyloPlaceDbSettingPath(db, "usherAssignmentsFile");
 if (isNotEmpty(usherAssignmentsPath) && fileExists(usherAssignmentsPath))
     return usherAssignmentsPath;
 else if (abortIfNotFound)
     errAbort("Missing usher protobuf file (config setting in "
-             PHYLOPLACE_DATA_DIR "/%s/config.ra = %s", db, usherAssignmentsPath);
+             PHYLOPLACE_DATA_DIR "/%s/config.ra = %s",
+             trackHubSkipHubName(db), usherAssignmentsPath);
 return NULL;
 }
 
 //#*** This needs to go in a lib so CGIs know whether to include it in the menu. needs better name.
 boolean hgPhyloPlaceEnabled()
 /* Return TRUE if hgPhyloPlace is enabled in hg.conf and db wuhCor1 exists. */
 {
 char *cfgSetting = cfgOption("hgPhyloPlaceEnabled");
 boolean isEnabled = (isNotEmpty(cfgSetting) &&
                      differentWord(cfgSetting, "off") && differentWord(cfgSetting, "no"));
+//#*** TODO -- make less wuhCor1-specific
 return (isEnabled && hDbExists("wuhCor1"));
 }
 
 static void addPathIfNecessary(struct dyString *dy, char *db, char *fileName)
 /* If fileName exists, copy it into dy, else try hgPhyloPlaceData/<db>/fileName */
 {
 dyStringClear(dy);
 if (fileExists(fileName))
     dyStringAppend(dy, fileName);
 else
-    dyStringPrintf(dy, PHYLOPLACE_DATA_DIR "/%s/%s", db, fileName);
+    dyStringPrintf(dy, PHYLOPLACE_DATA_DIR "/%s/%s", trackHubSkipHubName(db), fileName);
 }
 
 struct treeChoices *loadTreeChoices(char *db)
 /* If <db>/config.ra specifies a treeChoices file, load it up, else return NULL. */
 {
 struct treeChoices *treeChoices = NULL;
 char *filename = phyloPlaceDbSettingPath(db, "treeChoices");
 if (isNotEmpty(filename) && fileExists(filename))
     {
     AllocVar(treeChoices);
     int maxChoices = 128;
     AllocArray(treeChoices->protobufFiles, maxChoices);
     AllocArray(treeChoices->metadataFiles, maxChoices);
     AllocArray(treeChoices->sources, maxChoices);
     AllocArray(treeChoices->descriptions, maxChoices);
@@ -224,31 +292,32 @@
 {
 if (node->numEdges > 0)
     {
     if (node->priv)
         {
         struct singleNucChange *snc, *sncs = node->priv;
         for (snc = sncs;  snc != NULL;  snc = snc->next)
             informativeBases[snc->chromStart] = TRUE;
         }
     int i;
     for (i = 0;  i < node->numEdges;  i++)
         rInformativeBasesFromTree(node->edges[i], informativeBases);
     }
 }
 
-static boolean *informativeBasesFromTree(struct phyloTree *bigTree, struct slName **maskSites)
+static boolean *informativeBasesFromTree(struct phyloTree *bigTree, struct slName **maskSites,
+                                         int chromSize)
 /* Return an array indexed by reference position with TRUE at positions that have a non-leaf
  * variant in bigTree.  If a position is in maskSites but is informative in bigTree then remove
  * it from maskSites because it was not masked (yet) back when the tree was built. */
 {
 boolean *informativeBases;
 AllocArray(informativeBases, chromSize);
 if (bigTree)
     {
     rInformativeBasesFromTree(bigTree, informativeBases);
     int i;
     for (i = 0;  i < chromSize;  i++)
         {
         if (maskSites[i] && informativeBases[i])
             maskSites[i] = NULL;
         }
@@ -349,34 +418,34 @@
                 {
                 lineFileAbort(lf, "VCF header did not include #CHROM line defining sample IDs for "
                               "genotype columns");
                 }
             int colCount = chopTabs(line, NULL);
             int genotypeCount = colCount - VCF_NUM_COLS_BEFORE_GENOTYPES;
             if (genotypeCount != sampleCount)
                 {
                 lineFileAbort(lf, "VCF header defines %d samples but there are %d genotype columns",
                               sampleCount, genotypeCount);
                 }
             char *words[colCount];
             chopTabs(line, words);
             //#*** TODO: check that POS is sorted
             int pos = strtol(words[1], NULL, 10);
-            if (pos > chromSize)
+            if (pos > refGenome->size)
                 {
                 lineFileAbort(lf, "VCF POS value %d exceeds size of reference sequence (%d)",
-                              pos, chromSize);
+                              pos, refGenome->size);
                 }
             // make sure REF value (if given) matches reference genome
             int chromStart = pos - 1;
             struct slName *maskedReasons = maskSites[chromStart];
             char *ref = words[3];
             if (strlen(ref) != 1)
                 {
                 // Not an SNV -- skip it.
                 //#*** should probably report or at least count these...
                 continue;
                 }
             char refBase = toupper(refGenome->dna[chromStart]);
             if (ref[0] == '*' || ref[0] == '.')
                 ref[0] = refBase;
             else if (ref[0] != refBase)
@@ -422,31 +491,31 @@
                                     slAddHead(&si->maskedReasonsList, slRefNew(maskedReasons));
                                     }
                                 else
                                     {
                                     if (isIupacAmbiguous(alt[0]))
                                         si->ambigCount++;
                                     slAddHead(&si->sncList, snc);
                                     }
                                 }
                             }
                         }
                     }
                 }
             if (!maskedReasons)
                 {
-                fputs(chrom, f);
+                fputs(refGenome->name, f);
                 for (i = 1;  i < colCount;  i++)
                     {
                     fputc('\t', f);
                     fputs(words[i], f);
                     }
                 fputc('\n', f);
                 }
             }
         }
     }
 errCatchEnd(errCatch);
 carefulClose(&f);
 if (errCatch->gotError)
     {
     warn("%s", errCatch->message->string);
@@ -479,35 +548,35 @@
         struct singleNucChange *snc;
         for (snc = si->sncList;  snc != NULL;  snc = snc->next)
             {
             char mutStr[64];
             safef(mutStr, sizeof mutStr, "%c%d%c", snc->refBase, snc->chromStart+1, snc->newBase);
             slNameAddHead(&sampleMuts, mutStr);
             }
         slReverse(&sampleMuts);
         pi->sampleMuts = sampleMuts;
         }
     else
         errAbort("addSampleMutsFromSeqInfo: no seqInfo for placed sequence '%s'", pi->sampleId);
     }
 }
 
-static void displaySampleMuts(struct placementInfo *info)
+static void displaySampleMuts(struct placementInfo *info, char *refAcc)
 {
 printf("<p>Differences from the reference genome "
-       "(<a href='https://www.ncbi.nlm.nih.gov/nuccore/NC_045512.2' target=_blank>"
-       "NC_045512.2</a>): ");
+       "(<a href='https://www.ncbi.nlm.nih.gov/nuccore/%s' target=_blank>%s</a>): ",
+       refAcc, refAcc);
 if (info->sampleMuts == NULL)
     printf("(None; identical to reference)");
 else
     {
     struct slName *sln;
     for (sln = info->sampleMuts;  sln != NULL;  sln = sln->next)
         {
         if (sln != info->sampleMuts)
             printf(", ");
         printf("%s", sln->name);
         }
     }
 puts("</p>");
 }
 
@@ -708,30 +777,33 @@
     int nCladeIx = stringArrayIx("Nextstrain_clade", headerWords, headerWordCount);
     int gCladeIx = stringArrayIx("GISAID_clade", headerWords, headerWordCount);
     int lineageIx = stringArrayIx("pangolin_lineage", headerWords, headerWordCount);
     if (lineageIx < 0)
         lineageIx = stringArrayIx("pango_lineage", headerWords, headerWordCount);
     int countryIx = stringArrayIx("country", headerWords, headerWordCount);
     int divisionIx = stringArrayIx("division", headerWords, headerWordCount);
     int locationIx = stringArrayIx("location", headerWords, headerWordCount);
     int countryExpIx = stringArrayIx("country_exposure", headerWords, headerWordCount);
     int divExpIx = stringArrayIx("division_exposure", headerWords, headerWordCount);
     int origLabIx = stringArrayIx("originating_lab", headerWords, headerWordCount);
     int subLabIx = stringArrayIx("submitting_lab", headerWords, headerWordCount);
     int regionIx = stringArrayIx("region", headerWords, headerWordCount);
     int nCladeUsherIx = stringArrayIx("Nextstrain_clade_usher", headerWords, headerWordCount);
     int lineageUsherIx = stringArrayIx("pango_lineage_usher", headerWords, headerWordCount);
+    int authorsIx = stringArrayIx("authors", headerWords, headerWordCount);
+    int pubsIx = stringArrayIx("publications", headerWords, headerWordCount);
+    int nLineageIx = stringArrayIx("Nextstrain_lineage", headerWords, headerWordCount);
     while (lineFileNext(lf, &line, NULL))
         {
         char *words[headerWordCount];
         int wordCount = chopTabs(line, words);
         lineFileExpectWords(lf, headerWordCount, wordCount);
         struct sampleMetadata *met;
         AllocVar(met);
         if (strainIx >= 0)
             met->strain = cloneString(words[strainIx]);
         if (epiIdIx >= 0)
             met->epiId = cloneString(words[epiIdIx]);
         if (genbankIx >= 0 && !sameString("?", words[genbankIx]))
             met->gbAcc = cloneString(words[genbankIx]);
         if (dateIx >= 0)
             met->date = cloneString(words[dateIx]);
@@ -751,30 +823,36 @@
             met->location = cloneString(words[locationIx]);
         if (countryExpIx >= 0)
             met->countryExp = cloneString(words[countryExpIx]);
         if (divExpIx >= 0)
             met->divExp = cloneString(words[divExpIx]);
         if (origLabIx >= 0)
             met->origLab = cloneString(words[origLabIx]);
         if (subLabIx >= 0)
             met->subLab = cloneString(words[subLabIx]);
         if (regionIx >= 0)
             met->region = cloneString(words[regionIx]);
         if (nCladeUsherIx >= 0)
             met->nCladeUsher = cloneString(words[nCladeUsherIx]);
         if (lineageUsherIx >= 0)
             met->lineageUsher = cloneString(words[lineageUsherIx]);
+        if (authorsIx >= 0)
+            met->authors = cloneString(words[authorsIx]);
+        if (pubsIx >= 0)
+            met->pubs = cloneString(words[pubsIx]);
+        if (nLineageIx >= 0)
+            met->nLineage = cloneString(words[nLineageIx]);
         // If epiId and/or genbank ID is included, we'll probably be using that to look up items.
         if (epiIdIx >= 0 && !isEmpty(words[epiIdIx]))
             hashAdd(sampleMetadata, words[epiIdIx], met);
         if (genbankIx >= 0 && !isEmpty(words[genbankIx]) && !sameString("?", words[genbankIx]))
             {
             if (strchr(words[genbankIx], '.'))
                 {
                 // Index by versionless accession
                 char copy[strlen(words[genbankIx])+1];
                 safecpy(copy, sizeof copy, words[genbankIx]);
                 char *dot = strchr(copy, '.');
                 *dot = '\0';
                 hashAdd(sampleMetadata, copy, met);
                 }
             else
@@ -849,74 +927,95 @@
 if (!met && isdigit(sampleId[0]) && strstr(sampleId, "_from_"))
     {
     char *eg = strstr(sampleId, "_eg_");
     if (eg)
         met = hashFindVal(sampleMetadata, eg+strlen("_eg_"));
     }
 return met;
 }
 
 static char *lineageForSample(struct hash *sampleMetadata, char *sampleId)
 /* Look up sampleId's lineage in epiToLineage file. Return NULL if we don't find a match. */
 {
 char *lineage = NULL;
 struct sampleMetadata *met = metadataForSample(sampleMetadata, sampleId);
 if (met)
-    lineage = met->lineage;
+    lineage = met->lineage ? met->lineage : met->nLineage;
 return lineage;
 }
 
 static void findNearestNeighbors(struct hash *samplePlacements, struct hash *sampleMetadata,
                                  struct mutationAnnotatedTree *bigTree)
 /* For each placed sample, find the nearest neighbor in the bigTree and its assigned lineage,
  * and fill in those two fields of placementInfo. */
 {
 if (!bigTree)
     return;
 struct hashCookie cookie = hashFirst(samplePlacements);
 struct hashEl *hel;
 while ((hel = hashNext(&cookie)) != NULL)
     {
     struct placementInfo *info = hel->val;
     info->nearestNeighbor = findNearestNeighbor(bigTree, info->sampleId, info->variantPath);
     if (isNotEmpty(info->nearestNeighbor))
         info->neighborLineage = lineageForSample(sampleMetadata, info->nearestNeighbor);
     }
 }
 
-static void printLineageUrl(char *lineage)
+static void printLineageLink(char *lineage, char *db)
+/* Print lineage with link to outbreak.info or pango-designation issue if appropriate.
+ * Caller must handle case of empty/NULL lineage. */
+{
+boolean isSarsCov2 = sameString(db, "wuhCor1");
+if (isEmpty(lineage))
+    errAbort("programmer error:printLineageLink called with empty lineage");
+else if (isSarsCov2 && startsWith("proposed", lineage))
+    printf("<a href='"PANGO_DESIGNATION_ISSUE_URLBASE"%s' target=_blank>%s</a>",
+           lineage+strlen("proposed"), lineage);
+else if (isSarsCov2 && differentString(lineage, "None") && differentString(lineage, "Unassigned"))
+    {
+    // Trim _suffix that I add to extra tree annotations for problematic branches, if present.
+    char lineageCopy[strlen(lineage)+1];
+    safecpy(lineageCopy, sizeof lineageCopy, lineage);
+    char *p = strchr(lineageCopy, '_');
+    if (p != NULL)
+        *p = '\0';
+    printf("<a href='"OUTBREAK_INFO_URLBASE"%s' target=_blank>%s</a>", lineageCopy, lineageCopy);
+    }
+else
+    printf("%s", lineage);
+}
+
+static void maybePrintLineageLink(char *lineage, char *db)
 /* If lineage is not empty/NULL, print ": lineage <lineage>" and link to outbreak.info
  * (unless lineage is "None") */
 {
 if (isNotEmpty(lineage))
     {
-    if (differentString(lineage, "None"))
-        printf(": lineage <a href='"OUTBREAK_INFO_URLBASE"%s' target=_blank>%s</a>",
-               lineage, lineage);
-    else
-        printf(": lineage %s", lineage);
+    printf(": lineage ");
+    printLineageLink(lineage, db);
     }
 }
 
-static void displayNearestNeighbors(struct placementInfo *info, char *source)
+static void displayNearestNeighbors(struct placementInfo *info, char *source, char *db)
 /* Use info->variantPaths to find sample's nearest neighbor(s) in tree. */
 {
 if (isNotEmpty(info->nearestNeighbor))
     {
     printf("<p>Nearest neighboring %s sequence already in phylogenetic tree: %s",
            source, info->nearestNeighbor);
-    printLineageUrl(info->neighborLineage);
+    maybePrintLineageLink(info->neighborLineage, db);
     puts("</p>");
     }
 }
 
 static int placementInfoRefCmpSampleMuts(const void *va, const void *vb)
 /* Compare slRef->placementInfo->sampleMuts lists.  Shorter lists first.  Using alpha sort
  * to distinguish between different sampleMuts contents arbitrarily; the purpose is to
  * clump samples with identical lists. */
 {
 struct slRef * const *rra = va;
 struct slRef * const *rrb = vb;
 struct placementInfo *pa = (*rra)->val;
 struct placementInfo *pb = (*rrb)->val;
 int diff = slCount(pa->sampleMuts) - slCount(pb->sampleMuts);
 if (diff == 0)
@@ -1023,31 +1122,31 @@
     if (info)
         {
         if (isNotEmpty(info->nearestNeighbor))
             neighbor = info->nearestNeighbor;
         if (isNotEmpty(info->neighborLineage))
             lineage = info->neighborLineage;
         }
     printf("%-*s  %s  %s\n", maxLen, asciiRow, neighbor, lineage);
     }
 slNameFreeList(&rowList);
 dyStringFree(&dy);
 }
 
 static void describeSamplePlacements(struct slName *sampleIds, struct hash *samplePlacements,
                                      struct phyloTree *subtree, struct hash *sampleMetadata,
-                                     char *source)
+                                     char *source, char *refAcc, char *db)
 /* Report how each sample fits into the big tree. */
 {
 // Sort sample placements by sampleMuts so we can group identical samples.
 struct slRef *placementRefs = getPlacementRefList(sampleIds, samplePlacements);
 slSort(&placementRefs, placementInfoRefCmpSampleMuts);
 int relatedCount = slCount(placementRefs);
 int clumpSize = countIdentical(placementRefs);
 if (clumpSize < relatedCount && relatedCount > 2)
     {
     // Not all of the related sequences are identical, so they will be broken down into
     // separate "clumps".  List all related samples first to avoid confusion.
     puts("<pre>");
     asciiTreeWithNeighborInfo(subtree, samplePlacements);
     puts("</pre>");
     }
@@ -1067,42 +1166,42 @@
             slNameAddHead(&sortedSamples, info->sampleId);
             }
         slNameSort(&sortedSamples);
         printf("<b>%d identical samples:</b>\n<ul>\n", clumpSize);
         struct slName *sln;
         for (sln = sortedSamples;  sln != NULL;  sln = sln->next)
             printf("<li><b>%s</b>\n", sln->name);
         puts("</ul>");
         }
     else
         {
         printf("<b>%s</b>\n", info->sampleId);
         ref = ref->next;
         }
     refsToGo = ref;
-    displaySampleMuts(info);
+    displaySampleMuts(info, refAcc);
     if (info->imputedBases)
         {
         puts("<p>Base values imputed by parsimony:\n<ul>");
         struct baseVal *bv;
         for (bv = info->imputedBases;  bv != NULL;  bv = bv->next)
             printf("<li>%d: %s\n", bv->chromStart+1, bv->val);
         puts("</ul>");
         puts("</p>");
         }
     displayVariantPath(info->variantPath, clumpSize == 1 ? info->sampleId : "samples");
-    displayNearestNeighbors(info, source);
+    displayNearestNeighbors(info, source, db);
     if (showParsimonyScore && info->parsimonyScore > 0)
         printf("<p>Parsimony score added by your sample: %d</p>\n", info->parsimonyScore);
         //#*** TODO: explain parsimony score
     }
 }
 
 struct phyloTree *phyloPruneToIds(struct phyloTree *node, struct slName *sampleIds)
 /* Prune all descendants of node that have no leaf descendants in sampleIds. */
 {
 if (node->numEdges)
     {
     struct phyloTree *prunedKids = NULL;
     int i;
     for (i = 0;  i < node->numEdges;  i++)
         {
@@ -1151,31 +1250,33 @@
 static void lookForCladesAndLineages(struct hash *samplePlacements,
                                      boolean *retGotClades, boolean *retGotLineages)
 /* See if UShER has annotated any clades and/or lineages for seqs. */
 {
 boolean gotClades = FALSE, gotLineages = FALSE;
 struct hashEl *hel;
 struct hashCookie cookie = hashFirst(samplePlacements);
 while ((hel = hashNext(&cookie)) != NULL)
     {
     struct placementInfo *pi = hel->val;
     if (pi)
         {
         if (isNotEmpty(pi->nextClade))
             gotClades = TRUE;
         if (isNotEmpty(pi->pangoLineage))
+            {
             gotLineages = TRUE;
+            }
         if (gotClades && gotLineages)
             break;
         }
     }
 *retGotClades = gotClades;
 *retGotLineages = gotLineages;
 }
 
 static char *nextstrainHost()
 /* Return the nextstrain hostname from an hg.conf param, or NULL if missing. */
 {
 return cfgOption("nextstrainHost");
 }
 
 static char *nextstrainUrlFromTn(struct tempName *jsonTn)
@@ -1249,87 +1350,101 @@
         }
     }
 if (0 && isFasta)
     {
     printf("&nbsp;");
     struct dyString *js = dyStringCreate("window.open('https://master.clades.nextstrain.org/"
                                          "?input-fasta=%s');",
                                          "needATn");  //#*** TODO: save FASTA to file
     cgiMakeOnClickButton("viewNextclade", js->string, "view sequences in Nextclade");
     }
 puts("</p>");
 }
 
 #define TOOLTIP(text) " <div class='tooltip'>(?)<span class='tooltiptext'>" text "</span></div>"
 
-static void printSummaryHeader(boolean isFasta, boolean gotClades, boolean gotLineages)
+static void printSummaryHeader(boolean isFasta, boolean gotClades, boolean gotLineages,
+                               char *refName, char *db)
 /* Print the summary table header row with tooltips explaining columns. */
 {
 puts("<thead><tr>");
 if (isFasta)
     puts("<th>Fasta Sequence</th>\n"
          "<th>Size"
          TOOLTIP("Length of uploaded sequence in bases, excluding runs of N bases at "
                  "beginning and/or end")
          "</th>\n<th>#Ns"
          TOOLTIP("Number of 'N' bases in uploaded sequence, excluding runs of N bases at "
                  "beginning and/or end")
          "</th>");
 else
     puts("<th>VCF Sample</th>\n"
          "<th>#Ns"
          TOOLTIP("Number of no-call variants for this sample in uploaded VCF, "
                  "i.e. '.' used in genotype column")
          "</th>");
 puts("<th>#Mixed"
      TOOLTIP("Number of IUPAC ambiguous bases, e.g. 'R' for 'A or G'")
      "</th>");
 if (isFasta)
-    puts("<th>Bases aligned"
-         TOOLTIP("Number of bases aligned to reference NC_045512.2 Wuhan/Hu-1, including "
+    printf("<th>Bases aligned"
+         TOOLTIP("Number of bases aligned to reference %s, including "
                  "matches and mismatches")
          "</th>\n<th>Inserted bases"
          TOOLTIP("Number of bases in aligned portion of uploaded sequence that are not present in "
-                 "reference NC_045512.2 Wuhan/Hu-1")
+                 "reference %s")
          "</th>\n<th>Deleted bases"
-         TOOLTIP("Number of bases in reference NC_045512.2 Wuhan/Hu-1 that are not "
+         TOOLTIP("Number of bases in reference %s that are not "
                  "present in aligned portion of uploaded sequence")
-         "</th>");
+           "</th>", refName, refName, refName);
 puts("<th>#SNVs used for placement"
      TOOLTIP("Number of single-nucleotide variants in uploaded sample "
              "(does not include N's or mixed bases) used by UShER to place sample "
              "in phylogenetic tree")
      "</th>\n<th>#Masked SNVs"
      TOOLTIP("Number of single-nucleotide variants in uploaded sample that are masked "
              "(not used for placement) because they occur at known "
              "<a href='https://virological.org/t/issues-with-sars-cov-2-sequencing-data/473/12' "
              "target=_blank>Problematic Sites</a>"));;
 if (gotClades)
+    {
+    if (sameString(db, "wuhCor1"))
         puts("</th>\n<th>Nextstrain clade"
              TOOLTIP("The <a href='https://nextstrain.org/blog/2021-01-06-updated-SARS-CoV-2-clade-naming' "
-             "target=_blank>Nextstrain clade</a> assigned to the sample by UShER"));
+                     "target=_blank>Nextstrain clade</a> assigned to the sample by "
+                     "placement in the tree"));
+    else
+        puts("</th>\n<th>Nextstrain lineage"
+             TOOLTIP("The Nextstrain lineage assigned by "
+                     "placement in the tree"));
+    }
 if (gotLineages)
     puts("</th>\n<th>Pango lineage"
      TOOLTIP("The <a href='https://cov-lineages.org/' "
              "target=_blank>Pango lineage</a> assigned to the sample by UShER"));
 puts("</th>\n<th>Neighboring sample in tree"
      TOOLTIP("A sample already in the tree that is a child of the node at which the uploaded "
              "sample was placed, to give an example of a closely related sample")
-     "</th>\n<th>Lineage of neighbor"
-     TOOLTIP("The <a href='https://cov-lineages.org/' target=_blank>"
-             "Pango lineage</a> assigned to the nearest neighboring sample already in the tree")
-     "</th>\n<th>#Imputed values for mixed bases"
+     "</th>\n<th>Lineage of neighbor");
+if (sameString(db, "wuhCor1"))
+    puts(TOOLTIP("The <a href='https://cov-lineages.org/' target=_blank>"
+                 "Pango lineage</a> assigned by pangolin "
+                 "to the nearest neighboring sample already in the tree"));
+else
+    puts(TOOLTIP("The lineage assigned by Nextclade "
+                 "to the nearest neighboring sample already in the tree"));
+puts("</th>\n<th>#Imputed values for mixed bases"
      TOOLTIP("If the uploaded sequence contains mixed/ambiguous bases, then UShER may assign "
              "values based on maximum parsimony")
      "</th>\n<th>#Maximally parsimonious placements"
      TOOLTIP("Number of potential placements in the tree with minimal parsimony score; "
              "the higher the number, the less confident the placement")
      "</th>\n<th>Parsimony score"
      TOOLTIP("Number of mutations/changes that must be added to the tree when placing the "
              "uploaded sample; the higher the number, the more diverged the sample")
      "</th>\n<th>Subtree number"
      TOOLTIP("Sequence number of subtree that contains this sample")
      "</th></tr></thead>");
 }
 
 
 // Default QC thresholds for calling an input sequence excellent/good/fair/bad [/fail]:
@@ -1489,73 +1604,75 @@
 printf(TOOLTIP("%s"), text);
 }
 
 static void appendExcludingNs(struct dyString *dy, struct seqInfo *si)
 /* Append a note to dy about how many N bases and start and/or end are excluded from statistic. */
 {
 dyStringAppend(dy, "excluding ");
 if (si->nCountStart)
     dyStringPrintf(dy, "%d N bases at start", si->nCountStart);
 if (si->nCountStart && si->nCountEnd)
     dyStringAppend(dy, " and ");
 if (si->nCountEnd)
     dyStringPrintf(dy, "%d N bases at end", si->nCountEnd);
 }
 
-static void printLineageTd(char *lineage, char *alt)
+static void printLineageTd(char *lineage, char *alt, char *db)
 /* Print a table cell with lineage (& link to outbreak.info if not 'None') or alt if no lineage. */
 {
-if (lineage && differentString(lineage, "None"))
-    printf("<td><a href='"OUTBREAK_INFO_URLBASE"%s' target=_blank>%s</a></td>", lineage, lineage);
-else if (lineage)
-    printf("<td>%s</td>", lineage);
-else
+if (isEmpty(lineage))
     printf("<td>%s</td>", alt);
+else
+    {
+    printf("<td>");
+    printLineageLink(lineage, db);
+    printf("</td>");
+    }
 }
 
 static void printSubtreeTd(struct subtreeInfo *subtreeInfoList, struct tempName *jsonTns[],
                            char *seqName)
 /* Print a table cell with subtree (& link if possible) if found. */
 {
 int ix;
 struct subtreeInfo *ti = subtreeInfoForSample(subtreeInfoList, seqName, &ix);
 if (ix < 0)
     //#*** Probably an error.
     printf("<td>n/a</td>");
 else
     {
     printf("<td>%d", ix+1);
     if (ti && nextstrainHost())
         {
         char *nextstrainUrl = nextstrainUrlFromTn(jsonTns[ix]);
         printf(" (<a href='%s' target=_blank>view in Nextstrain<a>)", nextstrainUrl);
         }
     printf("</td>");
     }
 }
 
 static void summarizeSequences(struct seqInfo *seqInfoList, boolean isFasta,
                                struct usherResults *ur, struct tempName *jsonTns[],
-                               struct hash *sampleMetadata, struct dnaSeq *refGenome)
+                               char *refAcc, char *db)
 /* Show a table with composition & alignment stats for each sequence that passed basic QC. */
 {
 if (seqInfoList)
     {
     puts("<table class='seqSummary'>");
     boolean gotClades = FALSE, gotLineages = FALSE;
     lookForCladesAndLineages(ur->samplePlacements, &gotClades, &gotLineages);
-    printSummaryHeader(isFasta, gotClades, gotLineages);
+    printSummaryHeader(isFasta, gotClades, gotLineages, refAcc, db);
     puts("<tbody>");
     struct dyString *dy = dyStringNew(0);
     struct seqInfo *si;
     for (si = seqInfoList;  si != NULL;  si = si->next)
         {
         puts("<tr>");
         printf("<th>%s</td>", replaceChars(si->seq->name, "|", " | "));
         if (isFasta)
             {
             if (si->nCountStart || si->nCountEnd)
                 {
                 int effectiveLength = si->seq->size - (si->nCountStart + si->nCountEnd);
                 dyStringClear(dy);
                 dyStringPrintf(dy, "%d ", effectiveLength);
                 appendExcludingNs(dy, si);
@@ -1663,34 +1780,34 @@
                     {
                     replaceChar(reason->name, '_', ' ');
                     dyStringPrintf(dy, ", %s", reason->name);
                     }
                 dyStringAppendC(dy, ')');
                 }
             printTooltip(dy->string);
             }
         printf("</td>");
         struct placementInfo *pi = hashFindVal(ur->samplePlacements, si->seq->name);
         if (pi)
             {
             if (gotClades)
                 printf("<td>%s</td>", pi->nextClade ? pi->nextClade : "n/a");
             if (gotLineages)
-                printLineageTd(pi->pangoLineage, "n/a");
+                printLineageTd(pi->pangoLineage, "n/a", db);
             printf("<td>%s</td>",
                    pi->nearestNeighbor ? replaceChars(pi->nearestNeighbor, "|", " | ") : "?");
-            printLineageTd(pi->neighborLineage, "?");
+            printLineageTd(pi->neighborLineage, "?", db);
             int imputedCount = slCount(pi->imputedBases);
             printf("<td class='%s'>%d",
                    qcClassForImputedBases(imputedCount), imputedCount);
             if (imputedCount > 0)
                 {
                 dyStringClear(dy);
                 struct baseVal *bv;
                 for (bv = pi->imputedBases;  bv != NULL;  bv = bv->next)
                     {
                     dyStringAppendSep(dy, ", ");
                     dyStringPrintf(dy, "%d: %s", bv->chromStart+1, bv->val);
                     }
                 printTooltip(dy->string);
                 }
             printf("</td><td class='%s'>%d",
@@ -1704,31 +1821,31 @@
             if (gotClades)
                 printf("<td>n/a</td>");
             if (gotLineages)
                 printf("<td>n/a</td>");
             printf("<td>n/a</td><td>n/a</td><td>n/a</td><td>n/a</td><td>n/a</td>");
             }
         printSubtreeTd(ur->subtreeInfoList, jsonTns, si->seq->name);
         puts("</tr>");
         }
     puts("</tbody></table><p></p>");
     }
 }
 
 static void summarizeSubtrees(struct slName *sampleIds, struct usherResults *results,
                               struct hash *sampleMetadata, struct tempName *jsonTns[],
-                              struct mutationAnnotatedTree *bigTree)
+                              struct mutationAnnotatedTree *bigTree, char *db)
 /* Print a summary table of pasted/uploaded identifiers and subtrees */
 {
 boolean gotClades = FALSE, gotLineages = FALSE;
 lookForCladesAndLineages(results->samplePlacements, &gotClades, &gotLineages);
 puts("<table class='seqSummary'><tbody>");
 puts("<tr><th>Sequence</th>");
 if (gotClades)
     puts("<th>Nextstrain clade (UShER)"
      TOOLTIP("The <a href='https://nextstrain.org/blog/2021-01-06-updated-SARS-CoV-2-clade-naming' "
              "target=_blank>Nextstrain clade</a> "
              "assigned to the sequence by UShER according to its place in the phylogenetic tree")
          "</th>");
 if (gotLineages)
     puts("<th>Pango lineage (UShER)"
          TOOLTIP("The <a href='https://cov-lineages.org/' "
@@ -1740,71 +1857,67 @@
              "Pango lineage</a> assigned to the sequence by "
              "<a href='https://github.com/cov-lineages/pangolin/' target=_blank>pangolin</a>")
      "</th>"
      "<th>subtree</th></tr>");
 struct slName *si;
 for (si = sampleIds;  si != NULL;  si = si->next)
     {
     puts("<tr>");
     printf("<th>%s</td>", replaceChars(si->name, "|", " | "));
     struct placementInfo *pi = hashFindVal(results->samplePlacements, si->name);
     if (pi)
         {
         if (gotClades)
             printf("<td>%s</td>", pi->nextClade ? pi->nextClade : "n/a");
         if (gotLineages)
-            printLineageTd(pi->pangoLineage, "n/a");
+            printLineageTd(pi->pangoLineage, "n/a", db);
         }
     else
         {
         if (gotClades)
             printf("<td>n/a</td>");
         if (gotLineages)
             printf("<td>n/a</td>");
         }
     // pangolin-assigned lineage
     char *lineage = lineageForSample(sampleMetadata, si->name);
-    if (isNotEmpty(lineage))
-        printf("<td><a href='"OUTBREAK_INFO_URLBASE"%s' target=_blank>%s</a></td>",
-               lineage, lineage);
-    else
-        printf("<td>n/a</td>");
+    printLineageTd(lineage, "n/a", db);
     // Maybe also #mutations with mouseover to show mutation path?
     printSubtreeTd(results->subtreeInfoList, jsonTns, si->name);
     }
 puts("</tbody></table><p></p>");
 }
 
 static struct singleNucChange *sncListFromSampleMutsAndImputed(struct slName *sampleMuts,
                                                                struct baseVal *imputedBases,
                                                                struct seqWindow *gSeqWin)
 /* Convert a list of "<ref><pos><alt>" names to struct singleNucChange list.
  * However, if <alt> is ambiguous, skip it because variantProjector doesn't like it.
  * Add imputed base predictions. */
 {
 struct singleNucChange *sncList = NULL;
 struct slName *mut;
 for (mut = sampleMuts;  mut != NULL;  mut = mut->next)
     {
     char ref = mut->name[0];
     if (ref < 'A' || ref > 'Z')
         errAbort("sncListFromSampleMuts: expected ref base value, got '%c' in '%s'",
                  ref, mut->name);
     int pos = atoi(&(mut->name[1]));
-    if (pos < 1 || pos > chromSize)
+    if (pos < 1 || pos > gSeqWin->end)
         errAbort("sncListFromSampleMuts: expected pos between 1 and %d, got %d in '%s'",
-                 chromSize, pos, mut->name);
+                 gSeqWin->end, pos, mut->name);
     char alt = mut->name[strlen(mut->name)-1];
     if (alt < 'A' || alt > 'Z')
         errAbort("sncListFromSampleMuts: expected alt base value, got '%c' in '%s'",
                  alt, mut->name);
     if (isIupacAmbiguous(alt))
         continue;
     struct singleNucChange *snc;
     AllocVar(snc);
     snc->chromStart = pos-1;
     snc->refBase = snc->parBase = ref;
     snc->newBase = alt;
     slAddHead(&sncList, snc);
     }
 struct baseVal *bv;
 for (bv = imputedBases;  bv != NULL;  bv = bv->next)
@@ -2275,31 +2388,31 @@
 cmd[cIx++] = singleSubtreeJsonTn->forCgi;
 cmd[cIx++] = results->singleSubtreeInfo->subtreeTn->forCgi;
 struct subtreeInfo *ti;
 for (ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, sIx++)
     {
     cmd[cIx++] = jsonTns[sIx]->forCgi;
     cmd[cIx++] = ti->subtreeTn->forCgi;
     }
 cmd[cIx++] = NULL;
 struct pipeline *pl = pipelineOpen(cmds, pipelineRead, NULL, NULL, 0);
 pipelineClose(&pl);
 reportTiming(pStartTime, "make subtree zipfile");
 return zipTn;
 }
 
-static struct slName **getProblematicSites(char *db)
+static struct slName **getProblematicSites(char *db, char *chrom, int chromSize)
 /* If config.ra specfies maskFile them return array of lists (usually NULL) of reasons that
  * masking is recommended, one per position in genome; otherwise return NULL. */
 {
 struct slName **pSites = NULL;
 char *pSitesFile = phyloPlaceDbSettingPath(db, "maskFile");
 if (isNotEmpty(pSitesFile) && fileExists(pSitesFile))
     {
     AllocArray(pSites, chromSize);
     struct bbiFile *bbi = bigBedFileOpen(pSitesFile);
     struct lm *lm = lmInit(0);
     struct bigBedInterval *bb, *bbList = bigBedIntervalQuery(bbi, chrom, 0, chromSize, 0, lm);
     for (bb = bbList;  bb != NULL;  bb = bb->next)
         {
         char *extra = bb->rest;
         char *reason = nextWord(&extra);
@@ -2473,31 +2586,31 @@
         {
         char *words[3];
         int wordCount = chopTabs(line, words);
         lineFileExpectWords(lf, 2, wordCount);
         char *fullName = hashFindVal(nameHash, words[1]);
         if (fullName)
             hashAdd(nameHash, words[0], fullName);
         else
             {
             missCount++;
             if (missExample == NULL)
                 missExample = cloneString(words[1]);
             }
         }
     lineFileClose(&lf);
-    if (missCount > 0)
+    if (missCount > 0 && verboseLevel() > 1)
         fprintf(stderr, "aliasFile %s: %d values in second column were not found in tree, "
                 "e.g. '%s'", aliasFile, missCount, missExample);
     }
 }
 
 static struct hash *getTreeNames(struct mutationAnnotatedTree *bigTree, char *aliasFile,
                                  boolean addComponents)
 /* Make a hash of full names of leaves of bigTree; if addComponents, also map components of those
  * names to the full names in case the user gives us partial names/IDs. */
 {
 int nodeCount = bigTree->nodeHash->elCount;
 struct hash *nameHash = hashNew(digitsBaseTwo(nodeCount) + 3);
 rAddLeafNames(bigTree->tree, bigTree->condensedNodes, nameHash, addComponents);
 addAliases(nameHash, aliasFile);
 return nameHash;
@@ -2626,32 +2739,34 @@
 struct tempName *vcfTn = NULL;
 struct slName *sampleIds = NULL;
 char *usherPath = getUsherPath(TRUE);
 char *protobufPath = NULL;
 char *source = NULL;
 char *metadataFile = NULL;
 char *aliasFile = NULL;
 getProtobufMetadataSource(db, defaultProtobuf, &protobufPath, &metadataFile, &source, &aliasFile);
 struct mutationAnnotatedTree *bigTree = parseParsimonyProtobuf(protobufPath);
 reportTiming(&startTime, "parse protobuf file");
 if (! bigTree)
     {
     warn("Problem parsing %s; can't make subtree subtracks.", protobufPath);
     }
 lineFileCarefulNewlines(lf);
-struct slName **maskSites = getProblematicSites(db);
-boolean *informativeBases = informativeBasesFromTree(bigTree->tree, maskSites);
+char *chrom = hDefaultChrom(db);
+int chromSize = hChromSize(db, chrom);
+struct slName **maskSites = getProblematicSites(db, chrom, chromSize);
+boolean *informativeBases = informativeBasesFromTree(bigTree->tree, maskSites, chromSize);
 struct dnaSeq *refGenome = hChromSeq(db, chrom, 0, chromSize);
 boolean isFasta = FALSE;
 boolean subtreesOnly = FALSE;
 struct seqInfo *seqInfoList = NULL;
 if (lfLooksLikeFasta(lf))
     {
     struct slPair *failedSeqs;
     struct slPair *failedPsls;
     struct hash *treeNames = getTreeNames(bigTree, NULL, FALSE);
     vcfTn = vcfFromFasta(lf, db, refGenome, informativeBases, maskSites, treeNames,
                          &sampleIds, &seqInfoList, &failedSeqs, &failedPsls, &startTime);
     if (failedSeqs)
         {
         puts("<p>");
         struct slPair *fail;
@@ -2705,31 +2820,31 @@
                                          sampleIds, bigTree->condensedNodes,
                                          &startTime);
     }
 if (results && results->singleSubtreeInfo)
     {
     if (retSuccess)
         *retSuccess = TRUE;
     puts("<p></p>");
     readQcThresholds(db);
     int subtreeCount = slCount(results->subtreeInfoList);
     // Sort subtrees by number of user samples (largest first).
     slSort(&results->subtreeInfoList, subTreeInfoUserSampleCmp);
     // Make Nextstrain/auspice JSON file for each subtree.
     char *bigGenePredFile = phyloPlaceDbSettingPath(db, "bigGenePredFile");
     struct geneInfo *geneInfoList = getGeneInfoList(bigGenePredFile, refGenome);
-    struct seqWindow *gSeqWin = chromSeqWindowNew(db, chrom, 0, chromSize);
+    struct seqWindow *gSeqWin = memSeqWindowNew(chrom, refGenome->dna);
     struct hash *sampleMetadata = getSampleMetadata(metadataFile);
     struct hash *sampleUrls = hashNew(0);
     struct tempName *jsonTns[subtreeCount];
     struct subtreeInfo *ti;
     int ix;
     for (ix = 0, ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, ix++)
         {
         AllocVar(jsonTns[ix]);
         char subtreeName[512];
         safef(subtreeName, sizeof(subtreeName), "subtreeAuspice%d", ix+1);
         trashDirFile(jsonTns[ix], "ct", subtreeName, ".json");
         treeToAuspiceJson(ti, db, geneInfoList, gSeqWin, sampleMetadata, NULL,
                           jsonTns[ix]->forCgi, source);
         // Add a link for every sample to this subtree, so the single-subtree JSON can
         // link to subtree JSONs
@@ -2754,72 +2869,79 @@
            "</p>\n");
     puts("<p><em>Note: "
          "The Nextstrain subtree views, and Download files below, are temporary files and will "
          "expire within two days.  "
          "Please download the Nextstrain subtree JSON files if you will want to view them "
          "again in the future.  The JSON files can be drag-dropped onto "
          "<a href='https://auspice.us/' target=_blank>https://auspice.us/</a>."
          "</em></p>");
 
     struct tempName *tsvTn = NULL, *sTsvTn = NULL;
     struct tempName *zipTn = makeSubtreeZipFile(results, jsonTns, singleSubtreeJsonTn,
                                                 &startTime);
     struct tempName *ctTn = NULL;
     if (subtreesOnly)
         {
-        summarizeSubtrees(sampleIds, results, sampleMetadata, jsonTns, bigTree);
+        summarizeSubtrees(sampleIds, results, sampleMetadata, jsonTns, bigTree, db);
         reportTiming(&startTime, "describe subtrees");
         }
     else
         {
         findNearestNeighbors(results->samplePlacements, sampleMetadata, bigTree);
 
         // Make custom tracks for uploaded samples and subtree(s).
         struct phyloTree *sampleTree = NULL;
-        ctTn = writeCustomTracks(vcfTn, results, sampleIds, bigTree,
-                                 source, fontHeight, &sampleTree, &startTime);
+        ctTn = writeCustomTracks(db, vcfTn, results, sampleIds, bigTree, source, fontHeight,
+                                 &sampleTree, &startTime);
 
         // Make a sample summary TSV file and accumulate S gene changes
         struct hash *spikeChanges = hashNew(0);
         tsvTn = writeTsvSummary(results, sampleTree, sampleIds, seqInfoHash,
                                                  geneInfoList, gSeqWin, spikeChanges, &startTime);
         sTsvTn = writeSpikeChangeSummary(spikeChanges, slCount(sampleIds));
         downloadsRow(results->bigTreePlusTn->forHtml, tsvTn->forHtml, sTsvTn->forHtml,
                      zipTn->forHtml);
 
         int seqCount = slCount(seqInfoList);
         if (seqCount <= MAX_SEQ_DETAILS)
             {
-            summarizeSequences(seqInfoList, isFasta, results, jsonTns, sampleMetadata, refGenome);
+            char *refAcc = cloneString(chrom);
+            if (regexMatch(refAcc, "v[0-9]+$"))
+                {
+                char *v = strrchr(refAcc, 'v');
+                assert(v != NULL);
+                *v = '.';
+                }
+            summarizeSequences(seqInfoList, isFasta, results, jsonTns, refAcc, db);
             reportTiming(&startTime, "write summary table (including reading in lineages)");
             for (ix = 0, ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, ix++)
                 {
                 int subtreeUserSampleCount = slCount(ti->subtreeUserSampleIds);
                 printf("<h3>Subtree %d: ", ix+1);
                 if (subtreeUserSampleCount > 1)
                     printf("%d related samples", subtreeUserSampleCount);
                 else if (subtreeCount > 1)
                     printf("Unrelated sample");
                 printf("</h3>\n");
                 makeNextstrainButtonN("viewNextstrainSub", ix, subtreeUserSampleCount, subtreeSize,
                                       jsonTns);
                 puts("<br>");
                 // Make a sub-subtree with only user samples for display:
                 struct phyloTree *subtree = phyloOpenTree(ti->subtreeTn->forCgi);
                 subtree = phyloPruneToIds(subtree, ti->subtreeUserSampleIds);
                 describeSamplePlacements(ti->subtreeUserSampleIds, results->samplePlacements,
-                                         subtree, sampleMetadata, source);
+                                         subtree, sampleMetadata, source, refAcc, db);
                 }
             reportTiming(&startTime, "describe placements");
             }
         else
             printf("<p>(Skipping details; "
                    "you uploaded %d sequences, and details are shown only when "
                    "you upload at most %d sequences.)</p>\n",
                    seqCount, MAX_SEQ_DETAILS);
         }
 
     puts("<h3>Downloads</h3>");
     if (! subtreesOnly)
         {
         puts("<ul>");
         // Offer big tree w/new samples for download