fb0c581ba94fd1ca33d57e3253f732cd4c4ea62a
angie
  Fri Aug 26 10:51:31 2022 -0700
Support Auspice JSON output for monkeypox: url settings not just files, Nextclade_lineage, non-hardcoded Auspice config header, correct amino acid changes on - strand genes.

diff --git src/hg/hgPhyloPlace/treeToAuspiceJson.c src/hg/hgPhyloPlace/treeToAuspiceJson.c
index f055fd5..dcea69f 100644
--- src/hg/hgPhyloPlace/treeToAuspiceJson.c
+++ src/hg/hgPhyloPlace/treeToAuspiceJson.c
@@ -1,142 +1,215 @@
 /* Convert a (sub)tree with condensed nodes to JSON for Nextstrain to display, adding in sample
  * mutations, protein changes and metadata. */
 
 /* Copyright (C) 2020 The Regents of the University of California */
 
 #include "common.h"
+#include "errCatch.h"
 #include "hash.h"
 #include "hui.h"
 #include "jsonWrite.h"
 #include "linefile.h"
 #include "parsimonyProto.h"
 #include "phyloPlace.h"
 #include "phyloTree.h"
 #include "variantProjector.h"
 
 
-static void writeAuspiceMeta(FILE *outF, struct slName *subtreeUserSampleIds, char *source)
+static void auspiceMetaColoringCategoricalStart(struct jsonWrite *jw, char *key, char *title)
+/* Write key, title and type of a "categorical" coloring spec, but leave it open in case a
+ * scale list needs to be added. */
+{
+jsonWriteObjectStart(jw, NULL);
+jsonWriteString(jw, "key", key);
+jsonWriteString(jw, "title", title);
+jsonWriteString(jw, "type", "categorical");
+}
+
+static void auspiceMetaColoringCategoricalEnd(struct jsonWrite *jw)
+/* Close out a coloring spec that was opened with auspiceMetaColoringCategoricalStart. */
+{
+jsonWriteObjectEnd(jw);
+}
+
+static void auspiceMetaColoringCategorical(struct jsonWrite *jw, char *key, char *title)
+/* Write a "categorical" coloring spec with no scale component. */
+{
+auspiceMetaColoringCategoricalStart(jw, key, title);
+auspiceMetaColoringCategoricalEnd(jw);
+}
+
+static void jsonWritePair(struct jsonWrite *jw, char *valA, char *valB)
+/* Write a list with two string values. */
+{
+jsonWriteListStart(jw, NULL);
+jsonWriteString(jw, NULL, valA);
+jsonWriteString(jw, NULL, valB);
+jsonWriteListEnd(jw);
+}
+
+static void auspiceMetaColoringSarsCov2Nextclade(struct jsonWrite *jw, char *key, char *title)
+/* Write a coloring spec for SARS-CoV-2 Nextstrain clades.  (Non-VoC clades are omitted and
+ * will be assigned grayscale values by Auspice. */
+{
+auspiceMetaColoringCategoricalStart(jw, key, title);
+jsonWriteListStart(jw, "scale");
+// Color hex values are words from line N of
+// https://github.com/nextstrain/ncov/blob/master/defaults/color_schemes.tsv
+// where N = number of clade_membership lines in
+// https://github.com/nextstrain/ncov/blob/master/defaults/color_ordering.tsv
+jsonWritePair(jw, "20H (Beta, V2)", "#5E1D9D");
+jsonWritePair(jw, "20I (Alpha, V1)", "#492AB5");
+jsonWritePair(jw, "20J (Gamma, V3)", "#4042C7");
+jsonWritePair(jw, "21A (Delta)", "#3E5DD0");
+jsonWritePair(jw, "21I (Delta)", "#4377CD");
+jsonWritePair(jw, "21J (Delta)", "#4A8CC2");
+jsonWritePair(jw, "21B (Kappa)", "#549DB2");
+jsonWritePair(jw, "21C (Epsilon)", "#60AA9E");
+jsonWritePair(jw, "21D (Eta)", "#6EB389");
+jsonWritePair(jw, "21E (Theta)", "#80B974");
+jsonWritePair(jw, "21F (Iota)", "#92BC63");
+jsonWritePair(jw, "21G (Lambda)", "#A6BE55");
+jsonWritePair(jw, "21H (Mu)", "#B9BC4A");
+jsonWritePair(jw, "21K (Omicron)", "#CBB742");
+jsonWritePair(jw, "21L (Omicron)", "#D9AD3D");
+jsonWritePair(jw, "21M (Omicron)", "#E29D39");
+jsonWritePair(jw, "22A (Omicron)", "#E68634");
+jsonWritePair(jw, "22B (Omicron)", "#E56A2F");
+jsonWritePair(jw, "22C (Omicron)", "#E04929");
+jsonWritePair(jw, "22D (Omicron)", "#DB2823");
+jsonWritePair(jw, "uploaded sample", "#000000");
+jsonWriteListEnd(jw);
+auspiceMetaColoringCategoricalEnd(jw);
+}
+
+static void writeAuspiceMetaGenomeAnnotations(struct jsonWrite *jw, struct geneInfo *geneInfoList,
+                                              uint genomeSize)
+/* Write out auspice genome annotations (protein-coding gene CDS and "nuc"). */
+{
+jsonWriteObjectStart(jw, "genome_annotations");
+struct geneInfo *gi;
+for (gi = geneInfoList;  gi != NULL;  gi = gi->next)
+    {
+    jsonWriteObjectStart(jw, gi->psl->qName);
+    jsonWriteNumber(jw, "start", gi->psl->tStart+1);
+    jsonWriteNumber(jw, "end", gi->psl->tEnd);
+    jsonWriteString(jw, "strand", (pslOrientation(gi->psl) > 0) ? "+" : "-");
+    jsonWriteString(jw, "type", "CDS");
+    jsonWriteObjectEnd(jw);
+    }
+jsonWriteObjectStart(jw, "nuc");
+jsonWriteNumber(jw, "start", 1);
+jsonWriteNumber(jw, "end", genomeSize);
+jsonWriteString(jw, "strand", "+");
+jsonWriteString(jw, "type", "source");
+jsonWriteObjectEnd(jw);
+jsonWriteObjectEnd(jw);
+}
+
+static char *getDefaultColor(struct slName *colorFields)
+/* Pick default color from available color fields from metadata.  Do not free returned string. */
+{
+char *colorDefault = NULL;
+if (slNameInList(colorFields, "Nextstrain_lineage"))
+    colorDefault = "Nextstrain_lineage";
+else if (slNameInList(colorFields, "Nextstrain_clade"))
+    colorDefault = "Nextstrain_clade";
+else if (colorFields != NULL)
+    colorDefault = colorFields->name;
+else
+    colorDefault = "userOrOld";
+return colorDefault;
+}
+
+static void auspiceMetaColorings(struct jsonWrite *jw, char *source, struct slName *colorFields)
+/* Write coloring specs for colorFields from metadata, locally added userOrOld, and
+ * Auspice-automatic gt. */
+{
+jsonWriteListStart(jw, "colorings");
+auspiceMetaColoringCategoricalStart(jw, "userOrOld", "Sample type");
+jsonWriteListStart(jw, "scale");
+jsonWritePair(jw, "uploaded sample", "#CC0000");
+jsonWritePair(jw, source, "#000000");
+jsonWriteListEnd(jw);
+auspiceMetaColoringCategoricalEnd(jw);
+auspiceMetaColoringCategorical(jw, "gt", "Genotype");
+struct slName *col;
+for (col = colorFields;  col != NULL;  col = col->next)
+    {
+    if (sameString(col->name, "Nextstrain_clade"))
+        auspiceMetaColoringSarsCov2Nextclade(jw, col->name, "Nextstrain Clade");
+    else if (sameString(col->name, "Nextstrain_clade_usher"))
+        auspiceMetaColoringSarsCov2Nextclade(jw, col->name, "Nextstrain Clade assigned by UShER");
+    else if (sameString(col->name, "pango_lineage"))
+        auspiceMetaColoringCategorical(jw, col->name, "Pango lineage");
+    else if (sameString(col->name, "pango_lineage_usher"))
+        auspiceMetaColoringCategorical(jw, col->name, "Pango lineage assigned by UShER");
+    else if (sameString(col->name, "Nextstrain_lineage"))
+        auspiceMetaColoringCategorical(jw, col->name, "Nextstrain lineage");
+    else if (sameString(col->name, "country"))
+        auspiceMetaColoringCategorical(jw, col->name, "Country");
+    else
+        auspiceMetaColoringCategorical(jw, col->name, col->name);
+    }
+jsonWriteListEnd(jw);
+}
+
+static void writeAuspiceMeta(struct jsonWrite *jw, struct slName *subtreeUserSampleIds, char *source,
+                             struct slName *colorFields, struct geneInfo *geneInfoList,
+                             uint genomeSize)
 /* Write metadata to configure Auspice display. */
 {
-fprintf(outF,
-        "\"meta\": { "
-        "\"title\": \"Subtree with %s", subtreeUserSampleIds->name);
+jsonWriteObjectStart(jw, "meta");
+// Title
+struct dyString *dy = dyStringCreate("Subtree with %s", subtreeUserSampleIds->name);
 int sampleCount = slCount(subtreeUserSampleIds);
 if (sampleCount > 10)
-    fprintf(outF, " and %d other uploaded samples", sampleCount - 1);
+    dyStringPrintf(dy, " and %d other uploaded samples", sampleCount - 1);
 else
     {
     struct slName *sln;
     for (sln = subtreeUserSampleIds->next;  sln != NULL;  sln = sln->next)
-        fprintf(outF, ", %s", sln->name);
-    }
-fputs("\", "
-      "\"panels\": [ \"tree\"] , "
-      "\"colorings\": [ "
-      "  { \"key\": \"pango_lineage\", "
-      "    \"title\": \"Pango lineage\", \"type\": \"categorical\" },"
-      "  { \"key\": \"Nextstrain_clade\","
-      "    \"scale\": [ "
-      "        [ \"20H (Beta, V2)\", \"#571EA2\" ],"
-      "        [ \"20I (Alpha, V1)\", \"#4334BF\" ],"
-      "        [ \"20J (Gamma, V3)\", \"#3F55CE\" ],"
-      "        [ \"21A (Delta)\", \"#4376CD\" ],"
-      "        [ \"21I (Delta)\", \"#4C91C0\" ],"
-      "        [ \"21J (Delta)\", \"#59A4A9\" ],"
-      "        [ \"21C (Epsilon)\", \"#7FB975\" ],"
-      "        [ \"21D (Eta)\", \"#97BD5F\" ],"
-      "        [ \"21F (Iota)\", \"#C7B944\" ],"
-      "        [ \"21G (Lambda)\", \"#D9AD3D\" ],"
-      "        [ \"21H (Mu)\", \"#E49838\" ],"
-      "        [ \"21K (Omicron)\", \"#DB2823\" ],"
-      "        [ \"21L (Omicron)\", \"#DB2823\" ],"
-      "        [ \"21M (Omicron)\", \"#DB2823\" ],"
-      "        [ \"uploaded sample\", \"#FF0000\" ] ],"
-      "    \"title\": \"Nextstrain Clade\", \"type\": \"categorical\" },"
-      , outF);
-if (sameString(source, "GISAID"))
-    fputs("  { \"key\": \"GISAID_clade\","
-      "    \"scale\": [ [ \"S\", \"#EC676D\" ], [ \"L\", \"#F79E43\" ], [ \"O\", \"#F9D136\" ],"
-      "        [ \"V\", \"#FAEA95\" ], [ \"G\", \"#B6D77A\" ], [ \"GH\", \"#8FD4ED\" ],"
-      "        [ \"GR\", \"#A692C3\" ] ],"
-      "    \"title\": \"GISAID Clade\", \"type\": \"categorical\" },"
-          , outF);
-fprintf(outF,
-      "  { \"key\": \"pango_lineage_usher\", "
-      "    \"title\": \"Pango lineage assigned by UShER\", \"type\": \"categorical\" },"
-      "  { \"key\": \"Nextstrain_clade_usher\","
-      "    \"scale\": [ "
-      "        [ \"20H (Beta, V2)\", \"#571EA2\" ],"
-      "        [ \"20I (Alpha, V1)\", \"#4334BF\" ],"
-      "        [ \"20J (Gamma, V3)\", \"#3F55CE\" ],"
-      "        [ \"20H (Beta,V2)\", \"#571EA2\" ],"
-      "        [ \"20I (Alpha,V1)\", \"#4334BF\" ],"
-      "        [ \"20J (Gamma,V3)\", \"#3F55CE\" ],"
-      "        [ \"21A (Delta)\", \"#4376CD\" ],"
-      "        [ \"21I (Delta)\", \"#4C91C0\" ],"
-      "        [ \"21J (Delta)\", \"#59A4A9\" ],"
-      "        [ \"21C (Epsilon)\", \"#7FB975\" ],"
-      "        [ \"21D (Eta)\", \"#97BD5F\" ],"
-      "        [ \"21F (Iota)\", \"#C7B944\" ],"
-      "        [ \"21G (Lambda)\", \"#D9AD3D\" ],"
-      "        [ \"21H (Mu)\", \"#E49838\" ],"
-      "        [ \"21K (Omicron)\", \"#DB2823\" ],"
-      "        [ \"21L\", \"#DB2823\" ],"
-      "        [ \"21L (Omicron)\", \"#DB2823\" ],"
-      "        [ \"21M (Omicron)\", \"#DB2823\" ],"
-      "        [ \"uploaded sample\", \"#FF0000\" ] ],"
-      "    \"title\": \"Nextstrain Clade assigned by UShER\", \"type\": \"categorical\" },"
-        "  { \"key\": \"userOrOld\", "
-        "    \"scale\": [ [ \"uploaded sample\", \"#CC0000\"] , [ \"%s\", \"#000000\"] ],"
-        "    \"title\": \"Sample type\", \"type\": \"categorical\" },"
-        "  {\"key\": \"gt\", \"title\": \"Genotype\", \"type\": \"categorical\"},"
-        "  {\"key\": \"country\", \"title\": \"Country\", \"type\": \"categorical\"}"
-        , source);
-fputs("  ] , "
-//#*** Filters didn't seem to work... maybe something about the new fetch feature, or do I need to spcify in some other way?
-//#***      "\"filters\": [ \"GISAID_clade\", \"region\", \"country\", \"division\", \"author\" ], "
-      "\"filters\": [ ], "
-      "\"genome_annotations\":"
-      "{\"E\":{\"end\":26472,\"start\":26245,\"strand\":\"+\",\"type\":\"CDS\"},"
-      " \"M\":{\"end\":27191,\"start\":26523,\"strand\":\"+\",\"type\":\"CDS\"},"
-      " \"N\":{\"end\":29533,\"start\":28274,\"strand\":\"+\",\"type\":\"CDS\"},"
-      " \"ORF1ab\":{\"end\":21555,\"start\":266,\"strand\":\"+\",\"type\":\"CDS\"},"
-//      " \"ORF1a\":{\"end\":13468,\"start\":266,\"strand\":\"+\",\"type\":\"CDS\"},"
-//      " \"ORF1b\":{\"end\":21555,\"start\":13468,\"strand\":\"+\",\"type\":\"CDS\"},"
-      " \"ORF3a\":{\"end\":26220,\"start\":25393,\"strand\":\"+\",\"type\":\"CDS\"},"
-      " \"ORF6\":{\"end\":27387,\"start\":27202,\"strand\":\"+\",\"type\":\"CDS\"},"
-      " \"ORF7a\":{\"end\":27759,\"start\":27394,\"strand\":\"+\",\"type\":\"CDS\"},"
-      " \"ORF7b\":{\"end\":27887,\"start\":27756,\"strand\":\"+\",\"type\":\"CDS\"},"
-      " \"ORF8\":{\"end\":28259,\"start\":27894,\"strand\":\"+\",\"type\":\"CDS\"},"
-      " \"ORF9b\":{\"end\":28577,\"start\":28284,\"strand\":\"+\",\"type\":\"CDS\"},"
-      " \"S\":{\"end\":25384,\"start\":21563,\"strand\":\"+\",\"type\":\"CDS\"},"
-      " \"nuc\":{\"end\":29903,\"start\":1,\"strand\":\"+\",\"type\":\"source\"}},"
-      "\"display_defaults\": { "
-      "  \"branch_label\": \"none\", "
-      "  \"color_by\": \"Nextstrain_clade\" "
-      "}, "
-      , outF);
-fprintf(outF,
-        "\"description\": \"Dataset generated by [UShER web interface]"
+        dyStringPrintf(dy, ", %s", sln->name);
+    }
+jsonWriteString(jw, "title", dy->string);
+// Description
+jsonWriteStringf(jw, "description", "Dataset generated by [UShER web interface]"
                  "(%shgPhyloPlace) using the "
                  "[usher](https://github.com/yatisht/usher/) program.  "
 //#*** TODO: describe input from which tree was generated: user sample, version of tree, etc.
+                 "If you have metadata you wish to display, you can now drag on a CSV file and "
+                 "it will be added into this view, [see here]("NEXTSTRAIN_DRAG_DROP_DOC") "
+                 "for more info."
                  , hLocalHostCgiBinUrl());
-fputs("If you have metadata you wish to display, you can now drag on a CSV file and it will be "
-      "added into this view, [see here]("NEXTSTRAIN_DRAG_DROP_DOC") "
-      "for more info.\"} ,"
-      , outF);
+// Panels: just the tree (no map, entropy etc.)
+jsonWriteListStart(jw, "panels");
+jsonWriteString(jw, NULL, "tree");
+jsonWriteListEnd(jw);
+// Default label & color
+jsonWriteObjectStart(jw, "display_defaults");
+jsonWriteString(jw, "branch_label", "none");
+jsonWriteString(jw, "color_by", getDefaultColor(colorFields));
+jsonWriteObjectEnd(jw);
+// Colorings: userOrOld, gt and whatever we got from metadata
+auspiceMetaColorings(jw, source, colorFields);
+// Filters didn't work when I tried them a long time ago... revisit someday.
+jsonWriteListStart(jw, "filters");
+jsonWriteListEnd(jw);
+// Annotations for coloring/filtering by base
+writeAuspiceMetaGenomeAnnotations(jw, geneInfoList, genomeSize);
+jsonWriteObjectEnd(jw);
 }
 
 static void jsonWriteObjectValueUrl(struct jsonWrite *jw, char *name, char *value, char *url)
 /* Write an object with member "value" set to value, and if url is non-empty, "url" set to url. */
 {
 jsonWriteObjectStart(jw, name);
 jsonWriteString(jw, "value", value);
 if (isNotEmpty(url))
     jsonWriteString(jw, "url", url);
 jsonWriteObjectEnd(jw);
 }
 
 static void jsonWriteObjectValue(struct jsonWrite *jw, char *name, char *value)
 /* Write an object with one member, "value", set to value, as most Auspice node attributes are
  * formatted. */
@@ -146,60 +219,65 @@
 
 static void makeLineageUrl(char *lineage, char *lineageUrl, size_t lineageUrlSize)
 /* If lineage is not "uploaded sample", make an outbreak.info link to it, otherwise just copy
  * lineage. */
 {
 if (sameString(lineage, "uploaded sample"))
     safecpy(lineageUrl, lineageUrlSize, lineage);
 else
     safef(lineageUrl, lineageUrlSize, OUTBREAK_INFO_URLBASE "%s", lineage);
 }
 
 static void jsonWriteLeafNodeAttributes(struct jsonWrite *jw, char *name,
                                         struct sampleMetadata *met, boolean isUserSample,
                                         char *source, struct hash *sampleUrls,
                                         char **retUserOrOld, char **retNClade, char **retGClade,
-                                        char **retLineage,
+                                        char **retLineage, char **retNLineage,
                                         char **retNCladeUsher, char **retLineageUsher)
 /* Write elements of node_attrs for a sample which may be preexisting and in our metadata hash,
  * or may be a new sample from the user.  Set rets for color categories so parent branches can
  * determine their color categories. */
 {
 *retUserOrOld = isUserSample ? "uploaded sample" : source;
 jsonWriteObjectValue(jw, "userOrOld", *retUserOrOld);
 if (met && met->date)
     jsonWriteObjectValue(jw, "date", met->date);
 if (met && met->author)
     {
     jsonWriteObjectValue(jw, "author", met->author);
     // Note: Nextstrain adds paper_url and title when available; they also add author and use
     // a uniquified value (e.g. "author": "Wenjie Tan et al" / "value": "Wenjie Tan et al A")
     }
 *retNClade = isUserSample ? "uploaded sample" : (met && met->nClade) ? met->nClade : NULL;
 if (isNotEmpty(*retNClade))
     jsonWriteObjectValue(jw, "Nextstrain_clade", *retNClade);
 *retGClade = isUserSample ? "uploaded sample" : (met && met->gClade) ? met->gClade : NULL;
 if (isNotEmpty(*retGClade))
     jsonWriteObjectValue(jw, "GISAID_clade", *retGClade);
 *retLineage = isUserSample ? "uploaded sample" :
                              (met && met->lineage) ? met->lineage : NULL;
 if (isNotEmpty(*retLineage))
     {
     char lineageUrl[1024];
     makeLineageUrl(*retLineage, lineageUrl, sizeof lineageUrl);
     jsonWriteObjectValueUrl(jw, "pango_lineage", *retLineage, lineageUrl);
     }
+*retNLineage = isUserSample ? "uploaded sample" : (met && met->nLineage) ? met->nLineage : NULL;
+if (isNotEmpty(*retNLineage))
+    {
+    jsonWriteObjectValue(jw, "Nextstrain_lineage", *retNLineage);
+    }
 if (met && met->epiId)
     jsonWriteObjectValue(jw, "gisaid_epi_isl", met->epiId);
 if (met && met->gbAcc)
     jsonWriteObjectValue(jw, "genbank_accession", met->gbAcc);
 if (met && met->country)
     jsonWriteObjectValue(jw, "country", met->country);
 if (met && met->division)
     jsonWriteObjectValue(jw, "division", met->division);
 if (met && met->location)
     jsonWriteObjectValue(jw, "location", met->location);
 if (met && met->countryExp)
     jsonWriteObjectValue(jw, "country_exposure", met->countryExp);
 if (met && met->divExp)
     jsonWriteObjectValue(jw, "division_exposure", met->divExp);
 if (met && met->origLab)
@@ -226,82 +304,83 @@
     char *p = strstr(sampleUrl, "subtreeAuspice");
     char *subtreeNum = p + strlen("subtreeAuspice");
     if (p && isdigit(*subtreeNum))
         {
         int num = atoi(subtreeNum);
         char subtreeLabel[1024];
         safef(subtreeLabel, sizeof subtreeLabel, "view subtree %d", num);
         jsonWriteObjectValueUrl(jw, "subtree", subtreeLabel, sampleUrl);
         }
     else
         jsonWriteObjectValueUrl(jw, "subtree", sampleUrl, sampleUrl);
     }
 }
 
 static void jsonWriteBranchNodeAttributes(struct jsonWrite *jw, char *userOrOld,
-                                          char *nClade, char *gClade, char *lineage,
+                                          char *nClade, char *gClade, char *lineage, char *nLineage,
                                           char *nCladeUsher, char *lineageUsher)
 /* Write elements of node_attrs for a branch. */
 {
 if (userOrOld)
     jsonWriteObjectValue(jw, "userOrOld", userOrOld);
 if (nClade)
     jsonWriteObjectValue(jw, "Nextstrain_clade", nClade);
 if (gClade)
     jsonWriteObjectValue(jw, "GISAID_clade", gClade);
 if (lineage)
     jsonWriteObjectValue(jw, "pango_lineage", lineage);
+if (nLineage)
+    jsonWriteObjectValue(jw, "Nextstrain_lineage", lineage);
 if (nCladeUsher)
     jsonWriteObjectValue(jw, "Nextstrain_clade_usher", nCladeUsher);
 if (lineageUsher)
     jsonWriteObjectValue(jw, "pango_lineage_usher", lineageUsher);
 }
 
 static boolean changesProtein(struct singleNucChange *snc, struct geneInfo *gi,
                               struct seqWindow *gSeqWin,
                               int *retAaStart, char *retOldAa, char *retNewAa)
 /* If snc changes the coding sequence of gene, return TRUE and set ret values accordingly
  * (note amino acid values are single-base not strings). */
 {
 boolean isCodingChange = FALSE;
 if (snc->chromStart < gi->psl->tEnd && snc->chromStart >= gi->psl->tStart)
     {
     struct bed3 gBed3 = { NULL, gSeqWin->seqName, snc->chromStart, snc->chromStart+1 };
     char gAlt[2];
     safef(gAlt, sizeof(gAlt), "%c", snc->newBase);
-    if (!sameString(gi->psl->strand, "+"))
-        errAbort("changesProtein: only works for psl->strand='+', but gene '%s' has strand '%s'",
-                 gi->psl->qName, gi->psl->strand);
     struct vpTx *vpTx = vpGenomicToTranscript(gSeqWin, &gBed3, gAlt, gi->psl, gi->txSeq);
     if (vpTx->start.region == vpExon)
         {
         int aaStart = vpTx->start.txOffset / 3;
         int codonStart = aaStart * 3;
         char newCodon[4];
         safencpy(newCodon, sizeof newCodon, gi->txSeq->dna + codonStart, 3);
         int codonOffset = vpTx->start.txOffset - codonStart;
         assert(codonOffset < sizeof newCodon);
-        newCodon[codonOffset] = snc->newBase;
+        char newNt = (pslOrientation(gi->psl) > 0) ? snc->newBase : ntCompTable[(int)snc->newBase];
+        newCodon[codonOffset] = newNt;
         char newAa = lookupCodon(newCodon);
         char oldAa;
         if (snc->parBase == snc->refBase)
             oldAa = lookupCodon(gi->txSeq->dna + codonStart);
         else
             {
             char oldCodon[4];
             safencpy(oldCodon, sizeof oldCodon, gi->txSeq->dna + codonStart, 3);
-            oldCodon[codonOffset] = snc->parBase;
+            char oldNt = (pslOrientation(gi->psl) > 0) ? snc->parBase : ntCompTable[(int)snc->parBase];
+            oldCodon[codonOffset] = oldNt;
             oldAa = lookupCodon(oldCodon);
             }
         // Watch out for lookupCodon's null-character return value for stop codon; we want '*'.
         if (newAa == '\0')
             newAa = '*';
         if (oldAa == '\0')
             oldAa = '*';
         if (newAa != oldAa)
             {
             isCodingChange = TRUE;
             *retAaStart = aaStart;
             *retOldAa = oldAa;
             *retNewAa = newAa;
             }
         }
@@ -466,160 +545,190 @@
         runLength++;
         if (runLength > maxRunLength)
             {
             maxRunLength = runLength;
             maxRunVal = array[i];
             }
         }
     else
         runLength = 1;
     }
 return (maxRunLength > (arraySize >> 1)) ? maxRunVal : NULL;
 }
 
 static void rTreeToAuspiceJson(struct phyloTree *node, int depth, struct auspiceJsonInfo *aji,
                                char **retUserOrOld, char **retNClade, char **retGClade,
-                               char **retLineage, char **retNCladeUsher, char **retLineageUsher)
+                               char **retLineage, char **retNLineage,
+                               char **retNCladeUsher, char **retLineageUsher)
 /* Write Augur/Auspice V2 JSON for tree.  Enclosing object start and end are written by caller. */
 {
 if (node->priv)
     {
     struct singleNucChange *sncList = node->priv;
     depth += slCount(sncList);
     }
 boolean isUserSample = FALSE;
 if (node->ident->name)
     isUserSample = slNameInList(aji->subtreeUserSampleIds, node->ident->name);
 char *name = node->ident->name;
 struct sampleMetadata *met = name ? metadataForSample(aji->sampleMetadata, name) : NULL;
 if (name)
     jsonWriteString(aji->jw, "name", name);
 else
     jsonWriteStringf(aji->jw, "name", "NODE%d", aji->nodeNum++);
 jsonWriteBranchAttrs(aji->jw, node, aji->geneInfoList, aji->gSeqWin);
 if (node->numEdges > 0)
     {
     jsonWriteListStart(aji->jw, "children");
     char *kidUserOrOld[node->numEdges];
     char *kidNClade[node->numEdges];
     char *kidGClade[node->numEdges];
     char *kidLineage[node->numEdges];
     char *kidNCladeUsher[node->numEdges];
     char *kidLineageUsher[node->numEdges];
+    char *kidNLineage[node->numEdges];
     // Step through children in reverse order because nextstrain/Auspice draws upside-down. :)
     int i;
     for (i = node->numEdges - 1;  i >= 0;  i--)
         {
         jsonWriteObjectStart(aji->jw, NULL);
         rTreeToAuspiceJson(node->edges[i], depth, aji,
                            &kidUserOrOld[i], &kidNClade[i], &kidGClade[i], &kidLineage[i],
-                           &kidNCladeUsher[i], &kidLineageUsher[i]);
+                           &kidNLineage[i], &kidNCladeUsher[i], &kidLineageUsher[i]);
         jsonWriteObjectEnd(aji->jw);
         }
     jsonWriteListEnd(aji->jw);
     if (retUserOrOld)
         *retUserOrOld = majorityMaybe(kidUserOrOld, node->numEdges);
     if (retNClade)
         *retNClade = majorityMaybe(kidNClade, node->numEdges);
     if (retGClade)
         *retGClade = majorityMaybe(kidGClade, node->numEdges);
     if (retLineage)
         *retLineage = majorityMaybe(kidLineage, node->numEdges);
     if (retNCladeUsher)
         *retNCladeUsher = majorityMaybe(kidNCladeUsher, node->numEdges);
     if (retLineageUsher)
         *retLineageUsher = majorityMaybe(kidLineageUsher, node->numEdges);
+    if (retNLineage)
+        *retNLineage = majorityMaybe(kidNLineage, node->numEdges);
     }
 jsonWriteObjectStart(aji->jw, "node_attrs");
 jsonWriteDouble(aji->jw, "div", depth);
 if (node->numEdges == 0)
     jsonWriteLeafNodeAttributes(aji->jw, name, met, isUserSample, aji->source, aji->sampleUrls,
-                                retUserOrOld, retNClade, retGClade, retLineage,
+                                retUserOrOld, retNClade, retGClade, retLineage, retNLineage,
                                 retNCladeUsher, retLineageUsher);
 else if (retUserOrOld && retGClade && retLineage)
     jsonWriteBranchNodeAttributes(aji->jw, *retUserOrOld, *retNClade, *retGClade, *retLineage,
-                                  *retNCladeUsher, *retLineageUsher);
+                                  *retNLineage, *retNCladeUsher, *retLineageUsher);
 jsonWriteObjectEnd(aji->jw);
 }
 
 struct phyloTree *phyloTreeNewNode(char *name)
 /* Alloc & return a new node with no children. */
 {
 struct phyloTree *node;
 AllocVar(node);
 AllocVar(node->ident);
 node->ident->name = cloneString(name);
 return node;
 }
 
 struct geneInfo *getGeneInfoList(char *bigGenePredFile, struct dnaSeq *refGenome)
 /* If config.ra has a source of gene annotations, then return the gene list. */
 {
 struct geneInfo *geneInfoList = NULL;
-if (isNotEmpty(bigGenePredFile) && fileExists(bigGenePredFile))
+struct bbiFile *bbi = NULL;
+struct errCatch *errCatch = errCatchNew();
+if (isNotEmpty(bigGenePredFile))
+    {
+    if (errCatchStart(errCatch))
+        {
+        bbi = bigBedFileOpen(bigGenePredFile);
+        }
+    errCatchEnd(errCatch);
+    }
+if (bbi)
     {
-    struct bbiFile *bbi = bigBedFileOpen(bigGenePredFile);
     struct lm *lm = lmInit(0);
     struct bigBedInterval *bb, *bbList = bigBedIntervalQuery(bbi, refGenome->name, 0,
                                                              refGenome->size, 0, lm);
     for (bb = bbList;  bb != NULL;  bb = bb->next)
         {
         struct genePredExt *gp = genePredFromBigGenePred(refGenome->name, bb);
-        if (gp->strand[0] != '+')
-            errAbort("getGeneInfoList: strand must be '+' but got '%s' for gene %s",
-                     gp->strand, gp->name);
         int txLen = 0;
         int ex;
         for (ex = 0;  ex < gp->exonCount;  ex++)
             txLen += (gp->exonEnds[ex] - gp->exonStarts[ex]);
         char *seq = needMem(txLen+1);
         int txOffset = 0;
         for (ex = 0;  ex < gp->exonCount;  ex++)
             {
             int exonLen = gp->exonEnds[ex] - gp->exonStarts[ex];
             safencpy(seq+txOffset, txLen+1-txOffset, refGenome->dna+gp->exonStarts[ex], exonLen);
             txOffset += exonLen;
             }
+        if (sameString(gp->strand, "-"))
+            reverseComplement(seq, txLen);
         struct geneInfo *gi;
         AllocVar(gi);
         gi->psl = genePredToPsl((struct genePred *)gp, refGenome->size, txLen);
+        gi->psl->qName = cloneString(gp->name2);
         gi->txSeq = newDnaSeq(seq, txLen, gp->name2);
         slAddHead(&geneInfoList, gi);
         }
     lmCleanup(&lm);
     bigBedFileClose(&bbi);
     }
 slReverse(&geneInfoList);
 return geneInfoList;
 }
 
 void treeToAuspiceJson(struct subtreeInfo *sti, char *db, struct geneInfo *geneInfoList,
                        struct seqWindow *gSeqWin, struct hash *sampleMetadata,
                        struct hash *sampleUrls, char *jsonFile, char *source)
 /* Write JSON for tree in Nextstrain's Augur/Auspice V2 JSON format
  * (https://github.com/nextstrain/augur/blob/master/augur/data/schema-export-v2.json). */
 {
 struct phyloTree *tree = sti->subtree;
 FILE *outF = mustOpen(jsonFile, "w");
-fputs("{ \"version\": \"v2\", ", outF);
-writeAuspiceMeta(outF, sti->subtreeUserSampleIds, source);
-// The meta part is mostly constant & easier to just write out, but jsonWrite is better for the
-// nested tree structure.
 struct jsonWrite *jw = jsonWriteNew();
+jsonWriteObjectStart(jw, NULL);
+jsonWriteString(jw, "version", "v2");
+//#*** FIXME: TODO: either pass in along with sampleMetadata, or better yet, compute while building
+//#*** tree object and then write the header object.
+struct slName *colorFields = NULL;
+if (sameString(db, "wuhCor1"))
+    {
+    slNameAddHead(&colorFields, "country");
+    slNameAddHead(&colorFields, "Nextstrain_clade_usher");
+    slNameAddHead(&colorFields, "pango_lineage_usher");
+    slNameAddHead(&colorFields, "Nextstrain_clade");
+    slNameAddHead(&colorFields, "pango_lineage");
+    }
+else
+    {
+    slNameAddHead(&colorFields, "country");
+    slNameAddHead(&colorFields, "Nextstrain_lineage");
+    }
+//#*** END FIXME
+writeAuspiceMeta(jw, sti->subtreeUserSampleIds, source, colorFields, geneInfoList,
+                 gSeqWin->end);
 jsonWriteObjectStart(jw, "tree");
 int nodeNum = 10000; // Auspice.us starting node number for newick -> json
 int depth = 0;
 
 // Add an extra root node because otherwise Auspice won't draw branch from big tree root to subtree
 struct phyloTree *root = phyloTreeNewNode("wrapper");
 phyloAddEdge(root, tree);
 tree = root;
 struct auspiceJsonInfo aji = { jw, sti->subtreeUserSampleIds, geneInfoList, gSeqWin,
                                sampleMetadata, sampleUrls, nodeNum, source };
-rTreeToAuspiceJson(tree, depth, &aji, NULL, NULL, NULL, NULL, NULL, NULL);
-jsonWriteObjectEnd(jw);
+rTreeToAuspiceJson(tree, depth, &aji, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+jsonWriteObjectEnd(jw); // tree
+jsonWriteObjectEnd(jw); // top-level object
 fputs(jw->dy->string, outF);
 jsonWriteFree(&jw);
-fputs("}", outF);
 carefulClose(&outF);
 }