5dd22774eea1a954815f0a32797d5a1dbfdcecc8
angie
  Tue Sep 10 14:01:20 2024 -0700
When writing the auspice JSON tree, if a sample is found in the optional config file anchorSamples, then add an empty 'vaccine' attribute to get the X icon in auspice display.

diff --git src/hg/hgPhyloPlace/treeToAuspiceJson.c src/hg/hgPhyloPlace/treeToAuspiceJson.c
index 0fb9e22..97c74ff 100644
--- src/hg/hgPhyloPlace/treeToAuspiceJson.c
+++ src/hg/hgPhyloPlace/treeToAuspiceJson.c
@@ -361,30 +361,31 @@
 else
     safef(lineageUrl, lineageUrlSize, OUTBREAK_INFO_URLBASE "%s", lineage);
 }
 
 struct auspiceJsonInfo
 /* Collection of a bunch of things used when writing out auspice JSON for a subtree, so the
  * recursive function doesn't need a dozen args. */
     {
     struct jsonWrite *jw;
     struct slName *subtreeUserSampleIds;  // Subtree node names for user samples (not from big tree)
     struct geneInfo *geneInfoList;        // Transcript seq & alignment for predicting AA change
     struct seqWindow *gSeqWin;            // Reference genome seq for predicting AA change
     struct sampleMetadataStore *sampleMetadata; // Sample metadata for decorating tree
     struct hash *sampleUrls;              // URLs for samples, if applicable
     struct hash *samplePlacements;        // Sample placement info e.g. clade/lineage from usher
+    struct hash *anchorSamples;           // Keyed by names in anchorSamples file; NULL if none
     int nodeNum;                          // For generating sequential node ID (in absence of name)
     char *source;                         // Source of non-user sequences in tree (GISAID or public)
     };
 
 static void jsonWriteLeafNodeAttributes(struct auspiceJsonInfo *aji, char *name,
                                         boolean isUserSample, boolean isRsv,
                                         int branchAttrCount, char **branchAttrCols,
                                         char **branchAttrVals)
 /* Write elements of node_attrs for a sample which may be preexisting and in our metadata hash,
  * or may be a new sample from the user.  Set rets for color categories so parent branches can
  * determine their color categories. */
 {
 char *userOrOld = isUserSample ? "uploaded sample" : aji->source;
 jsonWriteObjectValue(aji->jw, "userOrOld", userOrOld);
 int i;
@@ -451,30 +452,37 @@
                     branchAttrVals[i] = pi->pangoLineage;
                     char lineageUrl[1024];
                     makeLineageUrl(pi->pangoLineage, lineageUrl, sizeof lineageUrl);
                     jsonWriteObjectValueUrl(aji->jw, branchAttrCols[i], branchAttrVals[i],
                                             lineageUrl);
                     wroteLink = TRUE;
                     }
                 else if (sameString(branchAttrCols[i], "GCC_usher"))
                     branchAttrVals[i] = pi->pangoLineage;
                 }
             }
         if (!wroteLink)
             jsonWriteObjectValue(aji->jw, branchAttrCols[i], branchAttrVals[i]);
         }
     }
+// If sample is in anchorSamples, write an empty "vaccine" object attribute to get the "X" icon
+// in auspice.
+if (aji->anchorSamples && hashLookup(aji->anchorSamples, name))
+    {
+    jsonWriteObjectStart(aji->jw, "vaccine");
+    jsonWriteObjectEnd(aji->jw);
+    }
 char *sampleUrl = (aji->sampleUrls && name) ? hashFindVal(aji->sampleUrls, name) : NULL;
 if (isNotEmpty(sampleUrl))
     {
     char *p = strstr(sampleUrl, "subtreeAuspice");
     char *subtreeNum = p + strlen("subtreeAuspice");
     if (p && isdigit(*subtreeNum))
         {
         int num = atoi(subtreeNum);
         char subtreeLabel[1024];
         safef(subtreeLabel, sizeof subtreeLabel, "view subtree %d", num);
         jsonWriteObjectValueUrl(aji->jw, "subtree", subtreeLabel, sampleUrl);
         }
     else
         jsonWriteObjectValueUrl(aji->jw, "subtree", sampleUrl, sampleUrl);
     }
@@ -884,54 +892,75 @@
 if (isNotEmpty(branchAttrSetting))
     {
     attrList = slNameListFromComma(branchAttrSetting);
     branchAttrCount += slCount(attrList);
     }
 char **branchAttrCols = NULL;
 AllocArray(branchAttrCols, branchAttrCount);
 branchAttrCols[0] = cloneString("userOrOld");
 int i;
 for (i = 1, attr = attrList;  i < branchAttrCount && attr != NULL;  i++, attr = attr->next)
     branchAttrCols[i] = cloneString(trimSpaces(attr->name));
 *retBranchAttrCols = branchAttrCols;
 return branchAttrCount;
 }
 
+struct hash *getAnchorSamples(char *org, char *db)
+/* If config setting anchorSamples names an existing file, return a hash keyed by the sample
+ * names in it (one per line, surrounding whitespace ignored) for quick lookup; else NULL. */
+{
+struct hash *anchorSamples = NULL;
+char *anchorFile = phyloPlaceRefSettingPath(org, db, "anchorSamples");
+if (anchorFile && fileExists(anchorFile))
+    {
+    anchorSamples = hashNew(0);
+    struct lineFile *lf = lineFileOpen(anchorFile, TRUE);
+    char *line;
+    while (lineFileNextReal(lf, &line))
+        hashAddInt(anchorSamples, trimSpaces(line), 1);  // stray space/CR would break lookup
+    lineFileClose(&lf);
+    }
+return anchorSamples;
+}
+
 void treeToAuspiceJson(struct subtreeInfo *sti, char *org, char *db, struct geneInfo *geneInfoList,
                        struct seqWindow *gSeqWin, struct sampleMetadataStore *sampleMetadata,
                        struct hash *sampleUrls, struct hash *samplePlacements,
                        char *jsonFile, char *source)
 /* Write JSON for tree in Nextstrain's Augur/Auspice V2 JSON format
  * (https://github.com/nextstrain/augur/blob/master/augur/data/schema-export-v2.json). */
 {
 struct phyloTree *tree = sti->subtree;
 FILE *outF = mustOpen(jsonFile, "w");
 struct jsonWrite *jw = jsonWriteNew();
 jsonWriteObjectStart(jw, NULL);
 jsonWriteString(jw, "version", "v2");
 boolean isRsv = (stringIn("GCF_000855545", db) || stringIn("GCF_002815475", db) ||
                  startsWith("RGCC", db));
 boolean isFlu = (stringIn("GCF_000865085", db) || stringIn("GCF_001343785", db));
 writeAuspiceMeta(jw, sti->subtreeUserSampleIds, source, org, db, geneInfoList,
                  gSeqWin->end, isRsv, isFlu);
 jsonWriteObjectStart(jw, "tree");
 int nodeNum = 10000; // Auspice.us starting node number for newick -> json
 int depth = 0;
 
+// Hash sample names from the anchorSamples setting file, if any (NULL when there is none)
+struct hash *anchorSamples = getAnchorSamples(org, db);
+
 // Add an extra root node because otherwise Auspice won't draw branch from big tree root to subtree
 struct phyloTree *root = phyloTreeNewNode("wrapper");
 phyloAddEdge(root, tree);
 tree = root;
 struct auspiceJsonInfo aji = { jw, sti->subtreeUserSampleIds, geneInfoList, gSeqWin,
-                               sampleMetadata, sampleUrls, samplePlacements, nodeNum, source };
-
+                               sampleMetadata, sampleUrls, samplePlacements, anchorSamples,
+                               nodeNum, source };
 
 char **branchAttrCols = NULL;
 int branchAttrCount = getBranchAttrCols(org, db, &branchAttrCols);
 rTreeToAuspiceJson(tree, depth, &aji, NULL, isRsv, branchAttrCount, branchAttrCols, NULL);
 jsonWriteObjectEnd(jw); // tree
 jsonWriteObjectEnd(jw); // top-level object
 fputs(jw->dy->string, outF);
 jsonWriteFree(&jw);
 carefulClose(&outF);
 }