66a1a82f72f9f3295bb4007b1b5681013976a694
angie
  Wed Mar 20 15:20:27 2024 -0700
Overhaul struct sampleMetadata: instead of one struct member per anticipated column, use an array of column values and a shared array of column names.  As we support more species, the available metadata gets more divergent so this needs to be more general.  We still need to make aggregation of attributes on branches more generic / config-driven.

diff --git src/hg/hgPhyloPlace/treeToAuspiceJson.c src/hg/hgPhyloPlace/treeToAuspiceJson.c
index 1b2d661..613fc83 100644
--- src/hg/hgPhyloPlace/treeToAuspiceJson.c
+++ src/hg/hgPhyloPlace/treeToAuspiceJson.c
@@ -363,93 +363,85 @@
 }
 
 static void jsonWriteLeafNodeAttributes(struct jsonWrite *jw, char *name,
                                         struct sampleMetadata *met, boolean isUserSample,
                                         char *source, struct hash *sampleUrls,
                                         struct hash *samplePlacements, boolean isRsv,
                                         char **retUserOrOld, char **retNClade, char **retGClade,
                                         char **retLineage, char **retNLineage,
                                         char **retNCladeUsher, char **retLineageUsher)
 /* Write elements of node_attrs for a sample which may be preexisting and in our metadata hash,
  * or may be a new sample from the user.  Set rets for color categories so parent branches can
  * determine their color categories. */
 {
 *retUserOrOld = isUserSample ? "uploaded sample" : source;
 jsonWriteObjectValue(jw, "userOrOld", *retUserOrOld);
-if (met && met->date)
-    jsonWriteObjectValue(jw, "date", met->date);
-if (met && met->author)
+*retNClade = *retGClade = *retLineage = *retNLineage = *retNCladeUsher = *retLineageUsher = "";
+if (met != NULL)
     {
-    jsonWriteObjectValue(jw, "author", met->author);
-    // Note: Nextstrain adds paper_url and title when available; they also add author and use
-    // a uniquified value (e.g. "author": "Wenjie Tan et al" / "value": "Wenjie Tan et al A")
-    }
-struct placementInfo *pi = (isUserSample && name) ? hashFindVal(samplePlacements, name) : NULL;
-
-*retNClade = (met && met->nClade) ? met->nClade : isUserSample ? "uploaded sample" : NULL;
-if (isNotEmpty(*retNClade))
-    jsonWriteObjectValue(jw, (isRsv ? "goya_nextclade" : "Nextstrain_clade"), *retNClade);
-*retGClade = (met && met->gClade) ? met->gClade : isUserSample ? "uploaded sample" : NULL;
-if (isNotEmpty(*retGClade))
-    jsonWriteObjectValue(jw, (isRsv ? "GCC_assigned_2023-11" : "GISAID_clade"), *retGClade);
-*retLineage =  (met && met->lineage) ? met->lineage : isUserSample ? "uploaded sample" : NULL;
-if (isNotEmpty(*retLineage))
+    int i;
+    for (i = 0;  i < met->columnCount;  i++)
+        {
+        char *colName = met->columnNames[i];
+        if (sameString(colName, "pangolin_lineage"))
+            {
+            colName = "pango_lineage";
+            if (isNotEmpty(met->columnValues[i]))
                 {
                 char lineageUrl[1024];
-    makeLineageUrl(*retLineage, lineageUrl, sizeof lineageUrl);
-    jsonWriteObjectValueUrl(jw, (isRsv ? "GCC_nextclade" : "pango_lineage"),
-                            *retLineage, lineageUrl);
-    }
-*retNLineage = (met && met->nLineage) ? met->nLineage : isUserSample ? "uploaded sample" : NULL;
-if (isNotEmpty(*retNLineage))
-    {
-    jsonWriteObjectValue(jw, "Nextstrain_lineage", *retNLineage);
-    }
-if (met && met->epiId)
-    jsonWriteObjectValue(jw, "gisaid_epi_isl", met->epiId);
-if (met && met->gbAcc)
-    jsonWriteObjectValue(jw, "genbank_accession", met->gbAcc);
-if (met && met->country)
-    jsonWriteObjectValue(jw, "country", met->country);
-if (met && met->division)
-    jsonWriteObjectValue(jw, "division", met->division);
-if (met && met->location)
-    jsonWriteObjectValue(jw, "location", met->location);
-if (met && met->countryExp)
-    jsonWriteObjectValue(jw, "country_exposure", met->countryExp);
-if (met && met->divExp)
-    jsonWriteObjectValue(jw, "division_exposure", met->divExp);
-if (met && met->origLab)
-    jsonWriteObjectValue(jw, "originating_lab", met->origLab);
-if (met && met->subLab)
-    jsonWriteObjectValue(jw, "submitting_lab", met->subLab);
-if (met && met->region)
-    jsonWriteObjectValue(jw, "region", met->region);
-*retNCladeUsher = (pi && pi->nextClade) ? pi->nextClade :
-                  (met && met->nCladeUsher) ? met->nCladeUsher :
-                  isUserSample ? "uploaded sample" : NULL;
-if (isNotEmpty(*retNCladeUsher))
+                makeLineageUrl(met->columnValues[i], lineageUrl, sizeof lineageUrl);
+                jsonWriteObjectValueUrl(jw, colName, met->columnValues[i], lineageUrl);
+                }
+            else if (isNotEmpty(met->columnValues[i]))
+                jsonWriteObjectValue(jw, colName, met->columnValues[i]);
+            }
+        else
+            jsonWriteObjectValue(jw, colName, met->columnValues[i]);
+        // Some columns get passed up for aggregation so we can color internal nodes/branches.
+        if (sameString(colName, "Nextstrain_clade") || sameString(colName, "goya_nextclade"))
+            *retNClade = met->columnValues[i];
+        else if (sameString(colName, "GISAID_clade") || sameString(colName, "GCC_assigned_2023-11"))
+            *retGClade = met->columnValues[i];
+        else if (sameString(colName, "pango_lineage") || sameString(colName, "GCC_nextclade"))
+            *retLineage = met->columnValues[i];
+        else if (sameString(colName, "Nextstrain_clade_usher") || sameString(colName, "goya_usher"))
+            *retNCladeUsher = met->columnValues[i];
+        else if (sameString(colName, "pango_lineage_usher") || sameString(colName, "GCC_usher"))
+            *retLineageUsher = met->columnValues[i];
+        }
+    }
+else if (isUserSample)
+    {
+    struct placementInfo *pi = name ? hashFindVal(samplePlacements, name) : NULL;
+    //#*** Really need to know what columns are present in the absence of met, so we can avoid
+    //#*** writing objects that shouldn't be there for this org.
+    *retNClade = *retGClade = *retLineage = *retNLineage = "uploaded sample";
+    jsonWriteObjectValue(jw, isRsv ? "goya_nextclade" : "Nextstrain_clade", "uploaded sample");
+    jsonWriteObjectValue(jw, isRsv ? "GCC_assigned_2023-11" : "GISAID_clade", "uploaded sample");
+    jsonWriteObjectValue(jw, isRsv ? "GCC_nextclade" : "pango_lineage", "uploaded sample");
+    jsonWriteObjectValue(jw, "Nextstrain_lineage", "uploaded sample");
+    *retNCladeUsher = (pi && pi->nextClade) ? pi->nextClade : "uploaded sample";
     jsonWriteObjectValue(jw, (isRsv ? "goya_usher" : "Nextstrain_clade_usher"), *retNCladeUsher);
-*retLineageUsher = (pi && pi->pangoLineage) ? pi->pangoLineage :
-                   (met && met->lineageUsher) ? met->lineageUsher :
-                   isUserSample ? "uploaded sample" : NULL;
-if (isNotEmpty(*retLineageUsher))
+    *retLineageUsher = (pi && pi->pangoLineage) ? pi->pangoLineage : "uploaded sample";
+    if (isRsv)
+        jsonWriteObjectValue(jw, "GCC_usher", *retLineageUsher);
+    else
         {
         char lineageUrl[1024];
         makeLineageUrl(*retLineageUsher, lineageUrl, sizeof lineageUrl);
-    jsonWriteObjectValueUrl(jw, (isRsv ? "GCC_usher" : "pango_lineage_usher"),
-                            *retLineageUsher, lineageUrl);
+        jsonWriteObjectValueUrl(jw, "pango_lineage_usher", *retLineageUsher, lineageUrl);
+        }
     }
 char *sampleUrl = (sampleUrls && name) ? hashFindVal(sampleUrls, name) : NULL;
 if (isNotEmpty(sampleUrl))
     {
     char *p = strstr(sampleUrl, "subtreeAuspice");
     char *subtreeNum = p + strlen("subtreeAuspice");
     if (p && isdigit(*subtreeNum))
         {
         int num = atoi(subtreeNum);
         char subtreeLabel[1024];
         safef(subtreeLabel, sizeof subtreeLabel, "view subtree %d", num);
         jsonWriteObjectValueUrl(jw, "subtree", subtreeLabel, sampleUrl);
         }
     else
         jsonWriteObjectValueUrl(jw, "subtree", sampleUrl, sampleUrl);