d903a7409978ea81cc0f2e6e34687319f7d9d222
angie
  Wed Jul 28 15:50:24 2021 -0700
When extracting samples from the tree with matUtils, add the new --usher-clades-txt option and create empty placementInfo's to hold the results of parsing clades.txt so we can show clades/lineages according to the tree in the result summary table.

diff --git src/hg/hgPhyloPlace/runUsher.c src/hg/hgPhyloPlace/runUsher.c
index cfcd2fe..3052cf9 100644
--- src/hg/hgPhyloPlace/runUsher.c
+++ src/hg/hgPhyloPlace/runUsher.c
@@ -854,55 +854,71 @@
 int subtreeCount = processOutDirFiles(results, tnOutDir.forCgi, &singleSubtreeTn, &singleSubtreeMuts,
                                       subtreeTns, subtreeMuts, MAX_SUBTREES);
 if (singleSubtreeTn == NULL)
     {
     warn("Sorry, there was a problem running usher.  "
          "Please ask genome-www@soe.ucsc.edu to take a look at %s.", tnStderr.forCgi);
     return NULL;
     }
 results->subtreeInfoList = parseSubtrees(subtreeCount, singleSubtreeTn, singleSubtreeMuts,
                                          subtreeTns, subtreeMuts, userSampleIds, condensedNodes);
 results->singleSubtreeInfo = results->subtreeInfoList;
 results->subtreeInfoList = results->subtreeInfoList->next;
 return results;
 }
 
+static void addEmptyPlacements(struct slName *sampleIds, struct hash *samplePlacements)
+/* Parsing an usher-style clades.txt file from matUtils extract requires samplePlacements to
+ * have placementInfo for each sample.  When running usher, those are added when we parse
+ * usher stderr; when running matUtils, just allocate one for each sample. */
+{
+struct slName *sample;
+for (sample = sampleIds;  sample != NULL;  sample = sample->next)
+    {
+    struct placementInfo *info;
+    AllocVar(info);
+    hashAdd(samplePlacements, sample->name, info);
+    info->sampleId = cloneString(sample->name);
+    }
+}
+
 struct usherResults *runMatUtilsExtractSubtrees(char *matUtilsPath, char *protobufPath,
                                                 int subtreeSize, struct slName *sampleIds,
                                                 struct hash *condensedNodes, int *pStartTime)
 /* Open a pipe from Yatish Turakhia and Jakob McBroome's matUtils extract to extract subtrees
  * containing sampleIds, save resulting subtrees to trash files, return subtree results.
  * Caller must ensure that sampleIds are names of leaves in the protobuf tree. */
 {
 struct usherResults *results = usherResultsNew();
 char subtreeSizeStr[16];
 safef(subtreeSizeStr, sizeof subtreeSizeStr, "%d", subtreeSize);
 char *numThreadsStr = "16";
 struct tempName tnSamples;
 trashDirFile(&tnSamples, "ct", "matUtilsExtractSamples", ".txt");
 FILE *f = mustOpen(tnSamples.forCgi, "w");
 struct slName *sample;
 for (sample = sampleIds;  sample != NULL;  sample = sample->next)
     fprintf(f, "%s\n", sample->name);
 carefulClose(&f);
 struct tempName tnOutDir;
 trashDirFile(&tnOutDir, "ct", "matUtils_outdir", ".dir");
 char *cmd[] = { matUtilsPath, "extract", "-i", protobufPath, "-d", tnOutDir.forCgi,
                 "-s", tnSamples.forCgi,
                 "-x", subtreeSizeStr, "-X", SINGLE_SUBTREE_SIZE, "-T", numThreadsStr,
-                NULL };
+                "--usher-clades-txt", NULL };
 char **cmds[] = { cmd, NULL };
 struct tempName tnStderr;
 trashDirFile(&tnStderr, "ct", "matUtils_stderr", ".txt");
 struct pipeline *pl = pipelineOpen(cmds, pipelineRead, NULL, tnStderr.forCgi);
 pipelineClose(&pl);
 reportTiming(pStartTime, "run matUtils");
+addEmptyPlacements(sampleIds, results->samplePlacements);
 struct tempName *singleSubtreeTn = NULL, *subtreeTns[MAX_SUBTREES];
 struct variantPathNode *singleSubtreeMuts = NULL, *subtreeMuts[MAX_SUBTREES];
 int subtreeCount = processOutDirFiles(results, tnOutDir.forCgi, &singleSubtreeTn, &singleSubtreeMuts,
                                       subtreeTns, subtreeMuts, MAX_SUBTREES);
 results->subtreeInfoList = parseSubtrees(subtreeCount, singleSubtreeTn, singleSubtreeMuts,
                                          subtreeTns, subtreeMuts, sampleIds, condensedNodes);
 results->singleSubtreeInfo = results->subtreeInfoList;
 results->subtreeInfoList = results->subtreeInfoList->next;
 return results;
 }