0b186effb2d2b536d2c280ec31bec6e153e6bd7e
angie
  Tue May 21 13:26:39 2024 -0700
Add support for uploaded names/IDs for the multi-ref/tree organism.ra case.  Add new config option anchorSamples and pass it to usher-sampled/matUtils/server if present.
anchorSamples is a file with names of sequences that should always be included in the subtree to provide some larger-scale context, e.g. well-known vaccine or reference material strains.  Requested by an influenza user.

diff --git src/hg/hgPhyloPlace/phyloPlace.h src/hg/hgPhyloPlace/phyloPlace.h
index 96f8552..ac8cc21 100644
--- src/hg/hgPhyloPlace/phyloPlace.h
+++ src/hg/hgPhyloPlace/phyloPlace.h
@@ -156,39 +156,40 @@
     struct mmHash *mmh;     // Either NULL (if hash is non-NULL) or a memory-mapped hash.
     struct hash *hash;      // Either NULL (if mmh is non-NULL) or a regular hash.
     };
 
 struct tempName *vcfFromFasta(struct lineFile *lf, char *org, char *db, struct dnaSeq *refGenome,
                               struct slName **maskSites, struct hashOrMmHash *treeNames,
                               struct slName **retSampleIds, struct seqInfo **retSeqInfo,
                               struct slPair **retFailedSeqs, struct slPair **retFailedPsls,
                               int *pStartTime);
 /* Read in FASTA from lf and make sure each item has a reasonable size and not too high
  * percentage of N's.  Align to reference, extract SNVs from alignment, and save as VCF
  * with sample genotype columns. */
 
 struct usherResults *runUsher(char *org, char *usherPath, char *usherAssignmentsPath, char *vcfFile,
                               int subtreeSize, struct slName **pUserSampleIds,
-                              struct treeChoices *treeChoices, int *pStartTime);
+                              struct treeChoices *treeChoices, char *anchorFile, int *pStartTime);
 /* Open a pipe from Yatish Turakhia's usher program, save resulting big trees and
  * subtrees to trash files, return list of slRef to struct tempName for the trash files
  * and parse other results out of stderr output.  The usher-sampled version of usher might
  * modify userSampleIds, adding a prefix if a sample with the same name is already in the tree. */
 
 struct usherResults *runMatUtilsExtractSubtrees(char *org, char *matUtilsPath, char *protobufPath,
                                                 int subtreeSize, struct slName *sampleIds,
-                                                struct treeChoices *treeChoices, int *pStartTime);
+                                                struct treeChoices *treeChoices, char *anchorFile,
+                                                int *pStartTime);
 /* Open a pipe from Yatish Turakhia and Jakob McBroome's matUtils extract to extract subtrees
  * containing sampleIds, save resulting subtrees to trash files, return subtree results.
  * Caller must ensure that sampleIds are names of leaves in the protobuf tree. */
 
 boolean serverIsConfigured(char *org);
 /* Return TRUE if all necessary configuration settings are in place to run usher-sampled-server. */
 
 boolean serverIsRunning(char *org, FILE *errFile);
 /* Return TRUE if we can find a PID for server and that PID looks alive according to /proc. */
 
 boolean startServer(char *org, struct treeChoices *treeChoices, FILE *errFile);
 /* Start up an usher-sampled-server process to run in the background. */
 
 void serverReloadProtobufs(char *org, struct treeChoices *treeChoices);
 /* Send a reload command and list of protobufs for org to usher server. */