68f95eec388eb3895787fc885fa01fe7ee3447ac
angie
  Thu Aug 26 11:36:26 2021 -0700
Recently usher started prepending node_ to numeric internal node names stored in the protobuf, so node names in usher outputs are now node_<number> instead of just <number>.  However, the protobuf still has numeric names, so it may be necessary to strip the prefix when looking up bigTree nodes.

diff --git src/hg/hgPhyloPlace/phyloPlace.h src/hg/hgPhyloPlace/phyloPlace.h
index db22d6f..5c3de07 100644
--- src/hg/hgPhyloPlace/phyloPlace.h
+++ src/hg/hgPhyloPlace/phyloPlace.h
@@ -14,30 +14,34 @@
 
 #define PHYLOPLACE_DATA_DIR "hgPhyloPlaceData"
 
 // Allow users to upload a lot of sequences, but put limits on how much detail we'll show and
 // how many custom tracks we'll create.
 #define MAX_SUBTREE_BUTTONS 5
 #define MAX_SEQ_DETAILS 100
 #define MAX_SUBTREE_CTS 10
 
 // For usher's -K option (single subtree):
 #define SINGLE_SUBTREE_SIZE "1000"
 
 #define NEXTSTRAIN_DRAG_DROP_DOC "https://docs.nextstrain.org/projects/auspice/en/latest/advanced-functionality/drag-drop-csv-tsv.html"
 #define OUTBREAK_INFO_URLBASE "https://outbreak.info/situation-reports?pango="
 
+// usher now prepends "node_" to node numbers when parsing protobuf, although they're still stored
+// as plain numbers in the protobuf.
+#define USHER_NODE_PREFIX "node_"
+
 struct treeChoices
 /* Phylogenetic tree versions for the user to choose from. */
 {
     char **protobufFiles;      // Mutation annotated tree files in protobuf format for UShER
     char **metadataFiles;      // Sample metadata a la GISAID's nextmeta download option
     char **sources;            // GISAID or public
     char **descriptions;       // Menu labels to describe the options to the user
     char **aliasFiles;         // Two-column files associating IDs/aliases with full tree names
     int count;                 // Number of choices (and size of each array)
 };
 
 struct seqInfo
 /* User sequences, alignments and statistics */
 {
     struct seqInfo *next;
@@ -194,30 +198,34 @@
 /* Look up sampleId in sampleMetadata, by accession if sampleId seems to include an accession. */
 
 struct phyloTree *phyloPruneToIds(struct phyloTree *node, struct slName *sampleIds);
 /* Prune all descendants of node that have no leaf descendants in sampleIds. */
 
 char *phyloPlaceDbSetting(char *db, char *settingName);
 /* Return a setting from hgPhyloPlaceData/<db>/config.ra or NULL if not found. */
 
 char *phyloPlaceDbSettingPath(char *db, char *settingName);
 /* Return path to a file named by a setting from hgPhyloPlaceData/<db>/config.ra,
  * or NULL if not found.  (Append hgPhyloPlaceData/<db>/ to the beginning of relative path) */
 
 struct treeChoices *loadTreeChoices(char *db);
 /* If <db>/config.ra specifies a treeChoices file, load it up, else return NULL. */
 
+boolean isInternalNodeName(char *nodeName, int minNewNode);
+/* Return TRUE if nodeName looks like an internal node ID from the protobuf tree, i.e. is numeric
+ * or <USHER_NODE_PREFIX><number> and, if minNewNode > 0, number is less than minNewNode. */
+
 void reportTiming(int *pStartTime, char *message);
 /* Print out a report to stderr of how much time something took. */
 
 boolean hgPhyloPlaceEnabled();
 /* Return TRUE if hgPhyloPlace is enabled in hg.conf and db wuhCor1 exists. */
 
 char *phyloPlaceSamples(struct lineFile *lf, char *db, char *defaultProtobuf,
                         boolean doMeasureTiming, int subtreeSize, int fontHeight,
                         boolean *retSuccess);
 /* Given a lineFile that contains either FASTA, VCF, or a list of sequence names/ids:
  * If FASTA/VCF, then prepare VCF for usher; if that goes well then run usher, report results,
  * make custom track files and return the top-level custom track file.
  * If list of seq names/ids, then attempt to find their full names in the protobuf, run matUtils
  * to make subtrees, show subtree results, and return NULL.  Set retSuccess to TRUE if we were
  * able to get at least some results for the user's input. */