68f95eec388eb3895787fc885fa01fe7ee3447ac angie Thu Aug 26 11:36:26 2021 -0700 Recently usher started prepending node_ to numeric internal node names stored in the protobuf, so node names in usher outputs are now node_<number> instead of just <number>. However, the protobuf still has numeric names, so it may be necessary to strip the prefix when looking up bigTree nodes. diff --git src/hg/hgPhyloPlace/phyloPlace.h src/hg/hgPhyloPlace/phyloPlace.h index db22d6f..5c3de07 100644 --- src/hg/hgPhyloPlace/phyloPlace.h +++ src/hg/hgPhyloPlace/phyloPlace.h @@ -14,30 +14,34 @@ #define PHYLOPLACE_DATA_DIR "hgPhyloPlaceData" // Allow users to upload a lot of sequences, but put limits on how much detail we'll show and // how many custom tracks we'll create. #define MAX_SUBTREE_BUTTONS 5 #define MAX_SEQ_DETAILS 100 #define MAX_SUBTREE_CTS 10 // For usher's -K option (single subtree): #define SINGLE_SUBTREE_SIZE "1000" #define NEXTSTRAIN_DRAG_DROP_DOC "https://docs.nextstrain.org/projects/auspice/en/latest/advanced-functionality/drag-drop-csv-tsv.html" #define OUTBREAK_INFO_URLBASE "https://outbreak.info/situation-reports?pango=" +// usher now prepends "node_" to node numbers when parsing protobuf, although they're still stored +// numeric in the protobuf. +#define USHER_NODE_PREFIX "node_" + struct treeChoices /* Phylogenetic tree versions for the user to choose from. 
*/ { char **protobufFiles; // Mutation annotated tree files in protobuf format for UShER char **metadataFiles; // Sample metadata a la GISAID's nextmeta download option char **sources; // GISAID or public char **descriptions; // Menu labels to describe the options to the user char **aliasFiles; // Two-column files associating IDs/aliases with full tree names int count; // Number of choices (and size of each array) }; struct seqInfo /* User sequences, alignments and statistics */ { struct seqInfo *next; @@ -194,30 +198,34 @@ /* Look up sampleId in sampleMetadata, by accession if sampleId seems to include an accession. */ struct phyloTree *phyloPruneToIds(struct phyloTree *node, struct slName *sampleIds); /* Prune all descendants of node that have no leaf descendants in sampleIds. */ char *phyloPlaceDbSetting(char *db, char *settingName); /* Return a setting from hgPhyloPlaceData/<db>/config.ra or NULL if not found. */ char *phyloPlaceDbSettingPath(char *db, char *settingName); /* Return path to a file named by a setting from hgPhyloPlaceData/<db>/config.ra, * or NULL if not found. (Prepend hgPhyloPlaceData/<db>/ to a relative path) */ struct treeChoices *loadTreeChoices(char *db); /* If <db>/config.ra specifies a treeChoices file, load it up, else return NULL. */ +boolean isInternalNodeName(char *nodeName, int minNewNode); +/* Return TRUE if nodeName looks like an internal node ID from the protobuf tree, i.e. is numeric + * or <USHER_NODE_PREFIX><number> and, if minNewNode > 0, number is less than minNewNode. */ + void reportTiming(int *pStartTime, char *message); /* Print out a report to stderr of how much time something took. */ boolean hgPhyloPlaceEnabled(); /* Return TRUE if hgPhyloPlace is enabled in hg.conf and db wuhCor1 exists. 
*/ char *phyloPlaceSamples(struct lineFile *lf, char *db, char *defaultProtobuf, boolean doMeasureTiming, int subtreeSize, int fontHeight, boolean *retSuccess); /* Given a lineFile that contains either FASTA, VCF, or a list of sequence names/ids: * If FASTA/VCF, then prepare VCF for usher; if that goes well then run usher, report results, * make custom track files and return the top-level custom track file. * If list of seq names/ids, then attempt to find their full names in the protobuf, run matUtils * to make subtrees, show subtree results, and return NULL. Set retSuccess to TRUE if we were * able to get at least some results for the user's input. */