0b186effb2d2b536d2c280ec31bec6e153e6bd7e angie Tue May 21 13:26:39 2024 -0700 Add support for uploaded names/IDs for the multi-ref/tree organism.ra case. Add new config option anchorSamples, pass to usher-sampled/matUtils/server if present. anchorSamples is a file with names of sequences that should always be included in the subtree to provide some larger-scale context, e.g. well-known vaccine or reference material strains. Influenza user request. diff --git src/hg/hgPhyloPlace/phyloPlace.h src/hg/hgPhyloPlace/phyloPlace.h index 96f8552..ac8cc21 100644 --- src/hg/hgPhyloPlace/phyloPlace.h +++ src/hg/hgPhyloPlace/phyloPlace.h @@ -156,39 +156,40 @@ struct mmHash *mmh; // Either NULL (if hash is non-NULL) or a memory-mapped hash. struct hash *hash; // Either NULL (if mmh is non-NULL) or a regular hash. }; struct tempName *vcfFromFasta(struct lineFile *lf, char *org, char *db, struct dnaSeq *refGenome, struct slName **maskSites, struct hashOrMmHash *treeNames, struct slName **retSampleIds, struct seqInfo **retSeqInfo, struct slPair **retFailedSeqs, struct slPair **retFailedPsls, int *pStartTime); /* Read in FASTA from lf and make sure each item has a reasonable size and not too high * percentage of N's. Align to reference, extract SNVs from alignment, and save as VCF * with sample genotype columns. */ struct usherResults *runUsher(char *org, char *usherPath, char *usherAssignmentsPath, char *vcfFile, int subtreeSize, struct slName **pUserSampleIds, - struct treeChoices *treeChoices, int *pStartTime); + struct treeChoices *treeChoices, char *anchorFile, int *pStartTime); /* Open a pipe from Yatish Turakhia's usher program, save resulting big trees and * subtrees to trash files, return list of slRef to struct tempName for the trash files * and parse other results out of stderr output. The usher-sampled version of usher might * modify userSampleIds, adding a prefix if a sample with the same name is already in the tree. */ struct usherResults *runMatUtilsExtractSubtrees(char *org, char *matUtilsPath, char *protobufPath, int subtreeSize, struct slName *sampleIds, - struct treeChoices *treeChoices, int *pStartTime); + struct treeChoices *treeChoices, char *anchorFile, + int *pStartTime); /* Open a pipe from Yatish Turakhia and Jakob McBroome's matUtils extract to extract subtrees * containing sampleIds, save resulting subtrees to trash files, return subtree results. * Caller must ensure that sampleIds are names of leaves in the protobuf tree. */ boolean serverIsConfigured(char *org); /* Return TRUE if all necessary configuration settings are in place to run usher-sampled-server. */ boolean serverIsRunning(char *org, FILE *errFile); /* Return TRUE if we can find a PID for server and that PID looks alive according to /proc. */ boolean startServer(char *org, struct treeChoices *treeChoices, FILE *errFile); /* Start up an usher-sampled-server process to run in the background. */ void serverReloadProtobufs(char *org, struct treeChoices *treeChoices); /* Send a reload command and list of protobufs for org to usher server. */