b7c2dcc865207dd23298429f93057c9068acef68
angie
Wed Dec 9 15:48:34 2020 -0800
hgPhyloPlace: Add config option treeChoices: tab-sep file of {protobuf, metadata, source, description} so we can offer the user a choice between GISAID and public sequence trees (possibly different releases for reproducibility).
diff --git src/hg/hgPhyloPlace/phyloPlace.h src/hg/hgPhyloPlace/phyloPlace.h
index 9408a7f..531ae88 100644
--- src/hg/hgPhyloPlace/phyloPlace.h
+++ src/hg/hgPhyloPlace/phyloPlace.h
@@ -3,30 +3,40 @@
#ifndef _PHYLO_PLACE_H_
#define _PHYLO_PLACE_H_
#include "common.h"
#include "dnaseq.h"
#include "hash.h"
#include "linefile.h"
#include "parsimonyProto.h"
#include "phyloTree.h"
#include "trashDir.h"
#define PHYLOPLACE_DATA_DIR "hgPhyloPlaceData"
#define NEXTSTRAIN_DRAG_DROP_DOC "https://docs.nextstrain.org/projects/auspice/en/latest/advanced-functionality/drag-drop-csv-tsv.html"
+struct treeChoices
+/* Phylogenetic tree versions for the user to choose from; arrays are parallel (index i = choice i). */
+{
+ char **protobufFiles; // Mutation annotated tree files in protobuf format for UShER, one per choice
+ char **metadataFiles; // Sample metadata files a la GISAID's nextmeta download option, one per choice
+ char **sources; // Origin of each tree's sequences: GISAID or public
+ char **descriptions; // Menu labels to describe the options to the user
+ int count; // Number of choices (and size of each array)
+};
+
struct seqInfo
/* User sequences, alignments and statistics */
{
struct seqInfo *next;
struct dnaSeq *seq;
struct psl *psl;
struct singleNucChange *sncList;
struct singleNucChange *maskedSncList;
struct slRef *maskedReasonsList;
uint nCountStart;
uint nCountMiddle;
uint nCountEnd;
uint ambigCount;
};
@@ -111,51 +121,55 @@
struct slName **retSampleIds, struct seqInfo **retSeqInfo,
struct slPair **retFailedSeqs, struct slPair **retFailedPsls,
int *pStartTime);
/* Read in FASTA from lf and make sure each item has a reasonable size and not too high
* percentage of N's. Align to reference, extract SNVs from alignment, and save as VCF
* with sample genotype columns. */
struct usherResults *runUsher(char *usherPath, char *usherAssignmentsPath, char *vcfFile,
int subtreeSize, struct slName *userSampleIds,
struct hash *condensedNodes, int *pStartTime);
/* Open a pipe from Yatish Turakhia's usher program, save resulting big trees and
* subtrees to trash files, return list of slRef to struct tempName for the trash files
* and parse other results out of stderr output. */
void treeToAuspiceJson(struct subtreeInfo *sti, char *db, struct dnaSeq *ref,
- char *bigGenePredFile, struct hash *sampleMetadata, char *jsonFile);
+ char *bigGenePredFile, struct hash *sampleMetadata, char *jsonFile,
+ char *source); // NOTE(review): source is undocumented; presumably GISAID or public as in struct treeChoices -- confirm
/* Write JSON for tree in Nextstrain's Augur/Auspice V2 JSON format
* (https://github.com/nextstrain/augur/blob/master/augur/data/schema-export-v2.json). */
struct tempName *writeCustomTracks(struct tempName *vcfTn, struct usherResults *ur,
struct slName *sampleIds, struct phyloTree *bigTree,
int fontHeight, int *pStartTime);
/* Write one custom track per subtree, and one custom track with just the user's uploaded samples. */
struct sampleMetadata *metadataForSample(struct hash *sampleMetadata, char *sampleId);
/* Look up sampleId in sampleMetadata, by accession if sampleId seems to include an accession. */
struct phyloTree *phyloPruneToIds(struct phyloTree *node, struct slName *sampleIds);
/* Prune all descendants of node that have no leaf descendants in sampleIds. */
char *phyloPlaceDbSetting(char *db, char *settingName);
/* Return a setting from hgPhyloPlaceData/<db>/config.ra or NULL if not found. */
char *phyloPlaceDbSettingPath(char *db, char *settingName);
/* Return path to a file named by a setting from hgPhyloPlaceData/<db>/config.ra,
* or NULL if not found. (Append hgPhyloPlaceData/<db>/ to the beginning of relative path) */
+struct treeChoices *loadTreeChoices(char *db);
+/* If <db>/config.ra specifies a treeChoices file (tab-separated), load it up, else return NULL. */
+
void reportTiming(int *pStartTime, char *message);
/* Print out a report to stderr of how much time something took. */
boolean hgPhyloPlaceEnabled();
/* Return TRUE if hgPhyloPlace is enabled in hg.conf and db wuhCor1 exists. */
-char *phyloPlaceSamples(struct lineFile *lf, char *db, boolean doMeasureTiming, int subtreeSize,
- int fontHeight);
-/* Given a lineFile that contains either FASTA or VCF, prepare VCF for add_missing_samples;
- * if that goes well then run add_missing_samples, report results, make custom track files
+char *phyloPlaceSamples(struct lineFile *lf, char *db, char *defaultProtobuf,
+ boolean doMeasureTiming, int subtreeSize, int fontHeight); // TODO(review): describe defaultProtobuf
+/* Given a lineFile that contains either FASTA or VCF, prepare VCF for usher;
+ * if that goes well then run usher, report results, make custom track files
* and return the top-level custom track file; otherwise return NULL. */
#endif //_PHYLO_PLACE_H_