f22f0b5f1d467e570dad2407813748889408d9bd
angie
Tue Sep 14 09:38:11 2021 -0700
Don't rely on outputs printed only when usher is compiled with -DDEBUG=1; those are going away in server mode. We already weren't showing the 'Best nodes' output, so just remove that code. We already have sample mutations in seqInfo->sncList, so just reformat that into placementInfo->sampleMuts.
diff --git src/hg/hgPhyloPlace/phyloPlace.c src/hg/hgPhyloPlace/phyloPlace.c
index a00a4c0..d583c05 100644
--- src/hg/hgPhyloPlace/phyloPlace.c
+++ src/hg/hgPhyloPlace/phyloPlace.c
@@ -24,31 +24,30 @@
#include "psl.h"
#include "ra.h"
#include "regexHelper.h"
#include "trashDir.h"
#include "vcf.h"
// Globals:
static boolean measureTiming = FALSE;
// wuhCor1-specific:
char *chrom = "NC_045512v2";
int chromSize = 29903;
// Parameter constants:
int maxGenotypes = 1000; // Upper limit on number of samples user can upload at once.
-boolean showBestNodePaths = FALSE;
boolean showParsimonyScore = FALSE;
char *phyloPlaceDbSetting(char *db, char *settingName)
/* Return a setting from hgPhyloPlaceData/ Differences from the reference genome "
"("
"NC_045512.2): ");
if (info->sampleMuts == NULL)
printf("(None; identical to reference)");
else
{
struct slName *sln;
for (sln = info->sampleMuts; sln != NULL; sln = sln->next)
{
if (sln != info->sampleMuts)
printf(", ");
printf("%s", sln->name);
@@ -869,65 +897,30 @@
}
}
static void displayNearestNeighbors(struct placementInfo *info, char *source)
/* Use info->variantPaths to find sample's nearest neighbor(s) in tree. */
{
if (isNotEmpty(info->nearestNeighbor))
{
printf(" Nearest neighboring %s sequence already in phylogenetic tree: %s",
source, info->nearestNeighbor);
printLineageUrl(info->neighborLineage);
puts(" The placement in the tree is unambiguous; "
- "there are no other parsimony-optimal placements in the phylogenetic tree. This placement is not the only parsimony-optimal placement in the tree; "
- "%d other placements exist.
");
- puts("
Base values imputed by parsimony:\n
Parsimony score added by your sample: %d
\n", info->parsimonyScore); //#*** TODO: explain parsimony score } } struct phyloTree *phyloPruneToIds(struct phyloTree *node, struct slName *sampleIds) /* Prune all descendants of node that have no leaf descendants in sampleIds. */ { if (node->numEdges) { struct phyloTree *prunedKids = NULL; int i; for (i = 0; i < node->numEdges; i++) @@ -2180,60 +2171,58 @@ } } static struct hash *hashFromSeqInfoList(struct seqInfo *seqInfoList) /* Hash sequence name to seqInfo for quick lookup. */ { struct hash *hash = hashNew(0); struct seqInfo *si; for (si = seqInfoList; si != NULL; si = si->next) hashAdd(hash, si->seq->name, si); return hash; } static struct tempName *writeTsvSummary(struct usherResults *results, struct phyloTree *sampleTree, - struct slName *sampleIds, struct seqInfo *seqInfoList, + struct slName *sampleIds, struct hash *seqInfoHash, struct geneInfo *geneInfoList, struct seqWindow *gSeqWin, struct hash *spikeChanges, int *pStartTime) /* Write a tab-separated summary file for download. If the user uploaded enough samples to make * a tree, then write out samples in tree order; otherwise use sampleIds list. * Accumulate S gene changes. */ { struct tempName *tsvTn = NULL; AllocVar(tsvTn); trashDirFile(tsvTn, "ct", "usher_samples", ".tsv"); FILE *f = mustOpen(tsvTn->forCgi, "w"); fprintf(f, "name\tnuc_mutations\taa_mutations\timputed_bases\tmutation_path" "\tplacement_count\tparsimony_score_increase\tlength\taligned_bases" "\tins_bases\tins_ranges\tdel_bases\tdel_ranges\tmasked_mutations" "\tnearest_neighbor\tneighbor_lineage\tnextstrain_clade\tpango_lineage" "\n"); -struct hash *seqInfoHash = hashFromSeqInfoList(seqInfoList); if (sampleTree) { rWriteTsvSummaryTreeOrder(sampleTree, f, results, seqInfoHash, geneInfoList, gSeqWin, spikeChanges); } else { struct slName *sample; for (sample = sampleIds; sample != NULL; sample = sample->next) writeOneTsvRow(f, sample->name, results, seqInfoHash, geneInfoList, gSeqWin, spikeChanges); } carefulClose(&f); -hashFree(&seqInfoHash); reportTiming(pStartTime, "write tsv summary"); return tsvTn; } static struct tempName *writeSpikeChangeSummary(struct hash *spikeChanges, int totalSampleCount) /* Write a tab-separated summary of S (Spike) gene changes for download. */ { struct tempName *tsvTn = NULL; AllocVar(tsvTn); trashDirFile(tsvTn, "ct", "usher_S_muts", ".tsv"); FILE *f = mustOpen(tsvTn->forCgi, "w"); fprintf(f, "aa_mutation\tsample_count\tsample_frequency\tsample_ids\tcategory" "\n"); struct aaMutInfo *sChanges[spikeChanges->elCount]; struct hashCookie cookie = hashFirst(spikeChanges); @@ -2682,41 +2671,43 @@ printf("Sorry, could not align any sequences to reference well enough to place in " "the phylogenetic tree.
\n"); isFasta = TRUE; } else if (lfLooksLikeVcf(lf)) { vcfTn = checkAndSaveVcf(lf, refGenome, maskSites, &seqInfoList, &sampleIds); reportTiming(&startTime, "check uploaded VCF"); } else { subtreesOnly = TRUE; sampleIds = readSampleIds(lf, bigTree, aliasFile); } lineFileClose(&lf); +struct hash *seqInfoHash = hashFromSeqInfoList(seqInfoList); if (sampleIds == NULL) { return ctFile; } struct usherResults *results = NULL; if (vcfTn) { fflush(stdout); results = runUsher(usherPath, protobufPath, vcfTn->forCgi, subtreeSize, sampleIds, bigTree->condensedNodes, &startTime); + addSampleMutsFromSeqInfo(results->samplePlacements, seqInfoHash); } else if (subtreesOnly) { char *matUtilsPath = getMatUtilsPath(TRUE); results = runMatUtilsExtractSubtrees(matUtilsPath, protobufPath, subtreeSize, sampleIds, bigTree->condensedNodes, &startTime); } if (results && results->singleSubtreeInfo) { if (retSuccess) *retSuccess = TRUE; puts(""); readQcThresholds(db); int subtreeCount = slCount(results->subtreeInfoList); @@ -2769,31 +2760,31 @@ { summarizeSubtrees(sampleIds, results, sampleMetadata, jsonTns, bigTree); reportTiming(&startTime, "describe subtrees"); } else { findNearestNeighbors(results->samplePlacements, sampleMetadata, bigTree); // Make custom tracks for uploaded samples and subtree(s). struct phyloTree *sampleTree = NULL; ctTn = writeCustomTracks(vcfTn, results, sampleIds, bigTree, source, fontHeight, &sampleTree, &startTime); // Make a sample summary TSV file and accumulate S gene changes struct hash *spikeChanges = hashNew(0); - tsvTn = writeTsvSummary(results, sampleTree, sampleIds, seqInfoList, + tsvTn = writeTsvSummary(results, sampleTree, sampleIds, seqInfoHash, geneInfoList, gSeqWin, spikeChanges, &startTime); sTsvTn = writeSpikeChangeSummary(spikeChanges, slCount(sampleIds)); downloadsRow(results->bigTreePlusTn->forHtml, tsvTn->forHtml, sTsvTn->forHtml, zipTn->forHtml); int seqCount = slCount(seqInfoList); if (seqCount <= MAX_SEQ_DETAILS) { summarizeSequences(seqInfoList, isFasta, results, jsonTns, sampleMetadata, refGenome); reportTiming(&startTime, "write summary table (including reading in lineages)"); for (ix = 0, ti = results->subtreeInfoList; ti != NULL; ti = ti->next, ix++) { int subtreeUserSampleCount = slCount(ti->subtreeUserSampleIds); printf("