a2a68e8ad4ff5927b604b27898c183a519c9e5f4 angie Tue Feb 23 20:08:34 2021 -0800 Instead of replacing small subtrees (usher -k) with the new single subtree option (-K), make both (unless a large number of sequences are uploaded; then just -K). diff --git src/hg/hgPhyloPlace/phyloPlace.c src/hg/hgPhyloPlace/phyloPlace.c index 00013b9..6f49fe1 100644 --- src/hg/hgPhyloPlace/phyloPlace.c +++ src/hg/hgPhyloPlace/phyloPlace.c @@ -1022,77 +1022,89 @@ int *retIx) /* Find the subtree that contains sample name and set *retIx to its index in the list. * If we can't find it, return NULL and set *retIx to -1. */ { struct subtreeInfo *ti; int ix; for (ti = subtreeInfoList, ix = 0; ti != NULL; ti = ti->next, ix++) if (slNameInList(ti->subtreeUserSampleIds, name)) break; if (ti == NULL) ix = -1; *retIx = ix; return ti; } -//#*** TODO: Replace temporary host with nextstrain.org when feature request is released -//#*** https://github.com/nextstrain/nextstrain.org/pull/216 static char *nextstrainHost() -/* Until the new /fetch/ function is live on nextstrain.org, get their temporary staging host - * from an hg.conf param, or NULL if missing. */ +/* Return the nextstrain hostname from an hg.conf param, or NULL if missing. */ { return cfgOption("nextstrainHost"); } static char *nextstrainUrlFromTn(struct tempName *jsonTn) /* Return a link to Nextstrain to view an annotated subtree. */ { char *jsonUrlForNextstrain = urlFromTn(jsonTn); char *protocol = strstr(jsonUrlForNextstrain, "://"); if (protocol) jsonUrlForNextstrain = protocol + strlen("://"); struct dyString *dy = dyStringCreate("%s/fetch/%s", nextstrainHost(), jsonUrlForNextstrain); return dyStringCannibalize(&dy); } -static void makeNextstrainButton(char *idBase, int ix, struct tempName *jsonTns[]) -/* Make a button to view results in Nextstrain. idBase is a short string and +static void makeNextstrainButton(char *id, struct tempName *tn, char *label) +/* Make a button to view an auspice JSON file in Nextstrain. */ +{ +char *nextstrainUrl = nextstrainUrlFromTn(tn); +struct dyString *js = dyStringCreate("window.open('%s');", nextstrainUrl); +cgiMakeOnClickButton(id, js->string, label); +dyStringFree(&js); +freeMem(nextstrainUrl); +} + +static void makeNextstrainButtonN(char *idBase, int ix, struct tempName *jsonTns[]) +/* Make a button to view one subtree in Nextstrain. idBase is a short string and * ix is 0-based subtree number. */ { char buttonId[256]; safef(buttonId, sizeof buttonId, "%s%d", idBase, ix+1); char buttonLabel[256]; safef(buttonLabel, sizeof buttonLabel, "view subtree %d in Nextstrain", ix+1); -char *nextstrainUrl = nextstrainUrlFromTn(jsonTns[ix]); -struct dyString *js = dyStringCreate("window.open('%s');", nextstrainUrl); -cgiMakeOnClickButton(buttonId, js->string, buttonLabel); -dyStringFree(&js); -freeMem(nextstrainUrl); +makeNextstrainButton(buttonId, jsonTns[ix], buttonLabel); +} + +static void makeNsSingleTreeButton(struct tempName *tn) +/* Make a button to view single subtree (with all uploaded samples) in Nextstrain. */ +{ +makeNextstrainButton("viewNextstrainSingleSubtree", tn, "view comprehensive subtree in Nextstrain"); } -static void makeButtonRow(struct tempName *jsonTns[], int subtreeCount, boolean isFasta) +static void makeButtonRow(struct tempName *singleSubtreeJsonTn, struct tempName *jsonTns[], + int subtreeCount, boolean isFasta) /* Russ's suggestion: row of buttons at the top to view results in GB, Nextstrain, Nextclade. */ { puts("

"); cgiMakeButton("submit", "view in Genome Browser"); if (nextstrainHost()) { + printf(" "); + makeNsSingleTreeButton(singleSubtreeJsonTn); int ix; for (ix = 0; ix < subtreeCount; ix++) { printf(" "); - makeNextstrainButton("viewNextstrainTopRow", ix, jsonTns); + makeNextstrainButtonN("viewNextstrainTopRow", ix, jsonTns); } } if (0 && isFasta) { printf(" "); struct dyString *js = dyStringCreate("window.open('https://master.clades.nextstrain.org/" "?input-fasta=%s');", "needATn"); //#*** TODO: save FASTA to file cgiMakeOnClickButton("viewNextclade", js->string, "view sequences in Nextclade"); } puts("

"); } #define TOOLTIP(text) "
(?)" text "
" @@ -1830,78 +1842,84 @@ vcfTn = checkAndSaveVcf(lf, refGenome, maskSites, &seqInfoList, &sampleIds); reportTiming(&startTime, "check uploaded VCF"); } else { if (isNotEmpty(lf->fileName)) warn("Sorry, can't recognize your file %s as FASTA or VCF.\n", lf->fileName); else warn("Sorry, can't recognize your uploaded data as FASTA or VCF.\n"); } lineFileClose(&lf); if (vcfTn) { fflush(stdout); int seqCount = slCount(seqInfoList); + // Don't make smaller subtrees when a large number of sequences are uploaded. + if (seqCount > MAX_SEQ_DETAILS) + subtreeSize = 0; struct usherResults *results = runUsher(usherPath, usherAssignmentsPath, vcfTn->forCgi, subtreeSize, sampleIds, bigTree->condensedNodes, &startTime); - if (results->subtreeInfoList || seqCount > MAX_SEQ_DETAILS) + if (results->singleSubtreeInfo) { readQcThresholds(db); int subtreeCount = slCount(results->subtreeInfoList); // Sort subtrees by number of user samples (largest first). slSort(&results->subtreeInfoList, subTreeInfoUserSampleCmp); // Make Nextstrain/auspice JSON file for each subtree. char *bigGenePredFile = phyloPlaceDbSettingPath(db, "bigGenePredFile"); struct geneInfo *geneInfoList = getGeneInfoList(bigGenePredFile, refGenome); struct seqWindow *gSeqWin = chromSeqWindowNew(db, chrom, 0, chromSize); struct hash *sampleMetadata = getSampleMetadata(metadataFile); + struct tempName *singleSubtreeJsonTn; + AllocVar(singleSubtreeJsonTn); + trashDirFile(singleSubtreeJsonTn, "ct", "singleSubtreeAuspice", ".json"); + treeToAuspiceJson(results->singleSubtreeInfo, db, geneInfoList, gSeqWin, sampleMetadata, + singleSubtreeJsonTn->forCgi, source); struct tempName *jsonTns[subtreeCount]; struct subtreeInfo *ti; int ix; for (ix = 0, ti = results->subtreeInfoList; ti != NULL; ti = ti->next, ix++) { AllocVar(jsonTns[ix]); trashDirFile(jsonTns[ix], "ct", "subtreeAuspice", ".json"); treeToAuspiceJson(ti, db, geneInfoList, gSeqWin, sampleMetadata, jsonTns[ix]->forCgi, source); } puts("

"); - if (subtreeCount > 0 && subtreeCount <= MAX_SUBTREE_BUTTONS) - { - makeButtonRow(jsonTns, subtreeCount, isFasta); + int subtreeButtonCount = (seqCount <= MAX_SEQ_DETAILS) ? subtreeCount : 0; + makeButtonRow(singleSubtreeJsonTn, jsonTns, subtreeButtonCount, isFasta); printf("

If you have metadata you wish to display, click a 'view subtree in " "Nextstrain' button, and then you can drag on a CSV file to " "add it to the tree view." "

\n"); - } if (seqCount <= MAX_SEQ_DETAILS) { summarizeSequences(seqInfoList, isFasta, results, jsonTns, sampleMetadata, bigTree, refGenome); reportTiming(&startTime, "write summary table (including reading in lineages)"); for (ix = 0, ti = results->subtreeInfoList; ti != NULL; ti = ti->next, ix++) { int subtreeUserSampleCount = slCount(ti->subtreeUserSampleIds); printf("

Subtree %d: ", ix+1); if (subtreeUserSampleCount > 1) printf("%d related samples", subtreeUserSampleCount); else if (subtreeCount > 1) printf("Unrelated sample"); printf("

\n"); - makeNextstrainButton("viewNextstrainSub", ix, jsonTns); + makeNextstrainButtonN("viewNextstrainSub", ix, jsonTns); puts("
"); // Make a sub-subtree with only user samples for display: struct phyloTree *subtree = phyloOpenTree(ti->subtreeTn->forCgi); subtree = phyloPruneToIds(subtree, ti->subtreeUserSampleIds); describeSamplePlacements(ti->subtreeUserSampleIds, results->samplePlacements, subtree, sampleMetadata, bigTree, source); } reportTiming(&startTime, "describe placements"); } else printf("

(Skipping details and subtrees; " "you uploaded %d sequences, and details/subtrees are shown only when " "you upload at most %d sequences.)

\n", seqCount, MAX_SEQ_DETAILS);