08ec019f344fc52bd83ea3a98bedccd0048d732c
angie
Mon May 24 00:04:08 2021 -0700
Let the user upload a file containing names or IDs of sequences already in the selected tree; run matUtils extract to get subtrees that include those sequences. protobufs.tab gets a new optional column to specify a file that maps alias to tree name/ID (e.g. for mapping EPI_ISL to public names/IDs).
diff --git src/hg/hgPhyloPlace/phyloPlace.c src/hg/hgPhyloPlace/phyloPlace.c
index e589069..17fd160 100644
--- src/hg/hgPhyloPlace/phyloPlace.c
+++ src/hg/hgPhyloPlace/phyloPlace.c
@@ -75,30 +75,41 @@
}
return fileName;
}
char *getUsherPath(boolean abortIfNotFound)
/* Return hgPhyloPlaceData/usher if it exists, else NULL. Do not free the returned value.
 * If the file does not exist and abortIfNotFound is set, errAbort is called with a message
 * naming the missing path. */
{
char *usherPath = PHYLOPLACE_DATA_DIR "/usher";  // string literal, hence "do not free"
if (fileExists(usherPath))
return usherPath;
else if (abortIfNotFound)
errAbort("Missing required file %s", usherPath);
return NULL;
}
+char *getMatUtilsPath(boolean abortIfNotFound)
+/* Look for the matUtils program under PHYLOPLACE_DATA_DIR.  When the file exists, return its
+ * path (a string literal -- do not free).  When it is missing, errAbort if abortIfNotFound is
+ * set; otherwise return NULL. */
+{
+char *path = PHYLOPLACE_DATA_DIR "/matUtils";
+boolean found = fileExists(path);
+if (!found && abortIfNotFound)
+    errAbort("Missing required file %s", path);
+return found ? path : NULL;
+}
+
char *getUsherAssignmentsPath(char *db, boolean abortIfNotFound)
/* If /config.ra specifies the file for use by usher --load-assignments and the file exists,
 * return the path, else NULL. Do not free the returned value.
 * If the setting is empty or the file does not exist, and abortIfNotFound is set, errAbort is
 * called. */
{
char *usherAssignmentsPath = phyloPlaceDbSettingPath(db, "usherAssignmentsFile");
if (isNotEmpty(usherAssignmentsPath) && fileExists(usherAssignmentsPath))
return usherAssignmentsPath;
else if (abortIfNotFound)
// NOTE(review): this branch is also reached when the setting itself is empty; if
// phyloPlaceDbSettingPath can return NULL, then NULL is passed for %s below -- confirm.
errAbort("Missing required file %s", usherAssignmentsPath);
return NULL;
}
//#*** This needs to go in a lib so CGIs know whether to include it in the menu. needs better name.
boolean hgPhyloPlaceEnabled()
/* Return TRUE if hgPhyloPlace is enabled in hg.conf and db wuhCor1 exists. */
@@ -120,59 +131,65 @@
}
struct treeChoices *loadTreeChoices(char *db)
/* If /config.ra specifies a treeChoices file, load it up, else return NULL.
 * Each non-comment line is tab-separated with at least 4 columns: protobuf file, metadata file,
 * source, and description (either literal text or the name of a file whose contents are read
 * in).  An optional 5th column names an alias file.  Columns are stored in parallel arrays
 * indexed by choice number; at most 128 choices are loaded and any further lines are dropped
 * with a warning. */
{
struct treeChoices *treeChoices = NULL;
char *filename = phyloPlaceDbSettingPath(db, "treeChoices");
if (isNotEmpty(filename) && fileExists(filename))
{
AllocVar(treeChoices);
int maxChoices = 128;  // capacity of each of the parallel arrays allocated below
AllocArray(treeChoices->protobufFiles, maxChoices);
AllocArray(treeChoices->metadataFiles, maxChoices);
AllocArray(treeChoices->sources, maxChoices);
AllocArray(treeChoices->descriptions, maxChoices);
+ AllocArray(treeChoices->aliasFiles, maxChoices);
struct lineFile *lf = lineFileOpen(filename, TRUE);
char *line;
while (lineFileNextReal(lf, &line))
{
- char *words[5];
+ char *words[6];
int wordCount = chopTabs(line, words);
- lineFileExpectWords(lf, 4, wordCount);
+ lineFileExpectAtLeast(lf, 4, wordCount);
if (treeChoices->count >= maxChoices)
{
warn("File %s has too many lines, only showing first %d phylogenetic tree choices",
filename, maxChoices);
break;
}
struct dyString *dy = dyStringNew(0);  // scratch buffer for each path built below
addPathIfNecessary(dy, db, words[0]);
treeChoices->protobufFiles[treeChoices->count] = cloneString(dy->string);
addPathIfNecessary(dy, db, words[1]);
treeChoices->metadataFiles[treeChoices->count] = cloneString(dy->string);
treeChoices->sources[treeChoices->count] = cloneString(words[2]);
// Description can be either a file or just some text.
addPathIfNecessary(dy, db, words[3]);
if (fileExists(dy->string))
{
char *desc = NULL;
readInGulp(dy->string, &desc, NULL);
treeChoices->descriptions[treeChoices->count] = desc;
}
else
treeChoices->descriptions[treeChoices->count] = cloneString(words[3]);
+ if (wordCount > 4) // optional 5th column: alias file
+ {
+ addPathIfNecessary(dy, db, words[4]);
+ treeChoices->aliasFiles[treeChoices->count] = cloneString(dy->string);
+ } // NOTE(review): otherwise aliasFiles[count] keeps whatever AllocArray put there (presumably NULL) -- confirm
treeChoices->count++;
dyStringFree(&dy);
}
lineFileClose(&lf);
}
return treeChoices;
}
static char *urlFromTn(struct tempName *tn)
/* Make a full URL to a trash file that our net.c code will be able to follow, for when we can't
 * just leave it up to the user's web browser to do the right thing with "../".
 * The URL is hLocalHostCgiBinUrl() immediately followed by tn->forHtml. */
{
struct dyString *dy = dyStringCreate("%s%s", hLocalHostCgiBinUrl(), tn->forHtml);
return dyStringCannibalize(&dy);  // presumably hands the built string over to the caller -- confirm
}
@@ -1180,34 +1197,36 @@
ix+1, userSampleCount, subtreeSize - userSampleCount);
makeNextstrainButton(buttonId, jsonTns[ix], buttonLabel, dyMo->string);
dyStringFree(&dyMo);
}
static void makeNsSingleTreeButton(struct tempName *tn)
/* Make a button to view single subtree (with all uploaded samples) in Nextstrain.
 * tn is passed through to makeNextstrainButton together with a fixed button id, label and
 * descriptive message. */
{
makeNextstrainButton("viewNextstrainSingleSubtree", tn, "view single subtree in Nextstrain",
"view one subtree that includes all of your uploaded sequences plus "
// SINGLE_SUBTREE_SIZE sits between adjacent string literals, so it must expand to a string literal.
SINGLE_SUBTREE_SIZE" randomly selected sequences from the phylogenetic "
"tree for context");
}
static void makeButtonRow(struct tempName *singleSubtreeJsonTn, struct tempName *jsonTns[],
- struct subtreeInfo *subtreeInfoList, int subtreeSize, boolean isFasta)
+ struct subtreeInfo *subtreeInfoList, int subtreeSize, boolean isFasta,
+ boolean offerCustomTrack)
/* Russ's suggestion: row of buttons at the top to view results in GB, Nextstrain, Nextclade. */
{
puts("");
+if (offerCustomTrack)
cgiMakeButtonWithMsg("submit", "view in Genome Browser",
"view your uploaded sequences, their phylogenetic relationship and their "
"mutations along with many other datasets available in the Genome Browser");
if (nextstrainHost())
{
printf(" ");
makeNsSingleTreeButton(singleSubtreeJsonTn);
struct subtreeInfo *ti;
int ix;
for (ix = 0, ti = subtreeInfoList; ti != NULL; ti = ti->next, ix++)
{
int userSampleCount = slCount(ti->subtreeUserSampleIds);
printf(" ");
makeNextstrainButtonN("viewNextstrainTopRow", ix, userSampleCount, subtreeSize, jsonTns);
}
@@ -2206,83 +2225,301 @@
printf("Global phylogenetic tree with your sequences | ", treeFile);
printf("TSV summary of sequences and placements | ", sampleSummaryFile);
printf("TSV summary of Spike mutations | ", spikeSummaryFile);
printf("ZIP file of subtree JSON and Newick files | ", subtreeZipFile);
puts("
");
}
static int subTreeInfoUserSampleCmp(const void *pa, const void *pb)
/* Compare subtreeInfo by number of user sample IDs (highest number first).
 * pa and pb are each treated as a pointer to a struct subtreeInfo pointer. */
{
struct subtreeInfo *tiA = *(struct subtreeInfo **)pa;
struct subtreeInfo *tiB = *(struct subtreeInfo **)pb;
// B's count minus A's count: the result is negative when A has more user samples.
return slCount(tiB->subtreeUserSampleIds) - slCount(tiA->subtreeUserSampleIds);
}
-char *phyloPlaceSamples(struct lineFile *lf, char *db, char *defaultProtobuf,
- boolean doMeasureTiming, int subtreeSize, int fontHeight)
-/* Given a lineFile that contains either FASTA or VCF, prepare VCF for usher;
- * if that goes well then run usher, report results, make custom track files
- * and return the top-level custom track file; otherwise return NULL. */
+static void getProtobufMetadataSource(char *db, char *protobufFile, char **retProtobufPath,
+ char **retMetadataFile, char **retSource, char **retAliasFile)
+/* If the config file specifies a list of tree choices, and protobufFile is a valid choice, then
+ * set ret* to the files associated with that choice. Otherwise fall back on older conf settings.
+ * Nothing is returned; results are delivered only through the ret* pointers.
+ * With tree choices: an empty or unrecognized protobufFile selects the first choice.
+ * Without tree choices: the protobuf path comes from getUsherAssignmentsPath (called with
+ * abortIfNotFound TRUE), the metadata file from the "metadataFile" setting, source is "GISAID"
+ * and the alias file is NULL. */
{
-char *ctFile = NULL;
-measureTiming = doMeasureTiming;
-int startTime = clock1000();
-struct tempName *vcfTn = NULL;
-struct slName *sampleIds = NULL;
-char *usherPath = getUsherPath(TRUE);
-char *usherAssignmentsPath = NULL;
-char *source = NULL;
-char *metadataFile = NULL;
struct treeChoices *treeChoices = loadTreeChoices(db);
if (treeChoices)
{
- usherAssignmentsPath = defaultProtobuf;
- if (isEmpty(usherAssignmentsPath))
- usherAssignmentsPath = treeChoices->protobufFiles[0];
+ *retProtobufPath = protobufFile;
+ if (isEmpty(*retProtobufPath))
+ *retProtobufPath = treeChoices->protobufFiles[0];
int i;
for (i = 0; i < treeChoices->count; i++)
- if (sameString(treeChoices->protobufFiles[i], usherAssignmentsPath))
+ if (sameString(treeChoices->protobufFiles[i], *retProtobufPath))
{
- metadataFile = treeChoices->metadataFiles[i];
- source = treeChoices->sources[i];
+ *retMetadataFile = treeChoices->metadataFiles[i];
+ *retSource = treeChoices->sources[i];
+ *retAliasFile = treeChoices->aliasFiles[i];
break;
}
if (i == treeChoices->count)  // loop ended without a match: use the first choice
{
- usherAssignmentsPath = treeChoices->protobufFiles[0];
- metadataFile = treeChoices->metadataFiles[0];
- source = treeChoices->sources[0];
+ *retProtobufPath = treeChoices->protobufFiles[0];
+ *retMetadataFile = treeChoices->metadataFiles[0];
+ *retSource = treeChoices->sources[0];
+ *retAliasFile = treeChoices->aliasFiles[0];
}
}
else
{
// Fall back on old settings
- usherAssignmentsPath = getUsherAssignmentsPath(db, TRUE);
- metadataFile = phyloPlaceDbSettingPath(db, "metadataFile");
- source = "GISAID";
+ *retProtobufPath = getUsherAssignmentsPath(db, TRUE);
+ *retMetadataFile = phyloPlaceDbSettingPath(db, "metadataFile");
+ *retSource = "GISAID";
+ *retAliasFile = NULL;
+ }
+}
+
+static void addNameAndComponents(struct hash *nameHash, char *fullName)
+/* Register fullName in nameHash so that it maps to the hash's own stored copy of itself, then
+ * map its '|'-separated components to that same stored copy:
+ *   name|ID|date  -> both name and ID are added
+ *   ID|date       -> ID is added
+ * Any other number of components adds nothing beyond fullName itself. */
+{
+char *stored = hashStoreName(nameHash, fullName);
+// The element for fullName exists now; point its value at the stored name.
+struct hashEl *selfEl = hashLookup(nameHash, fullName);
+if (selfEl == NULL)
+    errAbort("Can't look up '%s' right after adding it", fullName);
+selfEl->val = stored;
+// chopString cuts its input in place, so work on a scratch copy.
+char scratch[strlen(fullName)+1];
+safecpy(scratch, sizeof scratch, fullName);
+char *parts[4];
+int partCount = chopString(scratch, "|", parts, ArraySize(parts));
+// The date is always the last component and is never used as a key.
+int keyCount = 0;
+if (partCount == 3)
+    keyCount = 2;
+else if (partCount == 2)
+    keyCount = 1;
+int ix;
+for (ix = 0;  ix < keyCount;  ix++)
+    hashAdd(nameHash, parts[ix], stored);
+}
+
+static void rAddLeafNames(struct phyloTree *node, struct hash *condensedNodes, struct hash *nameHash)
+/* Walk the tree rooted at node.  For every leaf, add its name to nameHash via
+ * addNameAndComponents; if the leaf's name is a key in condensedNodes, add each name in that
+ * list instead of the leaf's own name.  Full names (country/isolate/year|ID|date) also get
+ * their components mapped to the full name. */
+{
+if (node->numEdges != 0)
+    {
+    // Internal node: only its descendants contribute names.
+    int ix;
+    for (ix = 0;  ix < node->numEdges;  ix++)
+        rAddLeafNames(node->edges[ix], condensedNodes, nameHash);
+    return;
+    }
+char *leafName = node->ident->name;
+struct slName *condensed = hashFindVal(condensedNodes, leafName);
+if (condensed == NULL)
+    addNameAndComponents(nameHash, leafName);
+else
+    {
+    struct slName *member;
+    for (member = condensed;  member != NULL;  member = member->next)
+        addNameAndComponents(nameHash, member->name);
+    }
+}
+
+static void addAliases(struct hash *nameHash, char *aliasFile)
+/* If there is an aliasFile, then add its mappings of ID/alias to full tree name to nameHash.
+ * Each line must have exactly two tab-separated columns: the alias, then a name or ID that is
+ * already a key in nameHash; the alias is mapped to the same value as that key.  Lines whose
+ * second column is not in nameHash are counted and summarized in a single line on stderr.
+ * Does nothing if aliasFile is empty or does not exist. */
+{
+if (isNotEmpty(aliasFile) && fileExists(aliasFile))
+    {
+    struct lineFile *lf = lineFileOpen(aliasFile, TRUE);
+    int missCount = 0;
+    // words[] point into the line just read, so keep our own copy of the first miss.  A fixed
+    // buffer avoids a heap allocation that would need freeing; longer values are truncated
+    // in the diagnostic only.
+    char missExample[256] = "";
+    char *line;
+    while (lineFileNextReal(lf, &line))
+        {
+        char *words[3];
+        int wordCount = chopTabs(line, words);
+        lineFileExpectWords(lf, 2, wordCount);
+        char *fullName = hashFindVal(nameHash, words[1]);
+        if (fullName)
+            hashAdd(nameHash, words[0], fullName);
+        else
+            {
+            if (missCount == 0)
+                snprintf(missExample, sizeof missExample, "%s", words[1]);
+            missCount++;
+            }
+        }
+    lineFileClose(&lf);
+    if (missCount > 0)
+        // Terminate the message with a newline so it does not run into the next log entry.
+        fprintf(stderr, "aliasFile %s: %d values in second column were not found in tree, "
+                "e.g. '%s'\n", aliasFile, missCount, missExample);
+    }
+}
+
+static struct hash *getTreeNames(struct mutationAnnotatedTree *bigTree, char *aliasFile)
+/* Build and return a hash keyed by the full names of bigTree's leaves, by components of those
+ * names (in case the user gives us partial names), and by any aliases from aliasFile.  Values
+ * are the full names. */
+{
+// Size the hash from the number of nodes in the tree, plus 3 extra bits.
+int hashBits = digitsBaseTwo(bigTree->nodeHash->elCount) + 3;
+struct hash *names = hashNew(hashBits);
+rAddLeafNames(bigTree->tree, bigTree->condensedNodes, names);
+addAliases(names, aliasFile);
+return names;
+}
+
+static char *matchName(struct hash *nameHash, char *name)
+/* Find the nameHash value for a name or ID supplied by the user, which may be only part of a
+ * full tree name.  Tries the whole (trimmed) name first, then '|'-separated components, then
+ * the name with spaces removed.  Returns the match, or NULL if nothing was found. */
+{
+const size_t minWordSize = 5;  // components this short or shorter are not tried
+name = trimSpaces(name);
+// GISAID fasta headers all have hCoV-19/country/isolate/year|EPI_ISL_#|date; Nextstrain strips
+// the hCoV-19 in its nextmeta/nextfasta download files and so does UCSC's tree build, so strip
+// it here too.
+if (startsWithNoCase("hCoV-19/", name))
+    name += strlen("hCoV-19/");
+char *match = hashFindVal(nameHash, name);
+if (match != NULL)
+    return match;
+// No exact match: the fallbacks below edit the string, so operate on a scratch copy.
+char scratch[strlen(name)+1];
+safecpy(scratch, sizeof scratch, name);
+if (strchr(name, '|') != NULL)
+    {
+    // GISAID fasta headers have name|ID|date, and so do our tree IDs; try ID and name
+    // separately.
+    char *parts[4];
+    int partCount = chopString(scratch, "|", parts, ArraySize(parts));
+    if (partCount == 3)
+        {
+        // name|ID|date; the ID gets first try.
+        if (strlen(parts[1]) > minWordSize)
+            match = hashFindVal(nameHash, parts[1]);
+        if (match == NULL && strlen(parts[0]) > minWordSize)
+            {
+            match = hashFindVal(nameHash, parts[0]);
+            // Country/isolate names sometimes contain spaces; retry without them.
+            if (match == NULL && strchr(parts[0], ' ') != NULL)
+                {
+                stripChar(parts[0], ' ');
+                match = hashFindVal(nameHash, parts[0]);
+                }
+            }
+        }
+    else if (partCount == 2 && strlen(parts[0]) > minWordSize)
+        {
+        // ID|date
+        match = hashFindVal(nameHash, parts[0]);
+        }
+    }
+else if (strchr(name, ' ') != NULL)
+    {
+    // GISAID sequence names may include spaces, in both country names ("South Korea") and
+    // isolate names.  That messes up FASTA headers, so Nextstrain strips out spaces when
+    // making the nextmeta and nextfasta download files for GISAID.  Try without spaces:
+    stripChar(scratch, ' ');
+    match = hashFindVal(nameHash, scratch);
+    }
+return match;
+}
+
+static struct slName *readSampleIds(struct lineFile *lf, struct mutationAnnotatedTree *bigTree,
+                                    char *aliasFile)
+/* Read a file of sample names/IDs from the user; typically these will not be exactly the same
+ * as the protobuf's (UCSC protobuf names are typically country/isolate/year|ID|date), so attempt
+ * to find component matches if an exact match isn't found.
+ * Only the first tab- or comma-separated field of each line is used, and lines whose first
+ * field is blank are ignored.  Returns the list of matched full tree names (NULL if none).
+ * Warns, with up to 5 examples, if some names could not be matched, or if no names were found
+ * at all. */
+{
+struct slName *sampleIds = NULL;
+struct slName *unmatched = NULL;
+struct hash *nameHash = getTreeNames(bigTree, aliasFile);
+char *line;
+while (lineFileNext(lf, &line, NULL))
+    {
+    // If tab-sep or comma-sep, just try first word in line
+    char *tab = strchr(line, '\t');
+    if (tab)
+        *tab = '\0';
+    else
+        {
+        char *comma = strchr(line, ',');
+        if (comma)
+            *comma = '\0';
+        }
+    // A blank line (or blank first field) is not a sample name; skip it so that it is not
+    // counted and shown to the user as an unmatched sequence ''.
+    char *name = trimSpaces(line);
+    if (name[0] == '\0')
+        continue;
+    char *match = matchName(nameHash, name);
+    if (match)
+        slNameAddHead(&sampleIds, match);
+    else
+        slNameAddHead(&unmatched, name);
 }
-struct mutationAnnotatedTree *bigTree = parseParsimonyProtobuf(usherAssignmentsPath);
+if (unmatched)
+    {
+    struct dyString *firstFew = dyStringNew(0);
+    int maxExamples = 5;
+    struct slName *example;
+    int i;
+    for (i = 0, example = unmatched;  example != NULL && i < maxExamples;
+         i++, example = example->next)
+        {
+        dyStringAppendSep(firstFew, ", ");
+        dyStringPrintf(firstFew, "'%s'", example->name);
+        }
+    warn("Unable to find %d of your sequences in the tree, e.g. %s",
+         slCount(unmatched), firstFew->string);
+    dyStringFree(&firstFew);
+    }
+else if (sampleIds == NULL)
+    warn("Could not find any names in input; empty file uploaded?");
+slNameFreeList(&unmatched);
+return sampleIds;
+}
+
+char *phyloPlaceSamples(struct lineFile *lf, char *db, char *defaultProtobuf,
+ boolean doMeasureTiming, int subtreeSize, int fontHeight,
+ boolean *retSuccess)
+/* Given a lineFile that contains either FASTA, VCF, or a list of sequence names/ids:
+ * If FASTA/VCF, then prepare VCF for usher; if that goes well then run usher, report results,
+ * make custom track files and return the top-level custom track file.
+ * If list of seq names/ids, then attempt to find their full names in the protobuf, run matUtils
+ * to make subtrees, show subtree results, and return NULL. Set retSuccess to TRUE if we were
+ * able to get at least some results for the user's input. */
+{
+char *ctFile = NULL;
+if (retSuccess)
+ *retSuccess = FALSE;
+measureTiming = doMeasureTiming;
+int startTime = clock1000();
+struct tempName *vcfTn = NULL;
+struct slName *sampleIds = NULL;
+char *usherPath = getUsherPath(TRUE);
+char *protobufPath = NULL;
+char *source = NULL;
+char *metadataFile = NULL;
+char *aliasFile = NULL;
+getProtobufMetadataSource(db, defaultProtobuf, &protobufPath, &metadataFile, &source, &aliasFile);
+struct mutationAnnotatedTree *bigTree = parseParsimonyProtobuf(protobufPath);
reportTiming(&startTime, "parse protobuf file");
if (! bigTree)
{
- warn("Problem parsing %s; can't make subtree subtracks.", usherAssignmentsPath);
+ warn("Problem parsing %s; can't make subtree subtracks.", protobufPath);
}
lineFileCarefulNewlines(lf);
struct slName **maskSites = getProblematicSites(db);
struct dnaSeq *refGenome = hChromSeq(db, chrom, 0, chromSize);
boolean isFasta = FALSE;
+boolean subtreesOnly = FALSE;
struct seqInfo *seqInfoList = NULL;
if (lfLooksLikeFasta(lf))
{
boolean *informativeBases = informativeBasesFromTree(bigTree->tree, maskSites);
struct slPair *failedSeqs;
struct slPair *failedPsls;
vcfTn = vcfFromFasta(lf, db, refGenome, informativeBases, maskSites,
&sampleIds, &seqInfoList, &failedSeqs, &failedPsls, &startTime);
if (failedSeqs)
{
puts("");
struct slPair *fail;
for (fail = failedSeqs; fail != NULL; fail = fail->next)
printf("%s
\n", fail->name);
puts("
");
@@ -2295,45 +2532,57 @@
printf("%s
\n", fail->name);
puts("
");
}
if (seqInfoList == NULL)
printf("Sorry, could not align any sequences to reference well enough to place in "
"the phylogenetic tree.
\n");
isFasta = TRUE;
}
else if (lfLooksLikeVcf(lf))
{
vcfTn = checkAndSaveVcf(lf, refGenome, maskSites, &seqInfoList, &sampleIds);
reportTiming(&startTime, "check uploaded VCF");
}
else
{
- if (isNotEmpty(lf->fileName))
- warn("Sorry, can't recognize your file %s as FASTA or VCF.\n", lf->fileName);
- else
- warn("Sorry, can't recognize your uploaded data as FASTA or VCF.\n");
+ subtreesOnly = TRUE;
+ sampleIds = readSampleIds(lf, bigTree, aliasFile);
}
lineFileClose(&lf);
+if (sampleIds == NULL)
+ {
+ return ctFile;
+ }
+struct usherResults *results = NULL;
if (vcfTn)
{
fflush(stdout);
- int seqCount = slCount(seqInfoList);
- struct usherResults *results = runUsher(usherPath, usherAssignmentsPath, vcfTn->forCgi,
+ results = runUsher(usherPath, protobufPath, vcfTn->forCgi,
subtreeSize, sampleIds, bigTree->condensedNodes,
&startTime);
- if (results->singleSubtreeInfo)
+ }
+else if (subtreesOnly)
+ {
+ char *matUtilsPath = getMatUtilsPath(TRUE);
+ results = runMatUtilsExtractSubtrees(matUtilsPath, protobufPath, subtreeSize,
+ sampleIds, bigTree->condensedNodes,
+ &startTime);
+ }
+if (results && results->singleSubtreeInfo)
{
+ if (retSuccess)
+ *retSuccess = TRUE;
puts("");
readQcThresholds(db);
int subtreeCount = slCount(results->subtreeInfoList);
// Sort subtrees by number of user samples (largest first).
slSort(&results->subtreeInfoList, subTreeInfoUserSampleCmp);
// Make Nextstrain/auspice JSON file for each subtree.
char *bigGenePredFile = phyloPlaceDbSettingPath(db, "bigGenePredFile");
struct geneInfo *geneInfoList = getGeneInfoList(bigGenePredFile, refGenome);
struct seqWindow *gSeqWin = chromSeqWindowNew(db, chrom, 0, chromSize);
struct hash *sampleMetadata = getSampleMetadata(metadataFile);
struct hash *sampleUrls = hashNew(0);
struct tempName *jsonTns[subtreeCount];
struct subtreeInfo *ti;
int ix;
for (ix = 0, ti = results->subtreeInfoList; ti != NULL; ti = ti->next, ix++)
@@ -2347,92 +2596,102 @@
// Add a link for every sample to this subtree, so the single-subtree JSON can
// link to subtree JSONs
char *subtreeUrl = nextstrainUrlFromTn(jsonTns[ix]);
struct slName *sample;
for (sample = ti->subtreeUserSampleIds; sample != NULL; sample = sample->next)
hashAdd(sampleUrls, sample->name, subtreeUrl);
}
struct tempName *singleSubtreeJsonTn;
AllocVar(singleSubtreeJsonTn);
trashDirFile(singleSubtreeJsonTn, "ct", "singleSubtreeAuspice", ".json");
treeToAuspiceJson(results->singleSubtreeInfo, db, geneInfoList, gSeqWin, sampleMetadata,
sampleUrls, singleSubtreeJsonTn->forCgi, source);
struct subtreeInfo *subtreeInfoForButtons = results->subtreeInfoList;
if (subtreeCount > MAX_SUBTREE_BUTTONS)
subtreeInfoForButtons = NULL;
- makeButtonRow(singleSubtreeJsonTn, jsonTns, subtreeInfoForButtons, subtreeSize, isFasta);
+ makeButtonRow(singleSubtreeJsonTn, jsonTns, subtreeInfoForButtons, subtreeSize, isFasta,
+ !subtreesOnly);
printf("If you have metadata you wish to display, click a 'view subtree in "
"Nextstrain' button, and then you can drag on a CSV file to "
"add it to the tree view."
"
\n");
+ struct tempName *tsvTn = NULL, *sTsvTn = NULL;
+ struct tempName *zipTn = makeSubtreeZipFile(results, jsonTns, singleSubtreeJsonTn,
+ &startTime);
+ struct tempName *ctTn = NULL;
+ if (! subtreesOnly)
+ {
findNearestNeighbors(results->samplePlacements, sampleMetadata, bigTree);
// Make custom tracks for uploaded samples and subtree(s).
struct phyloTree *sampleTree = NULL;
- struct tempName *ctTn = writeCustomTracks(vcfTn, results, sampleIds, bigTree->tree,
+ ctTn = writeCustomTracks(vcfTn, results, sampleIds, bigTree->tree,
source, fontHeight, &sampleTree, &startTime);
// Make a sample summary TSV file and accumulate S gene changes
struct hash *spikeChanges = hashNew(0);
- struct tempName *tsvTn = writeTsvSummary(results, sampleTree, sampleIds, seqInfoList,
+ tsvTn = writeTsvSummary(results, sampleTree, sampleIds, seqInfoList,
geneInfoList, gSeqWin, spikeChanges, &startTime);
- struct tempName *sTsvTn = writeSpikeChangeSummary(spikeChanges, slCount(sampleIds));
- struct tempName *zipTn = makeSubtreeZipFile(results, jsonTns, singleSubtreeJsonTn,
- &startTime);
+ sTsvTn = writeSpikeChangeSummary(spikeChanges, slCount(sampleIds));
downloadsRow(results->bigTreePlusTn->forHtml, tsvTn->forHtml, sTsvTn->forHtml,
zipTn->forHtml);
+ int seqCount = slCount(seqInfoList);
if (seqCount <= MAX_SEQ_DETAILS)
{
summarizeSequences(seqInfoList, isFasta, results, jsonTns, sampleMetadata, refGenome);
reportTiming(&startTime, "write summary table (including reading in lineages)");
for (ix = 0, ti = results->subtreeInfoList; ti != NULL; ti = ti->next, ix++)
{
int subtreeUserSampleCount = slCount(ti->subtreeUserSampleIds);
printf("Subtree %d: ", ix+1);
if (subtreeUserSampleCount > 1)
printf("%d related samples", subtreeUserSampleCount);
else if (subtreeCount > 1)
printf("Unrelated sample");
printf("
\n");
makeNextstrainButtonN("viewNextstrainSub", ix, subtreeUserSampleCount, subtreeSize,
jsonTns);
puts("
");
// Make a sub-subtree with only user samples for display:
struct phyloTree *subtree = phyloOpenTree(ti->subtreeTn->forCgi);
subtree = phyloPruneToIds(subtree, ti->subtreeUserSampleIds);
describeSamplePlacements(ti->subtreeUserSampleIds, results->samplePlacements,
subtree, sampleMetadata, source);
}
reportTiming(&startTime, "describe placements");
}
else
printf("(Skipping details; "
"you uploaded %d sequences, and details are shown only when "
"you upload at most %d sequences.)
\n",
seqCount, MAX_SEQ_DETAILS);
+ }
- // Offer big tree w/new samples for download
puts("Downloads
");
+ if (! subtreesOnly)
+ {
puts("");
+ // Offer big tree w/new samples for download
printf("- SARS-CoV-2 phylogenetic tree "
"with your samples (Newick file)\n", results->bigTreePlusTn->forHtml);
printf("
- TSV summary of sequences and placements\n",
tsvTn->forHtml);
printf("
- TSV summary of S (Spike) gene changes\n",
sTsvTn->forHtml);
+ }
printf("
- ZIP archive of subtree Newick and JSON files\n",
zipTn->forHtml);
// For now, leave in the individual links so I don't break anybody's pipeline that's
// scraping this page...
for (ix = 0, ti = results->subtreeInfoList; ti != NULL; ti = ti->next, ix++)
{
int subtreeUserSampleCount = slCount(ti->subtreeUserSampleIds);
printf("
- Subtree with %s", ti->subtreeTn->forHtml,
ti->subtreeUserSampleIds->name);
if (subtreeUserSampleCount > 10)
printf(" and %d other samples", subtreeUserSampleCount - 1);
else
{
struct slName *sln;
for (sln = ti->subtreeUserSampleIds->next; sln != NULL; sln = sln->next)
@@ -2441,30 +2700,32 @@
puts(" (Newick file)");
printf("
- Auspice JSON for subtree with %s",
jsonTns[ix]->forHtml, ti->subtreeUserSampleIds->name);
if (subtreeUserSampleCount > 10)
printf(" and %d other samples", subtreeUserSampleCount - 1);
else
{
struct slName *sln;
for (sln = ti->subtreeUserSampleIds->next; sln != NULL; sln = sln->next)
printf(", %s", sln->name);
}
puts(" (JSON file)");
}
puts("
");
+ if (!subtreesOnly)
+ {
// Notify in opposite order of custom track creation.
puts("Custom tracks for viewing in the Genome Browser
");
printf("Added custom track of uploaded samples.
\n");
if (subtreeCount > 0 && subtreeCount <= MAX_SUBTREE_CTS)
printf("Added %d subtree custom track%s.
\n",
subtreeCount, (subtreeCount > 1 ? "s" : ""));
ctFile = urlFromTn(ctTn);
}
+ }
else
{
warn("No subtree output from usher.\n");
}
- }
return ctFile;
}