3fd9e9e8d0a977ee255486e60c785aef5689cc39 angie Tue Jul 13 09:42:47 2021 -0700 Add summary table for results from pasted/uploaded IDs, to show lineages & subtree numbers, requested by Russ. diff --git src/hg/hgPhyloPlace/phyloPlace.c src/hg/hgPhyloPlace/phyloPlace.c index 5ad13be..fb8b18e 100644 --- src/hg/hgPhyloPlace/phyloPlace.c +++ src/hg/hgPhyloPlace/phyloPlace.c @@ -1133,39 +1133,40 @@ int *retIx) /* Find the subtree that contains sample name and set *retIx to its index in the list. * If we can't find it, return NULL and set *retIx to -1. */ { struct subtreeInfo *ti; int ix; for (ti = subtreeInfoList, ix = 0; ti != NULL; ti = ti->next, ix++) if (slNameInList(ti->subtreeUserSampleIds, name)) break; if (ti == NULL) ix = -1; *retIx = ix; return ti; } -static void lookForCladesAndLineages(struct seqInfo *seqInfoList, struct hash *samplePlacements, +static void lookForCladesAndLineages(struct hash *samplePlacements, boolean *retGotClades, boolean *retGotLineages) /* See if UShER has annotated any clades and/or lineages for seqs. */ { boolean gotClades = FALSE, gotLineages = FALSE; -struct seqInfo *si; -for (si = seqInfoList; si != NULL; si = si->next) +struct hashEl *hel; +struct hashCookie cookie = hashFirst(samplePlacements); +while ((hel = hashNext(&cookie)) != NULL) { - struct placementInfo *pi = hashFindVal(samplePlacements, si->seq->name); + struct placementInfo *pi = hel->val; if (pi) { if (isNotEmpty(pi->nextClade)) gotClades = TRUE; if (isNotEmpty(pi->pangoLineage)) gotLineages = TRUE; if (gotClades && gotLineages) break; } } *retGotClades = gotClades; *retGotLineages = gotLineages; } static char *nextstrainHost() @@ -1496,40 +1497,61 @@ if (si->nCountEnd) dyStringPrintf(dy, "%d N bases at end", si->nCountEnd); } static void printLineageTd(char *lineage, char *alt) /* Print a table cell with lineage (& link to outbreak.info if not 'None') or alt if no lineage. */ { if (lineage && differentString(lineage, "None")) printf("<td><a href='"OUTBREAK_INFO_URLBASE"%s' target=_blank>%s</a></td>", lineage, lineage); else if (lineage) printf("<td>%s</td>", lineage); else printf("<td>%s</td>", alt); } +static void printSubtreeTd(struct subtreeInfo *subtreeInfoList, struct tempName *jsonTns[], + char *seqName) +/* Print a table cell with subtree (& link if possible) if found. */ +{ +int ix; +struct subtreeInfo *ti = subtreeInfoForSample(subtreeInfoList, seqName, &ix); +if (ix < 0) + //#*** Probably an error. + printf("<td>n/a</td>"); +else + { + printf("<td>%d", ix+1); + if (ti && nextstrainHost()) + { + char *nextstrainUrl = nextstrainUrlFromTn(jsonTns[ix]); + printf(" (<a href='%s' target=_blank>view in Nextstrain<a>)", nextstrainUrl); + } + printf("</td>"); + } +} + static void summarizeSequences(struct seqInfo *seqInfoList, boolean isFasta, struct usherResults *ur, struct tempName *jsonTns[], struct hash *sampleMetadata, struct dnaSeq *refGenome) /* Show a table with composition & alignment stats for each sequence that passed basic QC. */ { if (seqInfoList) { puts("<table class='seqSummary'>"); boolean gotClades = FALSE, gotLineages = FALSE; - lookForCladesAndLineages(seqInfoList, ur->samplePlacements, &gotClades, &gotLineages); + lookForCladesAndLineages(ur->samplePlacements, &gotClades, &gotLineages); printSummaryHeader(isFasta, gotClades, gotLineages); puts("<tbody>"); struct dyString *dy = dyStringNew(0); struct seqInfo *si; for (si = seqInfoList; si != NULL; si = si->next) { puts("<tr>"); printf("<th>%s</td>", replaceChars(si->seq->name, "|", " | ")); if (isFasta) { if (si->nCountStart || si->nCountEnd) { int effectiveLength = si->seq->size - (si->nCountStart + si->nCountEnd); dyStringClear(dy); dyStringPrintf(dy, "%d ", effectiveLength); @@ -1670,50 +1692,96 @@ } printf("</td><td class='%s'>%d", qcClassForPlacements(pi->bestNodeCount), pi->bestNodeCount); printf("</td><td class='%s'>%d", qcClassForPScore(pi->parsimonyScore), pi->parsimonyScore); printf("</td>"); } else { if (gotClades) printf("<td>n/a></td>"); if (gotLineages) printf("<td>n/a></td>"); printf("<td>n/a</td><td>n/a</td><td>n/a</td><td>n/a</td><td>n/a</td>"); } - int ix; - struct subtreeInfo *ti = subtreeInfoForSample(ur->subtreeInfoList, si->seq->name, &ix); - if (ix < 0) - //#*** Probably an error. - printf("<td>n/a</td>"); - else + printSubtreeTd(ur->subtreeInfoList, jsonTns, si->seq->name); + puts("</tr>"); + } + puts("</tbody></table><p></p>"); + } +} + +static void summarizeSubtrees(struct slName *sampleIds, struct usherResults *results, + struct hash *sampleMetadata, struct tempName *jsonTns[], + struct mutationAnnotatedTree *bigTree) +/* Print a summary table of pasted/uploaded identifiers and subtrees */ { - printf("<td>%d", ix+1); - if (ti && nextstrainHost()) +boolean gotClades = FALSE, gotLineages = FALSE; +lookForCladesAndLineages(results->samplePlacements, &gotClades, &gotLineages); +puts("<table class='seqSummary'><tbody>"); +puts("<tr><th>Sequence</th>"); +if (gotClades) + puts("<th>Nextstrain clade (UShER)" + TOOLTIP("The <a href='https://nextstrain.org/blog/2021-01-06-updated-SARS-CoV-2-clade-naming' " + "target=_blank>Nextstrain clade</a> " + "assigned to the sequence by UShER according to its place in the phylogenetic tree") + "</th>"); +if (gotLineages) + puts("<th>Pango lineage (UShER)" + TOOLTIP("The <a href='https://cov-lineages.org/' " + "target=_blank>Pango lineage</a> " + "assigned to the sequence by UShER according to its place in the phylogenetic tree") + "</th>"); +puts("<th>Pango lineage (pangolin)" + TOOLTIP("The <a href='https://cov-lineages.org/' target=_blank>" + "Pango lineage</a> assigned to the sequence by " + "<a href='https://github.com/cov-lineages/pangolin/' target=_blank>pangolin</a>") + "</th>" + "<th>subtree</th></tr>"); +struct slName *si; +for (si = sampleIds; si != NULL; si = si->next) { - char *nextstrainUrl = nextstrainUrlFromTn(jsonTns[ix]); - printf(" (<a href='%s' target=_blank>view in Nextstrain<a>)", nextstrainUrl); + puts("<tr>"); + printf("<th>%s</td>", replaceChars(si->name, "|", " | ")); + struct placementInfo *pi = hashFindVal(results->samplePlacements, si->name); + if (pi) + { + if (gotClades) + printf("<td>%s</td>", pi->nextClade ? pi->nextClade : "n/a"); + if (gotLineages) + printLineageTd(pi->pangoLineage, "n/a"); } - printf("</td>"); + else + { + if (gotClades) + printf("<td>n/a></td>"); + if (gotLineages) + printf("<td>n/a></td>"); } - puts("</tr>"); + // pangolin-assigned lineage + char *lineage = lineageForSample(sampleMetadata, si->name); + if (isNotEmpty(lineage)) + printf("<td><a href='"OUTBREAK_INFO_URLBASE"%s' target=_blank>%s</a></td>", + lineage, lineage); + else + printf("<td>n/a></td>"); + // Maybe also #mutations with mouseover to show mutation path? + printSubtreeTd(results->subtreeInfoList, jsonTns, si->name); } puts("</tbody></table><p></p>"); } -} static struct singleNucChange *sncListFromSampleMutsAndImputed(struct slName *sampleMuts, struct baseVal *imputedBases, struct seqWindow *gSeqWin) /* Convert a list of "<ref><pos><alt>" names to struct singleNucChange list. * However, if <alt> is ambiguous, skip it because variantProjector doesn't like it. * Add imputed base predictions. */ { struct singleNucChange *sncList = NULL; struct slName *mut; for (mut = sampleMuts; mut != NULL; mut = mut->next) { char ref = mut->name[0]; if (ref < 'A' || ref > 'Z') errAbort("sncListFromSampleMuts: expected ref base value, got '%c' in '%s'", @@ -2631,31 +2699,36 @@ sampleUrls, singleSubtreeJsonTn->forCgi, source); struct subtreeInfo *subtreeInfoForButtons = results->subtreeInfoList; if (subtreeCount > MAX_SUBTREE_BUTTONS) subtreeInfoForButtons = NULL; makeButtonRow(singleSubtreeJsonTn, jsonTns, subtreeInfoForButtons, subtreeSize, isFasta, !subtreesOnly); printf("<p>If you have metadata you wish to display, click a 'view subtree in " "Nextstrain' button, and then you can drag on a CSV file to " "<a href='"NEXTSTRAIN_DRAG_DROP_DOC"' target=_blank>add it to the tree view</a>." "</p>\n"); struct tempName *tsvTn = NULL, *sTsvTn = NULL; struct tempName *zipTn = makeSubtreeZipFile(results, jsonTns, singleSubtreeJsonTn, &startTime); struct tempName *ctTn = NULL; - if (! subtreesOnly) + if (subtreesOnly) + { + summarizeSubtrees(sampleIds, results, sampleMetadata, jsonTns, bigTree); + reportTiming(&startTime, "describe subtrees"); + } + else { findNearestNeighbors(results->samplePlacements, sampleMetadata, bigTree); // Make custom tracks for uploaded samples and subtree(s). struct phyloTree *sampleTree = NULL; ctTn = writeCustomTracks(vcfTn, results, sampleIds, bigTree->tree, source, fontHeight, &sampleTree, &startTime); // Make a sample summary TSV file and accumulate S gene changes struct hash *spikeChanges = hashNew(0); tsvTn = writeTsvSummary(results, sampleTree, sampleIds, seqInfoList, geneInfoList, gSeqWin, spikeChanges, &startTime); sTsvTn = writeSpikeChangeSummary(spikeChanges, slCount(sampleIds)); downloadsRow(results->bigTreePlusTn->forHtml, tsvTn->forHtml, sTsvTn->forHtml, zipTn->forHtml);