6c08bf0d066841b266c00d1da5e5be5652cde7c8 angie Thu Jan 28 14:49:34 2021 -0800 Now that the correct numbers of inserted and deleted bases are reported, make the tooltips more helpful by reporting the bases and locations. sorta refs #26868 diff --git src/hg/hgPhyloPlace/phyloPlace.c src/hg/hgPhyloPlace/phyloPlace.c index 6f64fe4..db037f4 100644 --- src/hg/hgPhyloPlace/phyloPlace.c +++ src/hg/hgPhyloPlace/phyloPlace.c @@ -1250,39 +1250,41 @@ static void appendExcludingNs(struct dyString *dy, struct seqInfo *si) /* Append a note to dy about how many N bases and start and/or end are excluded from statistic. */ { dyStringAppend(dy, "excluding "); if (si->nCountStart) dyStringPrintf(dy, "%d N bases at start", si->nCountStart); if (si->nCountStart && si->nCountEnd) dyStringAppend(dy, " and "); if (si->nCountEnd) dyStringPrintf(dy, "%d N bases at end", si->nCountEnd); } static void summarizeSequences(struct seqInfo *seqInfoList, boolean isFasta, struct usherResults *ur, struct tempName *jsonTns[], - struct hash *sampleMetadata, struct mutationAnnotatedTree *bigTree) + struct hash *sampleMetadata, struct mutationAnnotatedTree *bigTree, + struct dnaSeq *refGenome) /* Show a table with composition & alignment stats for each sequence that passed basic QC. */ { if (seqInfoList) { puts("<table class='seqSummary'>"); printSummaryHeader(isFasta); puts("<tbody>"); struct dyString *dy = dyStringNew(0); + struct dyString *dyExtra = dyStringNew(0); struct seqInfo *si; for (si = seqInfoList; si != NULL; si = si->next) { puts("<tr>"); printf("<th>%s</td>", replaceChars(si->seq->name, "|", " | ")); if (isFasta) { if (si->nCountStart || si->nCountEnd) { int effectiveLength = si->seq->size - (si->nCountStart + si->nCountEnd); dyStringClear(dy); dyStringPrintf(dy, "%d ", effectiveLength); appendExcludingNs(dy, si); dyStringPrintf(dy, " (original size %d)", si->seq->size); printf("<td class='%s'>%d", qcClassForLength(effectiveLength), effectiveLength); @@ -1327,66 +1329,94 @@ if (isFasta) { struct psl *psl = si->psl; if (psl) { int aliCount = psl->match + psl->misMatch + psl->repMatch; printf("<td class='%s'>%d ", qcClassForLength(aliCount), aliCount); dyStringClear(dy); dyStringPrintf(dy, "bases %d - %d align to reference bases %d - %d", psl->qStart+1, psl->qEnd, psl->tStart+1, psl->tEnd); printTooltip(dy->string); int insBases = 0, insCount = 0, delBases = 0, delCount = 0; if (psl->qBaseInsert || psl->tBaseInsert) { // Tally up actual insertions and deletions; ignore skipped N bases. + dyStringClear(dy); + dyStringClear(dyExtra); int ix; for (ix = 0; ix < psl->blockCount - 1; ix++) { int qGapStart = psl->qStarts[ix] + psl->blockSizes[ix]; int qGapEnd = psl->qStarts[ix+1]; int qGapLen = qGapEnd - qGapStart; int tGapStart = psl->tStarts[ix] + psl->blockSizes[ix]; int tGapEnd = psl->tStarts[ix+1]; int tGapLen = tGapEnd - tGapStart; if (qGapLen > tGapLen) { insCount++; - insBases += qGapLen - tGapLen; + int insLen = qGapLen - tGapLen; + insBases += insLen; + if (isNotEmpty(dy->string)) + dyStringAppend(dy, ", "); + if (insLen <= 12) + { + char insSeq[insLen+1]; + safencpy(insSeq, sizeof insSeq, si->seq->dna + qGapEnd - insLen, + insLen); + touppers(insSeq); + dyStringPrintf(dy, "%d-%d:%s", + tGapEnd, tGapEnd+1, insSeq); + } + else + dyStringPrintf(dy, "%d-%d:%d bases", + tGapEnd, tGapEnd+1, insLen); } else if (tGapLen > qGapLen) { delCount++; - delBases += tGapLen - qGapLen; + int delLen = tGapLen - qGapLen;; + delBases += delLen; + if (isNotEmpty(dyExtra->string)) + dyStringAppend(dyExtra, ", "); + if (delLen <= 12) + { + char delSeq[delLen+1]; + safencpy(delSeq, sizeof delSeq, refGenome->dna + tGapEnd - delLen, + delLen); + touppers(delSeq); + dyStringPrintf(dyExtra, "%d-%d:%s", + tGapEnd - delLen + 1, tGapEnd, delSeq); + } + else + dyStringPrintf(dyExtra, "%d-%d:%d bases", + tGapEnd - delLen + 1, tGapEnd, delLen); } } } printf("</td><td class='%s'>%d ", qcClassForIndel(insBases), insBases); if (insBases) { - dyStringClear(dy); - dyStringPrintf(dy, "%d bases in %d locations", insBases, insCount); printTooltip(dy->string); } printf("</td><td class='%s'>%d ", qcClassForIndel(delBases), delBases); if (delBases) { - dyStringClear(dy); - dyStringPrintf(dy, "%d bases in %d locations", delBases, delCount); - printTooltip(dy->string); + printTooltip(dyExtra->string); } printf("</td>"); } else printf("<td colspan=3 class='%s'> not alignable </td>", qcClassForLength(0)); } int snvCount = slCount(si->sncList) - alignedAmbigCount; printf("<td class='%s'>%d", qcClassForSNVs(snvCount), snvCount); if (snvCount > 0) { dyStringClear(dy); struct singleNucChange *snc; for (snc = si->sncList; snc != NULL; snc = snc->next) { @@ -1619,31 +1649,32 @@ struct subtreeInfo *ti; int ix; for (ix = 0, ti = results->subtreeInfoList; ti != NULL; ti = ti->next, ix++) { AllocVar(jsonTns[ix]); trashDirFile(jsonTns[ix], "ct", "subtreeAuspice", ".json"); treeToAuspiceJson(ti, db, refGenome, bigGenePredFile, sampleMetadata, jsonTns[ix]->forCgi, source); } puts("<p></p>"); makeButtonRow(jsonTns, subtreeCount, isFasta); printf("<p>If you have metadata you wish to display, click a 'view subtree in Nextstrain' " "button, and then you can drag on a CSV file to " "<a href='"NEXTSTRAIN_DRAG_DROP_DOC"' target=_blank>add it to the tree view</a>." "</p>\n"); - summarizeSequences(seqInfoList, isFasta, results, jsonTns, sampleMetadata, bigTree); + summarizeSequences(seqInfoList, isFasta, results, jsonTns, sampleMetadata, bigTree, + refGenome); reportTiming(&startTime, "write summary table (including reading in lineages)"); for (ix = 0, ti = results->subtreeInfoList; ti != NULL; ti = ti->next, ix++) { int subtreeUserSampleCount = slCount(ti->subtreeUserSampleIds); printf("<h3>Subtree %d: ", ix+1); if (subtreeUserSampleCount > 1) printf("%d related samples", subtreeUserSampleCount); else if (subtreeCount > 1) printf("Unrelated sample"); printf("</h3>\n"); makeNextstrainButton("viewNextstrainSub", ix, jsonTns); puts("<br>"); // Make a sub-subtree with only user samples for display: struct phyloTree *subtree = phyloOpenTree(ti->subtreeTn->forCgi); subtree = phyloPruneToIds(subtree, ti->subtreeUserSampleIds);