0f5e8ee9ef8d65cf207610e97d1ce7c368623df5 angie Wed Jan 27 15:13:20 2021 -0800 qBaseInsert and tBaseInsert may include sequence skipped on both sides, e.g. due to NNN's. Count actual inserted and deleted bases more carefully. refs #26868 diff --git src/hg/hgPhyloPlace/phyloPlace.c src/hg/hgPhyloPlace/phyloPlace.c index 2a44636..6f64fe4 100644 --- src/hg/hgPhyloPlace/phyloPlace.c +++ src/hg/hgPhyloPlace/phyloPlace.c @@ -1108,34 +1108,34 @@ "beginning and/or end") "</th>"); else puts("<th>VCF Sample</th>\n" "<th>#Ns" TOOLTIP("Number of no-call variants for this sample in uploaded VCF, " "i.e. '.' used in genotype column") "</th>"); puts("<th>#Mixed" TOOLTIP("Number of IUPAC ambiguous bases, e.g. 'R' for 'A or G'") "</th>"); if (isFasta) puts("<th>Bases aligned" TOOLTIP("Number of bases aligned to reference NC_045512.2 Wuhan/Hu-1, including " "matches and mismatches") - "</th>\n<th>Insertions" + "</th>\n<th>Inserted bases" TOOLTIP("Number of bases in aligned portion of uploaded sequence that are not present in " "reference NC_045512.2 Wuhan/Hu-1") - "</th>\n<th>Deletions" + "</th>\n<th>Deleted bases" TOOLTIP("Number of bases in reference NC_045512.2 Wuhan/Hu-1 that are not " "present in aligned portion of uploaded sequence") "</th>"); puts("<th>#SNVs used for placement" TOOLTIP("Number of single-nucleotide variants in uploaded sample " "(does not include N's or mixed bases) used by UShER to place sample " "in phylogenetic tree") "</th>\n<th>#Masked SNVs" TOOLTIP("Number of single-nucleotide variants in uploaded sample that are masked " "(not used for placement) because they occur at known " "<a href='https://virological.org/t/issues-with-sars-cov-2-sequencing-data/473/12' " "target=_blank>Problematic Sites</a>") "</th>\n<th>Neighboring sample in tree" TOOLTIP("A sample already in the tree that is a child of the node at which the uploaded " "sample was placed, to give an example of a closely related sample") @@ -1323,46 +1323,69 @@ si->ambigCount - alignedAmbigCount); printTooltip(dy->string); } printf("</td>"); if (isFasta) { struct psl *psl = si->psl; if (psl) { int aliCount = psl->match + psl->misMatch + psl->repMatch; printf("<td class='%s'>%d ", qcClassForLength(aliCount), aliCount); dyStringClear(dy); dyStringPrintf(dy, "bases %d - %d align to reference bases %d - %d", psl->qStart+1, psl->qEnd, psl->tStart+1, psl->tEnd); printTooltip(dy->string); + int insBases = 0, insCount = 0, delBases = 0, delCount = 0; + if (psl->qBaseInsert || psl->tBaseInsert) + { + // Tally up actual insertions and deletions; ignore skipped N bases. + int ix; + for (ix = 0; ix < psl->blockCount - 1; ix++) + { + int qGapStart = psl->qStarts[ix] + psl->blockSizes[ix]; + int qGapEnd = psl->qStarts[ix+1]; + int qGapLen = qGapEnd - qGapStart; + int tGapStart = psl->tStarts[ix] + psl->blockSizes[ix]; + int tGapEnd = psl->tStarts[ix+1]; + int tGapLen = tGapEnd - tGapStart; + if (qGapLen > tGapLen) + { + insCount++; + insBases += qGapLen - tGapLen; + } + else if (tGapLen > qGapLen) + { + delCount++; + delBases += tGapLen - qGapLen; + } + } + } printf("</td><td class='%s'>%d ", - qcClassForIndel(psl->qBaseInsert), psl->qBaseInsert); - if (psl->qBaseInsert) + qcClassForIndel(insBases), insBases); + if (insBases) { dyStringClear(dy); - dyStringPrintf(dy, "%d bases in %d locations", - psl->qBaseInsert, psl->qNumInsert); + dyStringPrintf(dy, "%d bases in %d locations", insBases, insCount); printTooltip(dy->string); } printf("</td><td class='%s'>%d ", - qcClassForIndel(psl->tBaseInsert), psl->tBaseInsert); - if (psl->tBaseInsert) + qcClassForIndel(delBases), delBases); + if (delBases) { dyStringClear(dy); - dyStringPrintf(dy, "%d bases in %d locations", - psl->tBaseInsert, psl->tNumInsert); + dyStringPrintf(dy, "%d bases in %d locations", delBases, delCount); printTooltip(dy->string); } printf("</td>"); } else printf("<td colspan=3 class='%s'> not alignable </td>", qcClassForLength(0)); } int snvCount = slCount(si->sncList) - alignedAmbigCount; printf("<td class='%s'>%d", qcClassForSNVs(snvCount), snvCount); if (snvCount > 0) { dyStringClear(dy); struct singleNucChange *snc; for (snc = si->sncList; snc != NULL; snc = snc->next)