0f5e8ee9ef8d65cf207610e97d1ce7c368623df5
angie
Wed Jan 27 15:13:20 2021 -0800
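qBaseInsert and tBaseInsert may include sequence that is skipped on both sides of the alignment (query and target), e.g. due to runs of N's. Count the actual inserted and deleted bases more carefully by using only the net length difference of each gap. refs #26868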
diff --git src/hg/hgPhyloPlace/phyloPlace.c src/hg/hgPhyloPlace/phyloPlace.c
index 2a44636..6f64fe4 100644
--- src/hg/hgPhyloPlace/phyloPlace.c
+++ src/hg/hgPhyloPlace/phyloPlace.c
@@ -1108,34 +1108,34 @@
"beginning and/or end")
"");
else
puts("
VCF Sample | \n"
"#Ns"
TOOLTIP("Number of no-call variants for this sample in uploaded VCF, "
"i.e. '.' used in genotype column")
" | ");
puts("#Mixed"
TOOLTIP("Number of IUPAC ambiguous bases, e.g. 'R' for 'A or G'")
" | ");
if (isFasta)
puts("Bases aligned"
TOOLTIP("Number of bases aligned to reference NC_045512.2 Wuhan/Hu-1, including "
"matches and mismatches")
- " | \nInsertions"
+ " | \nInserted bases"
TOOLTIP("Number of bases in aligned portion of uploaded sequence that are not present in "
"reference NC_045512.2 Wuhan/Hu-1")
- " | \nDeletions"
+ " | \nDeleted bases"
TOOLTIP("Number of bases in reference NC_045512.2 Wuhan/Hu-1 that are not "
"present in aligned portion of uploaded sequence")
" | ");
puts("#SNVs used for placement"
TOOLTIP("Number of single-nucleotide variants in uploaded sample "
"(does not include N's or mixed bases) used by UShER to place sample "
"in phylogenetic tree")
" | \n#Masked SNVs"
TOOLTIP("Number of single-nucleotide variants in uploaded sample that are masked "
"(not used for placement) because they occur at known "
"Problematic Sites")
" | \nNeighboring sample in tree"
TOOLTIP("A sample already in the tree that is a child of the node at which the uploaded "
"sample was placed, to give an example of a closely related sample")
@@ -1323,46 +1323,69 @@
si->ambigCount - alignedAmbigCount);
printTooltip(dy->string);
}
printf("");
if (isFasta)
{
struct psl *psl = si->psl;
if (psl)
{
int aliCount = psl->match + psl->misMatch + psl->repMatch;
printf(" | %d ", qcClassForLength(aliCount), aliCount);
dyStringClear(dy);
dyStringPrintf(dy, "bases %d - %d align to reference bases %d - %d",
psl->qStart+1, psl->qEnd, psl->tStart+1, psl->tEnd);
printTooltip(dy->string);
+ int insBases = 0, insCount = 0, delBases = 0, delCount = 0;
+ if (psl->qBaseInsert || psl->tBaseInsert)
+ {
+ // Tally net inserted/deleted bases per gap between blocks; bases skipped on both query and target (e.g. runs of N) are not counted.
+ int ix;
+ for (ix = 0; ix < psl->blockCount - 1; ix++)
+ {
+ int qGapStart = psl->qStarts[ix] + psl->blockSizes[ix];
+ int qGapEnd = psl->qStarts[ix+1];
+ int qGapLen = qGapEnd - qGapStart;
+ int tGapStart = psl->tStarts[ix] + psl->blockSizes[ix];
+ int tGapEnd = psl->tStarts[ix+1];
+ int tGapLen = tGapEnd - tGapStart;
+ if (qGapLen > tGapLen)
+ {
+ insCount++;
+ insBases += qGapLen - tGapLen;
+ }
+ else if (tGapLen > qGapLen)
+ {
+ delCount++;
+ delBases += tGapLen - qGapLen;
+ }
+ }
+ }
printf(" | %d ",
- qcClassForIndel(psl->qBaseInsert), psl->qBaseInsert);
- if (psl->qBaseInsert)
+ qcClassForIndel(insBases), insBases);
+ if (insBases)
{
dyStringClear(dy);
- dyStringPrintf(dy, "%d bases in %d locations",
- psl->qBaseInsert, psl->qNumInsert);
+ dyStringPrintf(dy, "%d bases in %d locations", insBases, insCount);
printTooltip(dy->string);
}
printf(" | %d ",
- qcClassForIndel(psl->tBaseInsert), psl->tBaseInsert);
- if (psl->tBaseInsert)
+ qcClassForIndel(delBases), delBases);
+ if (delBases)
{
dyStringClear(dy);
- dyStringPrintf(dy, "%d bases in %d locations",
- psl->tBaseInsert, psl->tNumInsert);
+ dyStringPrintf(dy, "%d bases in %d locations", delBases, delCount);
printTooltip(dy->string);
}
printf(" | ");
}
else
printf(" not alignable | ",
qcClassForLength(0));
}
int snvCount = slCount(si->sncList) - alignedAmbigCount;
printf("%d", qcClassForSNVs(snvCount), snvCount);
if (snvCount > 0)
{
dyStringClear(dy);
struct singleNucChange *snc;
for (snc = si->sncList; snc != NULL; snc = snc->next)
|