0f5e8ee9ef8d65cf207610e97d1ce7c368623df5
angie
  Wed Jan 27 15:13:20 2021 -0800
qBaseInsert and tBaseInsert may include sequence skipped on both sides, e.g. due to NNN's.  Count actual inserted and deleted bases more carefully.  refs #26868

diff --git src/hg/hgPhyloPlace/phyloPlace.c src/hg/hgPhyloPlace/phyloPlace.c
index 2a44636..6f64fe4 100644
--- src/hg/hgPhyloPlace/phyloPlace.c
+++ src/hg/hgPhyloPlace/phyloPlace.c
@@ -1108,34 +1108,34 @@
                  "beginning and/or end")
          "</th>");
 else
     puts("<th>VCF Sample</th>\n"
          "<th>#Ns"
          TOOLTIP("Number of no-call variants for this sample in uploaded VCF, "
                  "i.e. '.' used in genotype column")
          "</th>");
 puts("<th>#Mixed"
      TOOLTIP("Number of IUPAC ambiguous bases, e.g. 'R' for 'A or G'")
      "</th>");
 if (isFasta)
     puts("<th>Bases aligned"
          TOOLTIP("Number of bases aligned to reference NC_045512.2 Wuhan/Hu-1, including "
                  "matches and mismatches")
-         "</th>\n<th>Insertions"
+         "</th>\n<th>Inserted bases"
          TOOLTIP("Number of bases in aligned portion of uploaded sequence that are not present in "
                  "reference NC_045512.2 Wuhan/Hu-1")
-         "</th>\n<th>Deletions"
+         "</th>\n<th>Deleted bases"
          TOOLTIP("Number of bases in reference NC_045512.2 Wuhan/Hu-1 that are not "
                  "present in aligned portion of uploaded sequence")
          "</th>");
 puts("<th>#SNVs used for placement"
      TOOLTIP("Number of single-nucleotide variants in uploaded sample "
              "(does not include N's or mixed bases) used by UShER to place sample "
              "in phylogenetic tree")
      "</th>\n<th>#Masked SNVs"
      TOOLTIP("Number of single-nucleotide variants in uploaded sample that are masked "
              "(not used for placement) because they occur at known "
              "<a href='https://virological.org/t/issues-with-sars-cov-2-sequencing-data/473/12' "
              "target=_blank>Problematic Sites</a>")
      "</th>\n<th>Neighboring sample in tree"
      TOOLTIP("A sample already in the tree that is a child of the node at which the uploaded "
              "sample was placed, to give an example of a closely related sample")
@@ -1323,46 +1323,69 @@
                                si->ambigCount - alignedAmbigCount);
             printTooltip(dy->string);
             }
         printf("</td>");
         if (isFasta)
             {
             struct psl *psl = si->psl;
             if (psl)
                 {
                 int aliCount = psl->match + psl->misMatch + psl->repMatch;
                 printf("<td class='%s'>%d ", qcClassForLength(aliCount), aliCount);
                 dyStringClear(dy);
                 dyStringPrintf(dy, "bases %d - %d align to reference bases %d - %d",
                                psl->qStart+1, psl->qEnd, psl->tStart+1, psl->tEnd);
                 printTooltip(dy->string);
+                int insBases = 0, insCount = 0, delBases = 0, delCount = 0;
+                if (psl->qBaseInsert || psl->tBaseInsert)
+                    {
+                    // Tally up actual insertions and deletions; ignore skipped N bases.
+                    int ix;
+                    for (ix = 0;  ix < psl->blockCount - 1;  ix++)
+                        {
+                        int qGapStart = psl->qStarts[ix] + psl->blockSizes[ix];
+                        int qGapEnd = psl->qStarts[ix+1];
+                        int qGapLen = qGapEnd - qGapStart;
+                        int tGapStart = psl->tStarts[ix] + psl->blockSizes[ix];
+                        int tGapEnd = psl->tStarts[ix+1];
+                        int tGapLen = tGapEnd - tGapStart;
+                        if (qGapLen > tGapLen)
+                            {
+                            insCount++;
+                            insBases += qGapLen - tGapLen;
+                            }
+                        else if (tGapLen > qGapLen)
+                            {
+                            delCount++;
+                            delBases += tGapLen - qGapLen;
+                            }
+                        }
+                    }
                 printf("</td><td class='%s'>%d ",
-                       qcClassForIndel(psl->qBaseInsert), psl->qBaseInsert);
-                if (psl->qBaseInsert)
+                       qcClassForIndel(insBases), insBases);
+                if (insBases)
                     {
                     dyStringClear(dy);
-                    dyStringPrintf(dy, "%d bases in %d locations",
-                                   psl->qBaseInsert, psl->qNumInsert);
+                    dyStringPrintf(dy, "%d bases in %d locations", insBases, insCount);
                     printTooltip(dy->string);
                     }
                 printf("</td><td class='%s'>%d ",
-                       qcClassForIndel(psl->tBaseInsert), psl->tBaseInsert);
-                if (psl->tBaseInsert)
+                       qcClassForIndel(delBases), delBases);
+                if (delBases)
                     {
                     dyStringClear(dy);
-                    dyStringPrintf(dy, "%d bases in %d locations",
-                                   psl->tBaseInsert, psl->tNumInsert);
+                    dyStringPrintf(dy, "%d bases in %d locations", delBases, delCount);
                     printTooltip(dy->string);
                     }
                 printf("</td>");
                 }
             else
                 printf("<td colspan=3 class='%s'> not alignable </td>",
                        qcClassForLength(0));
             }
         int snvCount = slCount(si->sncList) - alignedAmbigCount;
         printf("<td class='%s'>%d", qcClassForSNVs(snvCount), snvCount);
         if (snvCount > 0)
             {
             dyStringClear(dy);
             struct singleNucChange *snc;
             for (snc = si->sncList;  snc != NULL;  snc = snc->next)