6c08bf0d066841b266c00d1da5e5be5652cde7c8
angie
  Thu Jan 28 14:49:34 2021 -0800
Now that the correct numbers of inserted and deleted bases are reported, make the tooltips more helpful by reporting the bases and locations.  sorta refs #26868

diff --git src/hg/hgPhyloPlace/phyloPlace.c src/hg/hgPhyloPlace/phyloPlace.c
index 6f64fe4..db037f4 100644
--- src/hg/hgPhyloPlace/phyloPlace.c
+++ src/hg/hgPhyloPlace/phyloPlace.c
@@ -1250,39 +1250,41 @@
 
 static void appendExcludingNs(struct dyString *dy, struct seqInfo *si)
 /* Append to dy a note on how many N bases at start and/or end are excluded from a statistic. */
 {
 dyStringAppend(dy, "excluding ");  // appended unconditionally, even if both counts are zero
 if (si->nCountStart)
     dyStringPrintf(dy, "%d N bases at start", si->nCountStart);
 if (si->nCountStart && si->nCountEnd)
     dyStringAppend(dy, " and ");
 if (si->nCountEnd)
     dyStringPrintf(dy, "%d N bases at end", si->nCountEnd);
 }
 
 static void summarizeSequences(struct seqInfo *seqInfoList, boolean isFasta,
                                struct usherResults *ur, struct tempName *jsonTns[],
-                               struct hash *sampleMetadata, struct mutationAnnotatedTree *bigTree)
+                               struct hash *sampleMetadata, struct mutationAnnotatedTree *bigTree,
+                               struct dnaSeq *refGenome)
 /* Show a table with composition & alignment stats for each sequence that passed basic QC. */
 {
 if (seqInfoList)
     {
     puts("<table class='seqSummary'>");
     printSummaryHeader(isFasta);
     puts("<tbody>");
     struct dyString *dy = dyStringNew(0);
+    struct dyString *dyExtra = dyStringNew(0);
     struct seqInfo *si;
     for (si = seqInfoList;  si != NULL;  si = si->next)
         {
         puts("<tr>");
         printf("<th>%s</td>", replaceChars(si->seq->name, "|", " | "));
         if (isFasta)
             {
             if (si->nCountStart || si->nCountEnd)
                 {
                 int effectiveLength = si->seq->size - (si->nCountStart + si->nCountEnd);
                 dyStringClear(dy);
                 dyStringPrintf(dy, "%d ", effectiveLength);
                 appendExcludingNs(dy, si);
                 dyStringPrintf(dy, " (original size %d)", si->seq->size);
                 printf("<td class='%s'>%d", qcClassForLength(effectiveLength), effectiveLength);
@@ -1327,66 +1329,94 @@
         if (isFasta)
             {
             struct psl *psl = si->psl;
             if (psl)
                 {
                 int aliCount = psl->match + psl->misMatch + psl->repMatch;
                 printf("<td class='%s'>%d ", qcClassForLength(aliCount), aliCount);
                 dyStringClear(dy);
                 dyStringPrintf(dy, "bases %d - %d align to reference bases %d - %d",
                                psl->qStart+1, psl->qEnd, psl->tStart+1, psl->tEnd);
                 printTooltip(dy->string);
                 int insBases = 0, insCount = 0, delBases = 0, delCount = 0;
                 if (psl->qBaseInsert || psl->tBaseInsert)
                     {
                     // Tally up actual insertions and deletions; ignore skipped N bases.
+                    dyStringClear(dy);
+                    dyStringClear(dyExtra);
                     int ix;
                     for (ix = 0;  ix < psl->blockCount - 1;  ix++)
                         {
                         int qGapStart = psl->qStarts[ix] + psl->blockSizes[ix];
                         int qGapEnd = psl->qStarts[ix+1];
                         int qGapLen = qGapEnd - qGapStart;
                         int tGapStart = psl->tStarts[ix] + psl->blockSizes[ix];
                         int tGapEnd = psl->tStarts[ix+1];
                         int tGapLen = tGapEnd - tGapStart;
                         if (qGapLen > tGapLen)
                             {
                             insCount++;
-                            insBases += qGapLen - tGapLen;
+                            int insLen = qGapLen - tGapLen;
+                            insBases += insLen;
+                            if (isNotEmpty(dy->string))
+                                dyStringAppend(dy, ", ");
+                            if (insLen <= 12)
+                                {
+                                char insSeq[insLen+1];
+                                safencpy(insSeq, sizeof insSeq, si->seq->dna + qGapEnd - insLen,
+                                         insLen);
+                                touppers(insSeq);
+                                dyStringPrintf(dy, "%d-%d:%s",
+                                               tGapEnd, tGapEnd+1, insSeq);
+                                }
+                            else
+                                dyStringPrintf(dy, "%d-%d:%d bases",
+                                               tGapEnd, tGapEnd+1, insLen);
                             }
                         else if (tGapLen > qGapLen)
                             {
                             delCount++;
-                            delBases += tGapLen - qGapLen;
+                            int delLen = tGapLen - qGapLen;;
+                            delBases += delLen;
+                            if (isNotEmpty(dyExtra->string))
+                                dyStringAppend(dyExtra, ", ");
+                            if (delLen <= 12)
+                                {
+                                char delSeq[delLen+1];
+                                safencpy(delSeq, sizeof delSeq, refGenome->dna + tGapEnd - delLen,
+                                         delLen);
+                                touppers(delSeq);
+                                dyStringPrintf(dyExtra, "%d-%d:%s",
+                                               tGapEnd - delLen + 1, tGapEnd, delSeq);
+                                }
+                            else
+                                dyStringPrintf(dyExtra, "%d-%d:%d bases",
+                                               tGapEnd - delLen + 1, tGapEnd, delLen);
                             }
                         }
                     }
                 printf("</td><td class='%s'>%d ",
                        qcClassForIndel(insBases), insBases);
                 if (insBases)
                     {
-                    dyStringClear(dy);
-                    dyStringPrintf(dy, "%d bases in %d locations", insBases, insCount);
                     printTooltip(dy->string);
                     }
                 printf("</td><td class='%s'>%d ",
                        qcClassForIndel(delBases), delBases);
                 if (delBases)
                     {
-                    dyStringClear(dy);
-                    dyStringPrintf(dy, "%d bases in %d locations", delBases, delCount);
-                    printTooltip(dy->string);
+                    printTooltip(dyExtra->string);
                     }
                 printf("</td>");
                 }
             else
                 printf("<td colspan=3 class='%s'> not alignable </td>",
                        qcClassForLength(0));
             }
         int snvCount = slCount(si->sncList) - alignedAmbigCount;
         printf("<td class='%s'>%d", qcClassForSNVs(snvCount), snvCount);
         if (snvCount > 0)
             {
             dyStringClear(dy);
             struct singleNucChange *snc;
             for (snc = si->sncList;  snc != NULL;  snc = snc->next)
                 {
@@ -1619,31 +1649,32 @@
         struct subtreeInfo *ti;
         int ix;
         for (ix = 0, ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, ix++)
             {
             AllocVar(jsonTns[ix]);
             trashDirFile(jsonTns[ix], "ct", "subtreeAuspice", ".json");
             treeToAuspiceJson(ti, db, refGenome, bigGenePredFile, sampleMetadata,
                               jsonTns[ix]->forCgi, source);
             }
         puts("<p></p>");
         makeButtonRow(jsonTns, subtreeCount, isFasta);
         printf("<p>If you have metadata you wish to display, click a 'view subtree in Nextstrain' "
                "button, and then you can drag on a CSV file to "
                "<a href='"NEXTSTRAIN_DRAG_DROP_DOC"' target=_blank>add it to the tree view</a>."
                "</p>\n");
-        summarizeSequences(seqInfoList, isFasta, results, jsonTns, sampleMetadata, bigTree);
+        summarizeSequences(seqInfoList, isFasta, results, jsonTns, sampleMetadata, bigTree,
+                           refGenome);
         reportTiming(&startTime, "write summary table (including reading in lineages)");
         for (ix = 0, ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, ix++)
             {
             int subtreeUserSampleCount = slCount(ti->subtreeUserSampleIds);
             printf("<h3>Subtree %d: ", ix+1);
             if (subtreeUserSampleCount > 1)
                 printf("%d related samples", subtreeUserSampleCount);
             else if (subtreeCount > 1)
                 printf("Unrelated sample");
             printf("</h3>\n");
             makeNextstrainButton("viewNextstrainSub", ix, jsonTns);
             puts("<br>");
             // Make a sub-subtree with only user samples for display:
             struct phyloTree *subtree = phyloOpenTree(ti->subtreeTn->forCgi);
             subtree = phyloPruneToIds(subtree, ti->subtreeUserSampleIds);