ffab4ecbdda06b66cf091326108670702e498d0e
hiram
  Fri Dec 5 11:16:22 2025 -0800
new toga click code from Michael Hiller for version 2 TOGA refs #35776

diff --git src/hg/hgc/togaClick.c src/hg/hgc/togaClick.c
index 00a4c8c3a32..123f3246292 100644
--- src/hg/hgc/togaClick.c
+++ src/hg/hgc/togaClick.c
@@ -1,25 +1,25 @@
 /* togaClick - click handling for TOGA tracks */
 #include "common.h"
 #include "hgc.h"
 #include "togaClick.h"
 #include "string.h"
 #include "htmshell.h"
 #include "chromAlias.h"
 
 
-struct togaDataBB *togaDataBBLoad(char **row)
+struct togaDataBB *togaDataBBLoad(char **row, bits16 fieldCount)
 /* Load a togaData from row fetched with select * from togaData
  * from database.  Dispose of this with togaDataFree(). */
 {
     struct togaDataBB *ret;
     AllocVar(ret);
     ret->projection = cloneString(row[0]);
     ret->ref_trans_id = cloneString(row[1]);
     ret->ref_region = cloneString(row[2]);
     ret->query_region = cloneString(row[3]);
     ret->chain_score = cloneString(row[4]);
 
     ret->chain_synteny = cloneString(row[5]);
     ret->chain_flank = cloneString(row[6]);
     ret->chain_gl_cds_fract = cloneString(row[7]);
     ret->chain_loc_cds_fract = cloneString(row[8]);
@@ -27,30 +27,45 @@
 
     ret->chain_intron_cov = cloneString(row[10]);
     ret->status = cloneString(row[11]);
     ret->perc_intact_ign_M = cloneString(row[12]);
     ret->perc_intact_int_M = cloneString(row[13]);
     ret->intact_codon_prop = cloneString(row[14]);
 
     ret->ouf_prop = cloneString(row[15]);
     ret->mid_intact = cloneString(row[16]);
     ret->mid_pres = cloneString(row[17]);
     ret->prot_alignment = cloneString(row[18]);
     ret->svg_line = cloneString(row[19]);
     ret->ref_link = cloneString(row[20]);
     ret->inact_mut_html_table = cloneString(row[21]);
     ret->exon_ali_html = cloneString(row[22]);
+
+    /* read two optional new items, CDSseq and frame-corrected protein */
+    ret->CDSseq = NULL;
+    if (fieldCount >= 35)   /* 0-11 are bed core fields. This fct gets a pointer starting with element 12. 12+23 = 35 */
+       ret->CDSseq = cloneString(row[23]);
+    ret->protseqFrameCorrected = NULL;
+    if (fieldCount >= 36)   /* 12+24 = 36 */
+       ret->protseqFrameCorrected = cloneString(row[24]);
+    /* number and percent of mutated exons (additional data now shown for the gene loss status) */
+    ret->numExonsMutated = NULL;
+    ret->percentExonsMutated = NULL;
+    if (fieldCount >= 37)   /* 12+25 = 37 */
+       ret->numExonsMutated = cloneString(row[25]);
+    if (fieldCount >= 38)   /* 12+26 = 38 */
+       ret->percentExonsMutated = cloneString(row[26]);
     return ret;
 }
 
 
 struct togaData *togaDataLoad(char **row)
 /* Load a togaData from row fetched with select * from togaData
  * from database.  Dispose of this with togaDataFree(). */
 {
     struct togaData *ret;
     AllocVar(ret);
     ret->projection = cloneString(row[0]);
     ret->ref_trans_id = cloneString(row[1]);
     ret->ref_region = cloneString(row[2]);
     ret->query_region = cloneString(row[3]);
     ret->chain_score = cloneString(row[4]);
@@ -253,30 +268,42 @@
         str += 10;
         char ch;
         while ((ch = *str++) != '<') {
             if (ch != '-') {
                 putchar(ch);
                 ++printed_char_num;
             }
             if (printed_char_num == 80) {
                 printed_char_num = 0;
                 printf("<BR>");
             }
         }
     }
 }
 
+void print_with_newlines(const char *str) {  /* Print str to stdout in consecutive chunks of at most 80 characters, writing "<BR>" after every chunk. */
+    int line_length = 80; // Maximum number of characters printed before each <BR>
+    int length = strlen(str);  // str must be a non-NULL, NUL-terminated string; an empty string prints nothing
+    int i = 0;  // offset into str of the next character that has not been printed yet
+
+    while (i < length) {
+        /* Print up to 80 characters or the remainder of the string; the last (possibly shorter) chunk is also followed by <BR> */
+        int chars_to_print = (length - i < line_length) ? (length - i) : line_length;
+        printf("%.*s<BR>", chars_to_print, &str[i]);  // the ".*" precision caps %s at chars_to_print characters
+        i += chars_to_print;
+    }
+}
 
 
 void doHillerLabTOGAGeneBig(char *database, struct trackDb *tdb, char *item, char *table_name)
 /* Put up TOGA Gene track info. */
 // To think about -> put into a single bigBed
 // string: HTML formatted inact mut
 // string: HTML formatted exon ali section
 {
 int start = cartInt(cart, "o");
 int end = cartInt(cart, "t");
 char *chrom = cartString(cart, "c");
 char *fileName = bbiNameFromSettingOrTable(tdb, NULL, tdb->table);
 struct bbiFile *bbi =  bigBedFileOpenAlias(hReplaceGbdb(fileName), chromAliasFindAliases);
 struct lm *lm = lmInit(0);
 struct bigBedInterval *bbList = bigBedIntervalQuery(bbi, chrom, start, end, 0, lm);
@@ -286,32 +313,32 @@
     {
     if (!(bb->start == start && bb->end == end))
 	continue;
 
     // our names are unique
     char *name = cloneFirstWordByDelimiterNoSkip(bb->rest, '\t');
     boolean match = (isEmpty(name) && isEmpty(item)) || sameOk(name, item);
     if (!match)
         continue;
 
     char startBuf[16], endBuf[16];
     bigBedIntervalToRow(bb, chrom, startBuf, endBuf, fields, bbi->fieldCount);
     break;
     }
 
-printf("<h3>Projection %s</h3><BR>\n", item);
-struct togaDataBB       *info = togaDataBBLoad(&fields[11]);  // Bogdan: why 11? 0-11 are bed-like fields likely 
+printf("<h3>Projection v2 %s</h3>\n", item);
+struct togaDataBB *info = togaDataBBLoad(&fields[11], bbi->fieldCount);  // Bogdan: why 11? 0-11 are bed-like fields likely
 
 printf("<B>Reference transcript: </B>%s<BR>", info->ref_link);
 printf("<B>Genomic locus in reference: </B>%s<BR>\n", info->ref_region);
 printf("<B>Genomic locus in query: </B>%s<BR>\n", info->query_region);
 
 printf("<B>Projection classification: </B>%s<BR>\n", info->status);
 printf("<B>Probability that query locus is orthologous: </B>%s<BR>\n", info->chain_score);
 // list of chain features (for orthology classification)
 printf("<a data-toggle=\"collapse\" href=\"#collapseChain\">Show features used for ortholog probability</a>\n");
 printf("<div id=\"collapseChain\" class=\"panel-collapse collapse\">\n");
 printf("<ul>\n");
 printf("<li>Synteny (log10 value): %s</li>\n", info->chain_synteny);
 printf("<li>Global CDS fraction: %s</li>\n", info->chain_gl_cds_fract);
 printf("<li>Local CDS fraction: %s</li>\n", info->chain_loc_cds_fract);
 printf("<li>Local intron fraction: %s</li>\n", info->chain_intron_cov);
@@ -342,105 +369,125 @@
 printf("<li>&quot;global CDS fraction&quot; as C / A. Chains with a high value have alignments that largely overlap coding exons,");
 printf("which is a hallmark of paralogous or processed pseudogene chains. In contrast, chains with a low value also align many ");
 printf("intronic and intergenic regions, which is a hallmark of orthologous chains. </li>\n");
 printf("<li>&quot;local CDS fraction&quot; as c / a. Orthologous chains tend to have a lower value, as intronic ");
 printf("regions partially align. This feature is not computed for single-exon genes. </li>\n");
 printf("<li>&quot;local intron fraction&quot; as i / I. Orthologous chains tend to have a higher value.");
 printf("This feature is not computed for single-exon genes. </li>\n");
 printf("<li>&quot;flank fraction&quot; as f / 20,000. Orthologous chains tend to have higher values,");
 printf("as flanking intergenic regions partially align. This feature is important to detect orthologous loci of single-exon genes. </li>\n");
 printf("<li>&quot;synteny&quot; as log10 of the number of genes, whose coding exons overlap by at least one base aligning");
 printf("blocks of this chain. Orthologous chains tend to cover several genes located in a conserved order, resulting in higher synteny values. </li>\n");
 printf("<li>&quot;local CDS coverage&quot; as c / CDS, which is only used for single-exon genes. </li>\n");
 printf("</ul>\n");
 
 
-printf("</ul>\n</div>\n<BR>\n");
-htmlHorizontalLine();
+printf("</ul>\n</div>\n");
+printf("<hr style='margin-bottom:-0.5em;color:black;'>\n");
 
 // show inact mut plot
 printf("<h4>Visualization of inactivating mutations on exon-intron structure</h4>\n");
-printf("%s<BR>\n", info->svg_line);
+printf("%s\n", info->svg_line);
 printf("<BR>Exons shown in grey are missing (often overlap assembly gaps).\nExons shown in");
 printf(" red or blue are deleted or do not align at all.\nRed indicates that the exon deletion ");
 printf("shifts the reading frame, while blue indicates that exon deletion(s) are framepreserving.<br>\n");
 
 // GLP features
 printf("<a data-toggle=\"collapse\" href=\"#collapseGLP\">Show features used for transcript classification</a>\n");
 printf("<div id=\"collapseGLP\" class=\"panel-collapse collapse\">\n");
 printf("<ul>\n");
 printf("<li>Percent intact, ignoring missing sequence: %s</li>\n", info->perc_intact_ign_M);
 printf("<li>Percent intact, treating missing as intact sequence: %s</li>\n", info->perc_intact_int_M);
 printf("<li>Proportion of intact codons: %s</li>\n", info->intact_codon_prop);
 printf("<li>Percent of CDS not covered by this chain (0 unless the chain covers only a part of the gene): %s</li>\n", info->ouf_prop);
 if (sameWord(info->mid_intact, ONE_))
 {
     printf("<li>Middle 80 percent of CDS intact: %s</li>\n", YES_);
 } else {
     printf("<li>Middle 80 percent of CDS intact: %s</li>\n", NO_);
 }
 if (sameWord(info->mid_pres, ONE_))
 {
     printf("<li>Middle 80 percent of CDS present: %s</li>\n", YES_);
 } else {
     printf("<li>Middle 80 percent of CDS present: %s</li>\n", NO_);
 }
-printf("</ul>\n</div>\n<BR>\n");
-
+if (info->numExonsMutated != NULL && info->percentExonsMutated != NULL) {
+    printf("<li>Number of exons with inactivating mutations: %s (%s%% of the present exons; threshold is 20%%)</li>\n", info->numExonsMutated, info->percentExonsMutated);
+}
+printf("</ul>\n</div>\n");
 
-htmlHorizontalLine();
 
-printf("<h4>Predicted protein sequence</h4><BR>\n");
+printf("<hr style='margin-bottom:-0.5em;color:black;'>\n");
+printf("<h4>Predicted protein sequence</h4>\n");
 
 printf("<a data-toggle=\"collapse\" href=\"#collapseProt\">Show protein sequence of query</a>\n");
 printf("<div id=\"collapseProt\" class=\"panel-collapse collapse\">\n");
 // printf("<TT>{protein seq of the query without dashes or other things. Should end with *}\n");
 printf("<TT>");
 HLprintQueryProtSeqForAli(info->prot_alignment);
-printf("\n<BR>\n</TT>\n</div>\n");
+printf("\n</TT>\n</div><BR>\n");
+
+if (info->protseqFrameCorrected != NULL) {
+  printf("<a data-toggle=\"collapse\" href=\"#collapseProtFrameCorrected\">Show frame-corrected protein sequence of query (potential frameshifts are masked)</a>\n");
+  printf("<div id=\"collapseProtFrameCorrected\" class=\"panel-collapse collapse\">\n");
+  printf("<TT>");
+  print_with_newlines(info->protseqFrameCorrected);
+  printf("\n</TT>\n</div>\n");
+}
+
+if (info->CDSseq != NULL) {
+  printf("<hr style='margin-bottom:-0.5em;color:black;'>\n");
+  printf("<h4>Predicted coding (DNA) sequence</h4>\n");
+  printf("<a data-toggle=\"collapse\" href=\"#collapseCDS\">Show coding sequence of query</a>\n");
+  printf("<div id=\"collapseCDS\" class=\"panel-collapse collapse\">\n");
+  printf("<TT>");
+  print_with_newlines(info->CDSseq);
+  printf("\n</TT>\n</div>\n");
+}
 
 // and show protein sequence
-htmlHorizontalLine();
-printf("<h4>Protein sequence alignment</h4><BR>\n");
+printf("<hr style='margin-bottom:-0.5em;color:black;'>\n");
+printf("<h4>Protein sequence alignment</h4>\n");
 printf("<a data-toggle=\"collapse\" href=\"#collapseProtAli\">Show alignment between reference and query</a>\n");
 printf("<div id=\"collapseProtAli\" class=\"panel-collapse collapse\">\n");
-printf("<TT>%s</TT><BR>\n", info->prot_alignment);
-printf("</div>\n<BR><BR>\n");
+printf("<TT>%s</TT>\n", info->prot_alignment);
+printf("</div>\n");
 
 // show inactivating mutations if required
-printf("<h4>List of inactivating mutations</h4><BR>\n");
-
+printf("<hr style='margin-bottom:-0.5em;color:black;'>\n");
+printf("<h4>List of inactivating mutations</h4>\n");
 printf("<a data-toggle=\"collapse\" href=\"#collapseMuts\">Show inactivating mutations</a>\n");
 printf("<div id=\"collapseMuts\" class=\"panel-collapse collapse\">\n");
 printf("<table border = \"1\" width = \"640\">\n");  // init table
 printf("<tr><th>Exon number</th><th>Codon number</th><th>Mutation class</th><th>Mutation</th><th>Treated as inactivating</th><th>Mutation ID</th>\n");
 printf("</tr>\n");
 printf("%s\n", info->inact_mut_html_table);
 printf("</table>\n");
-printf("</div>\n<BR>\n");
+printf("</div>\n\n");
 
 // show exons data
-htmlHorizontalLine();
-printf("<h4>Exon alignments</h4><BR>\n");
+printf("<hr style='margin-bottom:-0.5em;color:black;'>\n");
+printf("<h4>Exon alignments</h4>\n");
 
-printf("<a data-toggle=\"collapse\" href=\"#collapseExons\">Show exon sequences and features</a><BR><BR>\n");
+printf("<a data-toggle=\"collapse\" href=\"#collapseExons\">Show exon sequences and features</a>\n");
 printf("<div id=\"collapseExons\" class=\"panel-collapse collapse\">\n");
 // printf("%s\n", info->exon_ali_string);
 printf("%s\n", info->exon_ali_html);
 
-htmlHorizontalLine();
-printf("</div>\n<BR>\n");
+printf("<hr style='margin-bottom:-0.5em;color:black;'>\n");
+printf("</div>\n<BR><BR>\n");
 
 // TODO: check whether I need this
 hPrintf("<link rel=\"stylesheet\" href=\"https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css\">");
 hPrintf("<script src=\"https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js\"></script>");
 hPrintf("<script src=\"https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/js/bootstrap.min.js\"></script>");
 
 
 printTrackHtml(tdb);  // and do I need this?
 }
 
 
 void doHillerLabTOGAGene(char *database, struct trackDb *tdb, char *item, char *table_name)
 /* Put up TOGA Gene track info. */
 {
     //int start = cartInt(cart, "o");