ffab4ecbdda06b66cf091326108670702e498d0e hiram Fri Dec 5 11:16:22 2025 -0800 new toga click code from Michael Hiller for version 2 TOGA refs #35776 diff --git src/hg/hgc/togaClick.c src/hg/hgc/togaClick.c index 00a4c8c3a32..123f3246292 100644 --- src/hg/hgc/togaClick.c +++ src/hg/hgc/togaClick.c @@ -1,25 +1,25 @@ /* togaClick - click handling for TOGA tracks */ #include "common.h" #include "hgc.h" #include "togaClick.h" #include "string.h" #include "htmshell.h" #include "chromAlias.h" -struct togaDataBB *togaDataBBLoad(char **row) +struct togaDataBB *togaDataBBLoad(char **row, bits16 fieldCount) /* Load a togaData from row fetched with select * from togaData * from database. Dispose of this with togaDataFree(). */ { struct togaDataBB *ret; AllocVar(ret); ret->projection = cloneString(row[0]); ret->ref_trans_id = cloneString(row[1]); ret->ref_region = cloneString(row[2]); ret->query_region = cloneString(row[3]); ret->chain_score = cloneString(row[4]); ret->chain_synteny = cloneString(row[5]); ret->chain_flank = cloneString(row[6]); ret->chain_gl_cds_fract = cloneString(row[7]); ret->chain_loc_cds_fract = cloneString(row[8]); @@ -27,30 +27,45 @@ ret->chain_intron_cov = cloneString(row[10]); ret->status = cloneString(row[11]); ret->perc_intact_ign_M = cloneString(row[12]); ret->perc_intact_int_M = cloneString(row[13]); ret->intact_codon_prop = cloneString(row[14]); ret->ouf_prop = cloneString(row[15]); ret->mid_intact = cloneString(row[16]); ret->mid_pres = cloneString(row[17]); ret->prot_alignment = cloneString(row[18]); ret->svg_line = cloneString(row[19]); ret->ref_link = cloneString(row[20]); ret->inact_mut_html_table = cloneString(row[21]); ret->exon_ali_html = cloneString(row[22]); + + /* read two optional new items, CDSseq and frame-corrected protein */ + ret->CDSseq = NULL; + if (fieldCount >= 35) /* 0-11 are bed core fields. This fct gets a pointer starting with element 12. 
12+23 = 35 */ + ret->CDSseq = cloneString(row[23]); + ret->protseqFrameCorrected = NULL; + if (fieldCount >= 36) /* 12+24 = 36 */ + ret->protseqFrameCorrected = cloneString(row[24]); + /* number and percent of mutated exons (additional data now shown for the gene loss status) */ + ret->numExonsMutated = NULL; + ret->percentExonsMutated = NULL; + if (fieldCount >= 37) /* 12+25 = 37 */ + ret->numExonsMutated = cloneString(row[25]); + if (fieldCount >= 38) /* 12+26 = 38 */ + ret->percentExonsMutated = cloneString(row[26]); return ret; } struct togaData *togaDataLoad(char **row) /* Load a togaData from row fetched with select * from togaData * from database. Dispose of this with togaDataFree(). */ { struct togaData *ret; AllocVar(ret); ret->projection = cloneString(row[0]); ret->ref_trans_id = cloneString(row[1]); ret->ref_region = cloneString(row[2]); ret->query_region = cloneString(row[3]); ret->chain_score = cloneString(row[4]); @@ -253,30 +268,42 @@ str += 10; char ch; while ((ch = *str++) != '<') { if (ch != '-') { putchar(ch); ++printed_char_num; } if (printed_char_num == 80) { printed_char_num = 0; printf("<BR>"); } } } } +void print_with_newlines(const char *str) /* Write str to stdout in consecutive chunks of at most 80 characters, emitting an HTML <BR> after every chunk (including the last one). Nothing is printed for an empty string. str must not be NULL because it is handed to strlen(); the length is held in an int. The text is printed as-is, without HTML escaping. */ { + int line_length = 80; // Number of characters per line + int length = strlen(str); + int i = 0; + + while (i < length) { + /* Print up to 80 characters or the remainder of the string */ + int chars_to_print = (length - i < line_length) ? (length - i) : line_length; + printf("%.*s<BR>", chars_to_print, &str[i]); + i += chars_to_print; + } +} void doHillerLabTOGAGeneBig(char *database, struct trackDb *tdb, char *item, char *table_name) /* Put up TOGA Gene track info. 
*/ // To think about -> put into a single bigBed // string: HTML formatted inact mut // string: HTML formatted exon ali section { int start = cartInt(cart, "o"); int end = cartInt(cart, "t"); char *chrom = cartString(cart, "c"); char *fileName = bbiNameFromSettingOrTable(tdb, NULL, tdb->table); struct bbiFile *bbi = bigBedFileOpenAlias(hReplaceGbdb(fileName), chromAliasFindAliases); struct lm *lm = lmInit(0); struct bigBedInterval *bbList = bigBedIntervalQuery(bbi, chrom, start, end, 0, lm); @@ -286,32 +313,32 @@ { if (!(bb->start == start && bb->end == end)) continue; // our names are unique char *name = cloneFirstWordByDelimiterNoSkip(bb->rest, '\t'); boolean match = (isEmpty(name) && isEmpty(item)) || sameOk(name, item); if (!match) continue; char startBuf[16], endBuf[16]; bigBedIntervalToRow(bb, chrom, startBuf, endBuf, fields, bbi->fieldCount); break; } -printf("<h3>Projection %s</h3><BR>\n", item); -struct togaDataBB *info = togaDataBBLoad(&fields[11]); // Bogdan: why 11? 0-11 are bed-like fields likely +printf("<h3>Projection v2 %s</h3>\n", item); +struct togaDataBB *info = togaDataBBLoad(&fields[11], bbi->fieldCount); // Bogdan: why 11? 
0-11 are bed-like fields likely printf("<B>Reference transcript: </B>%s<BR>", info->ref_link); printf("<B>Genomic locus in reference: </B>%s<BR>\n", info->ref_region); printf("<B>Genomic locus in query: </B>%s<BR>\n", info->query_region); printf("<B>Projection classification: </B>%s<BR>\n", info->status); printf("<B>Probability that query locus is orthologous: </B>%s<BR>\n", info->chain_score); // list of chain features (for orthology classification) printf("<a data-toggle=\"collapse\" href=\"#collapseChain\">Show features used for ortholog probability</a>\n"); printf("<div id=\"collapseChain\" class=\"panel-collapse collapse\">\n"); printf("<ul>\n"); printf("<li>Synteny (log10 value): %s</li>\n", info->chain_synteny); printf("<li>Global CDS fraction: %s</li>\n", info->chain_gl_cds_fract); printf("<li>Local CDS fraction: %s</li>\n", info->chain_loc_cds_fract); printf("<li>Local intron fraction: %s</li>\n", info->chain_intron_cov); @@ -342,105 +369,125 @@ printf("<li>"global CDS fraction" as C / A. Chains with a high value have alignments that largely overlap coding exons,"); printf("which is a hallmark of paralogous or processed pseudogene chains. In contrast, chains with a low value also align many "); printf("intronic and intergenic regions, which is a hallmark of orthologous chains. </li>\n"); printf("<li>"local CDS fraction" as c / a. Orthologous chains tend to have a lower value, as intronic "); printf("regions partially align. This feature is not computed for single-exon genes. </li>\n"); printf("<li>"local intron fraction" as i / I. Orthologous chains tend to have a higher value."); printf("This feature is not computed for single-exon genes. </li>\n"); printf("<li>"flank fraction" as f / 20,000. Orthologous chains tend to have higher values,"); printf("as flanking intergenic regions partially align. This feature is important to detect orthologous loci of single-exon genes. 
</li>\n"); printf("<li>"synteny" as log10 of the number of genes, whose coding exons overlap by at least one base aligning"); printf("blocks of this chain. Orthologous chains tend to cover several genes located in a conserved order, resulting in higher synteny values. </li>\n"); printf("<li>"local CDS coverage" as c / CDS, which is only used for single-exon genes. </li>\n"); printf("</ul>\n"); -printf("</ul>\n</div>\n<BR>\n"); -htmlHorizontalLine(); +printf("</ul>\n</div>\n"); +printf("<hr style='margin-bottom:-0.5em;color:black;'>\n"); // show inact mut plot printf("<h4>Visualization of inactivating mutations on exon-intron structure</h4>\n"); -printf("%s<BR>\n", info->svg_line); +printf("%s\n", info->svg_line); printf("<BR>Exons shown in grey are missing (often overlap assembly gaps).\nExons shown in"); printf(" red or blue are deleted or do not align at all.\nRed indicates that the exon deletion "); printf("shifts the reading frame, while blue indicates that exon deletion(s) are framepreserving.<br>\n"); // GLP features printf("<a data-toggle=\"collapse\" href=\"#collapseGLP\">Show features used for transcript classification</a>\n"); printf("<div id=\"collapseGLP\" class=\"panel-collapse collapse\">\n"); printf("<ul>\n"); printf("<li>Percent intact, ignoring missing sequence: %s</li>\n", info->perc_intact_ign_M); printf("<li>Percent intact, treating missing as intact sequence: %s</li>\n", info->perc_intact_int_M); printf("<li>Proportion of intact codons: %s</li>\n", info->intact_codon_prop); printf("<li>Percent of CDS not covered by this chain (0 unless the chain covers only a part of the gene): %s</li>\n", info->ouf_prop); if (sameWord(info->mid_intact, ONE_)) { printf("<li>Middle 80 percent of CDS intact: %s</li>\n", YES_); } else { printf("<li>Middle 80 percent of CDS intact: %s</li>\n", NO_); } if (sameWord(info->mid_pres, ONE_)) { printf("<li>Middle 80 percent of CDS present: %s</li>\n", YES_); } else { printf("<li>Middle 80 percent of CDS present: 
%s</li>\n", NO_); } -printf("</ul>\n</div>\n<BR>\n"); - +if (info->numExonsMutated != NULL && info->percentExonsMutated != NULL) { + printf("<li>Number of exons with inactivating mutations: %s (%s%% of the present exons; threshold is 20%%)</li>\n", info->numExonsMutated, info->percentExonsMutated); +} +printf("</ul>\n</div>\n"); -htmlHorizontalLine(); -printf("<h4>Predicted protein sequence</h4><BR>\n"); +printf("<hr style='margin-bottom:-0.5em;color:black;'>\n"); +printf("<h4>Predicted protein sequence</h4>\n"); printf("<a data-toggle=\"collapse\" href=\"#collapseProt\">Show protein sequence of query</a>\n"); printf("<div id=\"collapseProt\" class=\"panel-collapse collapse\">\n"); // printf("<TT>{protein seq of the query without dashes or other things. Should end with *}\n"); printf("<TT>"); HLprintQueryProtSeqForAli(info->prot_alignment); -printf("\n<BR>\n</TT>\n</div>\n"); +printf("\n</TT>\n</div><BR>\n"); + +if (info->protseqFrameCorrected != NULL) { + printf("<a data-toggle=\"collapse\" href=\"#collapseProtFrameCorrected\">Show frame-corrected protein sequence of query (potential frameshifts are masked)</a>\n"); + printf("<div id=\"collapseProtFrameCorrected\" class=\"panel-collapse collapse\">\n"); + printf("<TT>"); + print_with_newlines(info->protseqFrameCorrected); + printf("\n</TT>\n</div>\n"); +} + +if (info->CDSseq != NULL) { + printf("<hr style='margin-bottom:-0.5em;color:black;'>\n"); + printf("<h4>Predicted coding (DNA) sequence</h4>\n"); + printf("<a data-toggle=\"collapse\" href=\"#collapseCDS\">Show coding sequence of query</a>\n"); + printf("<div id=\"collapseCDS\" class=\"panel-collapse collapse\">\n"); + printf("<TT>"); + print_with_newlines(info->CDSseq); + printf("\n</TT>\n</div>\n"); +} // and show protein sequence -htmlHorizontalLine(); -printf("<h4>Protein sequence alignment</h4><BR>\n"); +printf("<hr style='margin-bottom:-0.5em;color:black;'>\n"); +printf("<h4>Protein sequence alignment</h4>\n"); printf("<a data-toggle=\"collapse\" 
href=\"#collapseProtAli\">Show alignment between reference and query</a>\n"); printf("<div id=\"collapseProtAli\" class=\"panel-collapse collapse\">\n"); -printf("<TT>%s</TT><BR>\n", info->prot_alignment); -printf("</div>\n<BR><BR>\n"); +printf("<TT>%s</TT>\n", info->prot_alignment); +printf("</div>\n"); // show inactivating mutations if required -printf("<h4>List of inactivating mutations</h4><BR>\n"); - +printf("<hr style='margin-bottom:-0.5em;color:black;'>\n"); +printf("<h4>List of inactivating mutations</h4>\n"); printf("<a data-toggle=\"collapse\" href=\"#collapseMuts\">Show inactivating mutations</a>\n"); printf("<div id=\"collapseMuts\" class=\"panel-collapse collapse\">\n"); printf("<table border = \"1\" width = \"640\">\n"); // init table printf("<tr><th>Exon number</th><th>Codon number</th><th>Mutation class</th><th>Mutation</th><th>Treated as inactivating</th><th>Mutation ID</th>\n"); printf("</tr>\n"); printf("%s\n", info->inact_mut_html_table); printf("</table>\n"); -printf("</div>\n<BR>\n"); +printf("</div>\n\n"); // show exons data -htmlHorizontalLine(); -printf("<h4>Exon alignments</h4><BR>\n"); +printf("<hr style='margin-bottom:-0.5em;color:black;'>\n"); +printf("<h4>Exon alignments</h4>\n"); -printf("<a data-toggle=\"collapse\" href=\"#collapseExons\">Show exon sequences and features</a><BR><BR>\n"); +printf("<a data-toggle=\"collapse\" href=\"#collapseExons\">Show exon sequences and features</a>\n"); printf("<div id=\"collapseExons\" class=\"panel-collapse collapse\">\n"); // printf("%s\n", info->exon_ali_string); printf("%s\n", info->exon_ali_html); -htmlHorizontalLine(); -printf("</div>\n<BR>\n"); +printf("<hr style='margin-bottom:-0.5em;color:black;'>\n"); +printf("</div>\n<BR><BR>\n"); // TODO: check whether I need this hPrintf("<link rel=\"stylesheet\" href=\"https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css\">"); hPrintf("<script src=\"https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js\"></script>"); 
hPrintf("<script src=\"https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/js/bootstrap.min.js\"></script>"); printTrackHtml(tdb); // and do I need this? } void doHillerLabTOGAGene(char *database, struct trackDb *tdb, char *item, char *table_name) /* Put up TOGA Gene track info. */ { //int start = cartInt(cart, "o");