da33d70c10ddaae6c3290cb900d7d0cc2b6ee01b hiram Tue Mar 17 13:57:24 2026 -0700 allow calculation of GC percent on the fly with code help from claude refs #35958 diff --git src/hg/hgc/togaClick.c src/hg/hgc/togaClick.c index de092e4773d..dadc3da2016 100644 --- src/hg/hgc/togaClick.c +++ src/hg/hgc/togaClick.c @@ -1,722 +1,725 @@ /* togaClick - click handling for TOGA tracks */ #include "common.h" #include "hgc.h" #include "togaClick.h" #include "string.h" #include "htmshell.h" #include "chromAlias.h" struct togaDataBB *togaDataBBLoad(char **row, bits16 fieldCount) /* Load a togaData from row fetched with select * from togaData * from database. Dispose of this with togaDataFree(). */ { struct togaDataBB *ret; AllocVar(ret); ret->projection = cloneString(row[0]); ret->ref_trans_id = cloneString(row[1]); ret->ref_region = cloneString(row[2]); ret->query_region = cloneString(row[3]); ret->chain_score = cloneString(row[4]); ret->chain_synteny = cloneString(row[5]); ret->chain_flank = cloneString(row[6]); ret->chain_gl_cds_fract = cloneString(row[7]); ret->chain_loc_cds_fract = cloneString(row[8]); ret->chain_exon_cov = cloneString(row[9]); ret->chain_intron_cov = cloneString(row[10]); ret->status = cloneString(row[11]); ret->perc_intact_ign_M = cloneString(row[12]); ret->perc_intact_int_M = cloneString(row[13]); ret->intact_codon_prop = cloneString(row[14]); ret->ouf_prop = cloneString(row[15]); ret->mid_intact = cloneString(row[16]); ret->mid_pres = cloneString(row[17]); ret->prot_alignment = cloneString(row[18]); ret->svg_line = cloneString(row[19]); ret->ref_link = cloneString(row[20]); ret->inact_mut_html_table = cloneString(row[21]); ret->exon_ali_html = cloneString(row[22]); /* read two optional new items, CDSseq and frame-corrected protein */ ret->CDSseq = NULL; if (fieldCount >= 35) /* 0-11 are bed core fields. This fct gets a pointer starting with element 12. 12+23 = 35 */ ret->CDSseq = cloneString(row[23]); ret->protseqFrameCorrected = NULL; if (fieldCount >= 36) /* 12+24 = 36 */ ret->protseqFrameCorrected = cloneString(row[24]); /* number and percent of mutated exons (additional data now shown for the gene loss status) */ ret->numExonsMutated = NULL; ret->percentExonsMutated = NULL; if (fieldCount >= 37) /* 12+25 = 37 */ ret->numExonsMutated = cloneString(row[25]); if (fieldCount >= 38) /* 12+26 = 38 */ ret->percentExonsMutated = cloneString(row[26]); return ret; } struct togaData *togaDataLoad(char **row) /* Load a togaData from row fetched with select * from togaData * from database. Dispose of this with togaDataFree(). */ { struct togaData *ret; AllocVar(ret); ret->projection = cloneString(row[0]); ret->ref_trans_id = cloneString(row[1]); ret->ref_region = cloneString(row[2]); ret->query_region = cloneString(row[3]); ret->chain_score = cloneString(row[4]); ret->chain_synteny = cloneString(row[5]); ret->chain_flank = cloneString(row[6]); ret->chain_gl_cds_fract = cloneString(row[7]); ret->chain_loc_cds_fract = cloneString(row[8]); ret->chain_exon_cov = cloneString(row[9]); ret->chain_intron_cov = cloneString(row[10]); ret->status = cloneString(row[11]); ret->perc_intact_ign_M = cloneString(row[12]); ret->perc_intact_int_M = cloneString(row[13]); ret->intact_codon_prop = cloneString(row[14]); ret->ouf_prop = cloneString(row[15]); ret->mid_intact = cloneString(row[16]); ret->mid_pres = cloneString(row[17]); ret->prot_alignment = cloneString(row[18]); ret->svg_line = cloneString(row[19]); return ret; } void togaDataBBFree(struct togaDataBB **pEl) /* Free a single dynamically allocated togaDatasuch as created * with togaDataLoad(). */ { struct togaDataBB *el; if ((el = *pEl) == NULL) return; freeMem(el->projection); freeMem(el->ref_trans_id); freeMem(el->ref_region); freeMem(el->query_region); freeMem(el->chain_score); freeMem(el->chain_synteny); freeMem(el->chain_flank); freeMem(el->chain_gl_cds_fract); freeMem(el->chain_loc_cds_fract); freeMem(el->chain_exon_cov); freeMem(el->chain_intron_cov); freeMem(el->status); freeMem(el->perc_intact_ign_M); freeMem(el->perc_intact_int_M); freeMem(el->intact_codon_prop); freeMem(el->ouf_prop); freeMem(el->mid_intact); freeMem(el->mid_pres); freeMem(el->prot_alignment); freeMem(el->svg_line); freeMem(el->ref_link); freeMem(el->inact_mut_html_table); freeMem(el->exon_ali_html); freez(pEl); } void togaDataFree(struct togaData **pEl) /* Free a single dynamically allocated togaDatasuch as created * with togaDataLoad(). */ { struct togaData *el; if ((el = *pEl) == NULL) return; freeMem(el->projection); freeMem(el->ref_trans_id); freeMem(el->ref_region); freeMem(el->query_region); freeMem(el->chain_score); freeMem(el->chain_synteny); freeMem(el->chain_flank); freeMem(el->chain_gl_cds_fract); freeMem(el->chain_loc_cds_fract); freeMem(el->chain_exon_cov); freeMem(el->chain_intron_cov); freeMem(el->status); freeMem(el->perc_intact_ign_M); freeMem(el->perc_intact_int_M); freeMem(el->intact_codon_prop); freeMem(el->ouf_prop); freeMem(el->mid_intact); freeMem(el->mid_pres); freeMem(el->prot_alignment); freeMem(el->svg_line); freez(pEl); } struct togaNucl *togaNuclLoad(char **row) /* Load a togaNucl from row fetched with select * from togaNucl * from database. Dispose of this with togaNuclFree(). */ { struct togaNucl *ret; AllocVar(ret); ret->transcript = cloneString(row[0]); ret->exon_num = cloneString(row[1]); ret->exon_region = cloneString(row[2]); ret->pid = cloneString(row[3]); ret->blosum = cloneString(row[4]); ret->gaps = cloneString(row[5]); ret->ali_class = cloneString(row[6]); ret->exp_region = cloneString(row[7]); ret->in_exp_region = cloneString(row[8]); ret->alignment = cloneString(row[9]); return ret; } void togaNuclFree(struct togaNucl **pEl) /* Free a single dynamically allocated togaNucl such as created * with togaNuclLoad(). */ { struct togaNucl *el; if ((el = *pEl) == NULL) return; freeMem(el->transcript); freeMem(el->exon_num); freeMem(el->exon_region); freeMem(el->pid); freeMem(el->blosum); freeMem(el->gaps); freeMem(el->ali_class); freeMem(el->exp_region); freeMem(el->in_exp_region); freeMem(el->alignment); freez(pEl); } struct togaInactMut *togaInactMutLoad(char **row) /* Load a togaInactMut from row fetched with select * from togaInactMut * from database. Dispose of this with togaInactMutFree(). */ { struct togaInactMut *ret; AllocVar(ret); ret->transcript = cloneString(row[0]); ret->exon_num = cloneString(row[1]); ret->position = cloneString(row[2]); ret->mut_class = cloneString(row[3]); ret->mutation = cloneString(row[4]); ret->is_inact = cloneString(row[5]); ret->mut_id = cloneString(row[6]); return ret; } void togaInactMutFree(struct togaInactMut **pEl) /* Free a single dynamically allocated togaInactMut such as created * with togaInactMutLoad(). */ { struct togaInactMut *el; if ((el = *pEl) == NULL) return; freeMem(el->transcript); freeMem(el->exon_num); freeMem(el->position); freeMem(el->mut_class); freeMem(el->mutation); freeMem(el->is_inact); freeMem(el->mut_id); freez(pEl); } void extractHLTOGAsuffix(char *suffix) /* Extract suffix from TOGA table name. Prefix must be HLTOGAannot */ { int suff_len = strlen(suffix); if (suff_len <= HLTOGA_BED_PREFIX_LEN) // we cannot chop first PREFIX_LEN characters { // TODO: NOT SURE IF IT WORKS; but this must not happen char empty[5] = { '\0' }; strcpy(suffix, empty); } else { // just start the string 11 characters upstream memmove(suffix, suffix + HLTOGA_BED_PREFIX_LEN, suff_len - HLTOGA_BED_PREFIX_LEN + 1); } } void HLprintQueryProtSeqForAli(char *proteinAlignment) { // take protein sequence alignment // print only the query sequence char *str = proteinAlignment; int printed_char_num = 0; while ((str = strstr(str, "que:")) != NULL) { str += 10; char ch; while ((ch = *str++) != '<') { if (ch != '-') { putchar(ch); ++printed_char_num; } if (printed_char_num == 80) { printed_char_num = 0; printf("
"); } } } } void print_with_newlines(const char *str) { int line_length = 80; // Number of characters per line int length = strlen(str); int i = 0; while (i < length) { /* Print up to 80 characters or the remainder of the string */ int chars_to_print = (length - i < line_length) ? (length - i) : line_length; printf("%.*s
", chars_to_print, &str[i]); i += chars_to_print; } } static void panelPrompt(char *target, char *prompt) -/* output span element for an expandable text element */ +/* output span element for an expandable text element + * this opens a
element, you need to close it
after your text in + * this section has been output. + */ { printf(" %s:\n", target, prompt); printf("\n"); printf("
\n"); // show inact mut plot printf("

Visualization of inactivating mutations on exon-intron structure

\n"); printf("%s\n", info->svg_line); printf("
Exons shown in grey are missing (often overlap assembly gaps).\nExons shown in"); printf(" red or blue are deleted or do not align at all.\nRed indicates that the exon deletion "); printf("shifts the reading frame, while blue indicates that exon deletion(s) are framepreserving.
\n"); // GLP features panelPrompt("collapseGLP", "Show features used for transcript classification"); printf("

\n\n"); printf("
\n"); printf("

Predicted protein sequence

\n"); panelPrompt("collapseProt", "Show protein sequence of query"); printf("

"); HLprintQueryProtSeqForAli(info->prot_alignment); printf("\n

\n
\n"); if (info->protseqFrameCorrected != NULL) { panelPrompt("collapseProtFrameCorrected", "Show frame-corrected protein sequence of query (potential frameshifts are masked)"); printf("

"); print_with_newlines(info->protseqFrameCorrected); printf("\n

\n\n"); } if (info->CDSseq != NULL) { printf("
\n"); printf("

Predicted coding (DNA) sequence

\n"); panelPrompt("collapseCDS", "Show coding sequence of query"); printf("

"); print_with_newlines(info->CDSseq); printf("\n

\n\n"); } // and show protein sequence printf("
\n"); printf("

Protein sequence alignment

\n"); panelPrompt("collapseProtAli", "Show alignment between reference and query"); printf("

%s\n", info->prot_alignment); printf("

\n"); // show inactivating mutations if required printf("
\n"); printf("

List of inactivating mutations

\n"); panelPrompt("collapseMuts", "Show inactivating mutations"); printf("

\n"); // init table printf("\n"); printf("\n"); printf("%s\n", info->inact_mut_html_table); printf("
Exon numberCodon numberMutation classMutationTreated as inactivatingMutation ID

\n"); printf("\n\n"); // show exons data printf("
\n"); printf("

Exon alignments

\n"); panelPrompt("collapseExons", "Show exon sequences and features"); printf("

%s

\n", info->exon_ali_html); printf("
\n"); printf("\n

\n"); printTrackHtml(tdb); // and do I need this? } void doHillerLabTOGAGene(char *database, struct trackDb *tdb, char *item, char *table_name) /* Put up TOGA Gene track info. */ { //int start = cartInt(cart, "o"); char headerTitle[512]; char suffix[512]; strcpy(suffix, table_name); extractHLTOGAsuffix(suffix); safef(headerTitle, sizeof(headerTitle), "%s", item); genericHeader(tdb, headerTitle); printf("

TOGA gene annotation

\n"); // htmlHorizontalLine(); if (startsWith("bigBed", tdb->type)) { doHillerLabTOGAGeneBig(database, tdb, item, table_name); return; } struct sqlConnection *conn = hAllocConn(database); // define TOGA table names: initate with pre-defined prefixes char togaDataTableName[256]; char togaNuclTableName[256]; char togaInactMutTableName[256]; strcpy(togaDataTableName, HLTOGA_DATA_PREFIX); strcpy(togaNuclTableName, HLTOGA_NUCL_PREFIX); strcpy(togaInactMutTableName, HLTOGA_INACT_PREFIX); // add suffix strcat(togaDataTableName, suffix); strcat(togaNuclTableName, suffix); strcat(togaInactMutTableName, suffix); if (hTableExists(database, togaDataTableName)) { printf("

Projection %s


\n", item); char query[256]; struct sqlResult *sr = NULL; char **row; struct togaData *info = NULL; sqlSafef(query, sizeof(query), "select * from %s where transcript='%s'", togaDataTableName, item); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) { info = togaDataLoad(row); // parse sql output // fill HTML template: printf("Reference transcript: %s
", info->ref_trans_id, info->ref_trans_id); printf("Genomic locus in reference: %s
\n", info->ref_region); printf("Genomic locus in query: %s
\n", info->query_region); printf("Projection classification: %s
\n", info->status); printf("Probability that query locus is orthologous: %s
\n", info->chain_score); // list of chain features (for orthology classification) panelPrompt("collapseChain", "Show features used for ortholog probability"); printf("

\n"); printf("
\nFeature description:\n"); printf("For each projection (one reference transcript and one overlapping chain),\n"); printf("TOGA computes the following features by intersecting the reference coordinates of aligning\n"); printf("blocks in the chain with different gene parts (coding exons, UTR (untranslated region) exons, introns)\n"); printf("and the respective intergenic regions.\n
\n"); printf("We define the following variables:\n\n"); printf("Using these variables, TOGA computes the following features:\n"); printf("\n"); printf("\n\n
\n"); htmlHorizontalLine(); // show inact mut plot printf("

Visualization of inactivating mutations on exon-intron structure

\n"); printf("%s
\n", info->svg_line); printf("
Exons shown in grey are missing (often overlap assembly gaps).\nExons shown in"); printf(" red or blue are deleted or do not align at all.\nRed indicates that the exon deletion "); printf("shifts the reading frame, while blue indicates that exon deletion(s) are framepreserving.
\n"); // GLP features panelPrompt("collapseGLP", "Show features used for transcript classification"); printf("

\n\n
\n"); printf("

Query protein sequence


"); panelPrompt("collapseProt", "Show protein sequence of query"); printf("

{protein seq of the query without dashes or other things. Should end with *}\n"); printf("
\n

\n\n"); // and show protein sequence htmlHorizontalLine(); printf("

Protein sequence alignment


\n"); panelPrompt("collapseProtAli", "Show alignment between reference and query"); printf("

%s


\n", info->prot_alignment); printf("\n

\n"); // do not forget to free toga data struct togaDataFree(&info); } else { // no data found, need to report this printf("

No found data for %s

\n", item); } sqlFreeResult(&sr); } // show inactivating mutations if required printf("

List of inactivating mutations


\n"); if (hTableExists(database, togaInactMutTableName)) { char query[256]; struct sqlResult *sr = NULL; char **row; sqlSafef(query, sizeof(query), "select * from %s where transcript='%s'", togaInactMutTableName, item); sr = sqlGetResult(conn, query); panelPrompt("collapseMuts", "Show inactivating mutations"); printf("\n"); // init table printf("\n"); printf("\n"); while ((row = sqlNextRow(sr)) != NULL) { struct togaInactMut *info = NULL; info = togaInactMutLoad(row); printf("\n"); printf("\n", info->exon_num); printf("\n", info->position); printf("\n", info->mut_class); printf("\n", info->mutation); if (sameWord(info->is_inact, ONE_)){ printf("\n", YES_); } else { printf("\n", NO_); } printf("\n", info->mut_id); printf("\n"); togaInactMutFree(&info); } sqlFreeResult(&sr); printf("
Exon numberCodon numberMutation classMutationTreated as inactivatingMutation ID
%s%s%s%s%s%s%s
\n"); printf("\n
\n"); } else { printf("Sorry, cannot find TOGAInactMut table.
\n"); } // show exons data htmlHorizontalLine(); printf("

Exon alignments


\n"); if (hTableExists(database, togaNuclTableName)) { char query[256]; struct sqlResult *sr = NULL; char **row; panelPrompt("collapseExons", "Show exon sequences and features"); sqlSafef(query, sizeof(query), "select * from %s where transcript='%s'", togaNuclTableName, item); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { struct togaNucl *info = NULL; info = togaNuclLoad(row); printf("
Exon number: %s

\n", info->exon_num); printf("Exon region: %s
\n", info->exon_region); printf("Nucleotide percent identity: %s | BLOSUM: %s
\n", info->pid, info->blosum); if (sameWord(info->gaps, ONE_)){ printf("Intersects assembly gaps: %s
\n", YES_); } else { printf("Intersects assembly gaps: %s
\n", NO_); } printf("Exon alignment class: %s
\n", info->ali_class); if (sameWord(info->in_exp_region, ONE_)){ printf("Detected within expected region (%s): %s
\n", info->exp_region, YES_); } else { printf("Detected within expected region (%s): %s
\n", info->exp_region, NO_); } // printf("Expected region: %s
\n", info->exp_region); printf("
\n"); printf("Sequence alignment between reference and query exon:
\n"); printf("%s
\n", info->alignment); togaNuclFree(&info); } sqlFreeResult(&sr); printf("\n

\n"); } else { printf("Sorry, cannot find TOGANucl table.
\n"); } htmlHorizontalLine(); // TODO: check whether I need this printf("%s", hgTracksPathAndSettings()); printTrackHtml(tdb); // and do I need this? hFreeConn(&conn); }