f1ce2d85ae54bc5304772a9fbf5395f4b8a43e3c angie Mon Oct 25 14:08:19 2010 -0700 Redmine Feature #953 (mRNA non-synonymous codons don't work when genome is a stop): In cds.c, genomic stop codons were encoded into grayIx as 'X' which unfortunately is also an error return from dnautil.c's lookupCodon. Genomic 'X' codons (including stop codons) were excluded from detection of non-synonymous codons -- thus the invisibility of difference when genome has a stop codon but mRNAs don't. Since 'X' can be caused by something valid like an N in the codon, I don't think it makes sense to exclude it from comparison. Also, I changed the grayIx encoding to use 'J' for stop codon (not a valid peptide code and not X). Also did some refactoring to abstract out the grayIx encoding a bit, and make names a bit more accurate. diff --git src/hg/hgTracks/cds.c src/hg/hgTracks/cds.c index 71d8583..33799c1 100644 --- src/hg/hgTracks/cds.c +++ src/hg/hgTracks/cds.c @@ -363,6 +363,21 @@ } +// We need an uppercase alpha character to represent the stop codon in +// the alpha-offset grayIx scheme. J is in neither dnautil.c's +// codonTable nor axt.c's blosum62 peptide scoring matrix. X was +// formerly used to represent stop codon, but unfortunately it also is +// the error return from dnautil.c's lookupCodon (and it is in +// blosum62, so maybe it is a valid peptide in some contexts?). 
+#define GRAYIX_STOP_CODON_ALPHA 'J' + +// In addition to the alpha-offset encoding of peptide and alternating +// shade, grayIx can take on several negative values to represent colors +// for special cases: +#define GRAYIX_CDS_START -1 +#define GRAYIX_CDS_ERROR -2 +#define GRAYIX_CDS_STOP -3 +#define GRAYIX_CDS_SYN_PROT -4 static Color colorAndCodonFromGrayIx(struct hvGfx *hvg, char *codon, int grayIx, Color ixColor) @@ -370,22 +385,22 @@ * are both encoded in the grayIx*/ { Color color; -if (grayIx == -2) +if (grayIx == GRAYIX_CDS_ERROR) { color = cdsColor[CDS_ERROR]; sprintf(codon,"X"); } -else if (grayIx == -1) +else if (grayIx == GRAYIX_CDS_START) { color = cdsColor[CDS_START]; sprintf(codon,"M"); } -else if (grayIx == -3) +else if (grayIx == GRAYIX_CDS_STOP) { color = cdsColor[CDS_STOP]; sprintf(codon,"*"); } -else if (grayIx == -4) +else if (grayIx == GRAYIX_CDS_SYN_PROT) { color = cdsColor[CDS_SYN_PROT]; sprintf(codon,"*"); @@ -406,14 +421,19 @@ { color = ixColor; sprintf(codon,"%c",grayIx + 'A' - 1); + if (codon[0] == GRAYIX_STOP_CODON_ALPHA) + codon[0] = '*'; } else if (grayIx > 26) { color = lighterShade(hvg, ixColor,1.5); sprintf(codon,"%c",grayIx - 26 + 'A' - 1); + if (codon[0] == GRAYIX_STOP_CODON_ALPHA) + codon[0] = '*'; } else { + errAbort("colorAndCodonFromGrayIx: invalid grayIx %d", grayIx); color = cdsColor[CDS_ERROR]; sprintf(codon,"X"); } @@ -421,19 +441,38 @@ } -static int setColorByCds(DNA *dna, bool codonFirstColor, boolean *foundStart, - boolean reverse, boolean colorStopStart) +static char baseColorLookupCodon(DNA *dna) +/* Call dnautil's lookupCodon, but translate stop codon '\0' to '*' for display. 
*/ { -char codonChar; +char peptide; +if (sameString(chromName, "chrM")) + peptide = lookupMitoCodon(dna); +else + peptide = lookupCodon(dna); +if (peptide == '\0') + peptide = '*'; +return peptide; +} +static int peptideToGrayIx(char peptide, boolean codonFirstColor) +/* Encode peptide (a letter or '*') and alternating gray shade into our alpha-offset scheme. */ +{ +if (peptide == '*') + peptide = GRAYIX_STOP_CODON_ALPHA; +if (codonFirstColor) + return(peptide - 'A' + 1); +else + return(peptide - 'A' + 1 + 26); +} + +static int codonToGrayIx(DNA *dna, bool codonFirstColor, boolean *foundStart, + boolean reverse, boolean colorStopStart) +/* Return grayIx encoding the codon and color (or alternating shades). */ +{ if (reverse) reverseComplement(dna,strlen(dna)); -if (sameString(chromName, "chrM")) - codonChar = lookupMitoCodon(dna); -else - codonChar = lookupCodon(dna); - +char codonChar = baseColorLookupCodon(dna); if (codonChar == 'M' && foundStart != NULL && !(*foundStart)) *foundStart = TRUE; @@ -448,28 +487,19 @@ } #endif -if (codonChar == 0) +if (codonChar == '*') { if (colorStopStart) - return(-3); //stop codon - else - { - if (codonFirstColor) - return('X' - 'A' + 1); + return(GRAYIX_CDS_STOP); else - return('X' - 'A' + 1 + 26); - } + return peptideToGrayIx(codonChar, codonFirstColor); } else if (codonChar == 'X') - return(-2); //bad input to lookupCodon + return(GRAYIX_CDS_ERROR); // bad input to lookupCodon, e.g. 'n' base else if (colorStopStart && codonChar == 'M') - { - return(-1); //start codon - } -else if (codonFirstColor) - return(codonChar - 'A' + 1); + return(GRAYIX_CDS_START); else - return(codonChar - 'A' + 1 + 26); + return peptideToGrayIx(codonChar, codonFirstColor); } @@ -477,9 +507,11 @@ /* returns TRUE if amino acids have a positive blosum62 score i.e. I, V or R, K else FALSE + Note: X is a valid code in the default scheme (blosum62), so don't pass in + dnautil.c's X which is an error return... 
*/ { -static struct axtScoreScheme *ss; +static struct axtScoreScheme *ss = NULL; if (ss == NULL) ss = axtScoreSchemeProteinDefault(); if ((ss->matrix[aa1][aa2]) > 0) @@ -488,27 +520,20 @@ return FALSE; } -static int setColorByDiff(DNA *rna, char genomicCodon, bool codonFirstColor) +static int mrnaCodonToGrayIx(DNA *rna, char genomicCodon, bool codonFirstColor) /* Difference ==> red, otherwise keep the alternating shades. */ { -char rnaCodon = lookupCodon(rna); - -/* Translate lookupCodon stop codon result into what genomicCodon would have - * for a stop codon: */ -if (rnaCodon == '\0') - rnaCodon = '*'; +char rnaCodon = baseColorLookupCodon(rna); -if (genomicCodon != 'X' && genomicCodon != rnaCodon) +if (genomicCodon != rnaCodon) { - if (protEquivalent(genomicCodon, rnaCodon)) - return(-4); // yellow, "synonymous" protein + if (genomicCodon != 'X' && rnaCodon != 'X' && protEquivalent(genomicCodon, rnaCodon)) + return(GRAYIX_CDS_SYN_PROT); // yellow, "synonymous" protein else - return(-3); //red (reusing stop codon color) + return(GRAYIX_CDS_STOP); } -else if (codonFirstColor) - return(genomicCodon - 'A' + 1); else - return(genomicCodon - 'A' + 1 + 26); + return peptideToGrayIx(genomicCodon, codonFirstColor); } @@ -975,7 +1000,7 @@ sf->start = winEnd - sf->start + winStart - 3; sf->end = sf->start + 3; } - sf->grayIx = setColorByCds(codon, sf->start % 6 < 3, NULL, FALSE, TRUE); + sf->grayIx = codonToGrayIx(codon, sf->start % 6 < 3, NULL, FALSE, TRUE); zeroBytes(codon, 4); slAddHead(&sfList, sf); } @@ -1145,9 +1170,9 @@ sf->end = currentEnd; sf->grayIx = ((posStrand && currentEnd <= cdsEnd) || (!posStrand && currentStart >= cdsStart)) ? 
- setColorByCds(tempCodonSeq, altColor, &foundStart, + codonToGrayIx(tempCodonSeq, altColor, &foundStart, !posStrand, colorStopStart) : - -2; + GRAYIX_CDS_ERROR; slAddHead(&sfList, sf); } break; @@ -1167,11 +1192,11 @@ char *thisDna = getCachedDna(currentStart, currentEnd); memcpy(currentCodon, thisDna, 3); currentCodon[3] = '\0'; - sf->grayIx = setColorByCds(currentCodon, altColor, &foundStart, + sf->grayIx = codonToGrayIx(currentCodon, altColor, &foundStart, !posStrand, colorStopStart); } else - sf->grayIx = -2; + sf->grayIx = GRAYIX_CDS_ERROR; } /*start of a coding block with less than 3 bases*/ else if (currentSize < 3) @@ -1181,10 +1206,10 @@ sf->start = currentStart; sf->end = currentEnd; if (strlen(partialCodonSeq) == 3) - sf->grayIx = setColorByCds(partialCodonSeq, altColor, + sf->grayIx = codonToGrayIx(partialCodonSeq, altColor, &foundStart, !posStrand, colorStopStart); else - sf->grayIx = -2; + sf->grayIx = GRAYIX_CDS_ERROR; strcpy(partialCodonSeq,"" ); /*update frame based on bases appended*/ @@ -1338,7 +1363,7 @@ boolean startColor = FALSE; /* re-set color of this block based on mrna codons rather than * genomic, but keep the odd/even cycle of dark/light shades. */ - int mrnaGrayIx = setColorByCds(mrnaBases, (grayIx > 26), NULL, + int mrnaGrayIx = codonToGrayIx(mrnaBases, (grayIx > 26), NULL, FALSE, TRUE); if (color == cdsColor[CDS_START]) startColor = TRUE; @@ -1374,14 +1399,14 @@ /* Color codons red wherever mrna differs from genomic; * keep the odd/even cycle of dark/light shades. 
*/ colorAndCodonFromGrayIx(hvg, genomicCodon, grayIx, ixColor); - int mrnaGrayIx = setColorByDiff(mrnaBases, genomicCodon[0], + int mrnaGrayIx = mrnaCodonToGrayIx(mrnaBases, genomicCodon[0], (grayIx > 26)); color = colorAndCodonFromGrayIx(hvg, mrnaCodon, mrnaGrayIx, ixColor); - safef(mrnaCodon, sizeof(mrnaCodon), "%c", lookupCodon(mrnaBases)); - if (mrnaCodon[0] == '\0') - mrnaCodon[0] = '*'; - if (genomicCodon[0] != 'X' && mrnaCodon[0] != genomicCodon[0]) + // Look up mrnaCodon again because if mrnaGrayIx is GRAYIX_SYN_PROT, + // codon value is lost: + safef(mrnaCodon, sizeof(mrnaCodon), "%c", baseColorLookupCodon(mrnaBases)); + if (mrnaCodon[0] != genomicCodon[0]) { drawScaledBoxSampleWithText(hvg, s, e, scale, xOff, y, heightPer, color, lf->score, font, @@ -1488,13 +1513,11 @@ mrnaBases, &queryInsertion); if (queryInsertion) color = cdsColor[CDS_QUERY_INSERTION]; - mrnaCodon = lookupCodon(mrnaBases); - if (mrnaCodon == '\0') - mrnaCodon = '*'; + mrnaCodon = baseColorLookupCodon(mrnaBases); colorAndCodonFromGrayIx(hvg, genomicCodon, sf->grayIx, dummyColor); if (queryInsertion) drawScaledBox(hvg, s, e, scale, xOff, y, heightPer, color); - if ((genomicCodon[0] != 'X' && mrnaCodon != genomicCodon[0])) + if (mrnaCodon != genomicCodon[0]) { if (mrnaCodon != genomicCodon[0] && protEquivalent(genomicCodon[0], mrnaCodon)) color = cdsColor[CDS_SYN_PROT];