57d12692815c7c7eca4915efd3be0b6c21358702 braney Thu Sep 25 18:17:32 2014 -0700
add a mode to mafGene that lets it put out a unique character for each codon.
diff --git src/lib/dnautil.c src/lib/dnautil.c
index 7d8a665..6b08ccd 100644
--- src/lib/dnautil.c
+++ src/lib/dnautil.c
@@ -10,117 +10,118 @@
  * Amino acids are stored as single character upper case.
  *
  * This file is copyright 2002 Jim Kent, but license is hereby
  * granted for all use - public, private or commercial. */

 #include "common.h"
 #include "dnautil.h"


 struct codonTable
 /* The dread codon table. */
     {
     DNA *codon;         /* Lower case. */
     AA protCode;        /* Upper case. The "Standard" code */
     AA mitoCode;        /* Upper case. Vertebrate Mitochondrial translations */
+    AA uniqCode;        /* One distinct character per codon; case is significant. */
     };

 struct codonTable codonTable[] =
 /* The master codon/protein table. */
 {
-    {"ttt", 'F', 'F',},
-    {"ttc", 'F', 'F',},
-    {"tta", 'L', 'L',},
-    {"ttg", 'L', 'L',},
-
-    {"tct", 'S', 'S',},
-    {"tcc", 'S', 'S',},
-    {"tca", 'S', 'S',},
-    {"tcg", 'S', 'S',},
-
-    {"tat", 'Y', 'Y',},
-    {"tac", 'Y', 'Y',},
-    {"taa", 0, 0,},
-    {"tag", 0, 0,},
-
-    {"tgt", 'C', 'C',},
-    {"tgc", 'C', 'C',},
-    {"tga", 0, 'W',},
-    {"tgg", 'W', 'W',},
-
-
-    {"ctt", 'L', 'L',},
-    {"ctc", 'L', 'L',},
-    {"cta", 'L', 'L',},
-    {"ctg", 'L', 'L',},
-
-    {"cct", 'P', 'P',},
-    {"ccc", 'P', 'P',},
-    {"cca", 'P', 'P',},
-    {"ccg", 'P', 'P',},
-
-    {"cat", 'H', 'H',},
-    {"cac", 'H', 'H',},
-    {"caa", 'Q', 'Q',},
-    {"cag", 'Q', 'Q',},
-
-    {"cgt", 'R', 'R',},
-    {"cgc", 'R', 'R',},
-    {"cga", 'R', 'R',},
-    {"cgg", 'R', 'R',},
-
-
-    {"att", 'I', 'I',},
-    {"atc", 'I', 'I',},
-    {"ata", 'I', 'M',},
-    {"atg", 'M', 'M',},
-
-    {"act", 'T', 'T',},
-    {"acc", 'T', 'T',},
-    {"aca", 'T', 'T',},
-    {"acg", 'T', 'T',},
-
-    {"aat", 'N', 'N',},
-    {"aac", 'N', 'N',},
-    {"aaa", 'K', 'K',},
-    {"aag", 'K', 'K',},
-
-    {"agt", 'S', 'S',},
-    {"agc", 'S', 'S',},
-    {"aga", 'R', 0,},
-    {"agg", 'R', 0,},
-
-
-    {"gtt", 'V', 'V',},
-    {"gtc", 'V', 'V',},
-    {"gta", 'V', 'V',},
-    {"gtg", 'V', 'V',},
-
-    {"gct", 'A', 'A',},
-    {"gcc", 'A', 'A',},
-    {"gca", 'A', 'A',},
-    {"gcg", 'A', 'A',},
-
-    {"gat", 'D', 'D',},
-    {"gac", 'D', 'D',},
-    {"gaa", 'E', 'E',},
-    {"gag", 'E', 'E',},
-
-    {"ggt", 'G', 'G',},
-    {"ggc", 'G', 'G',},
-    {"gga", 'G', 'G',},
-    {"ggg", 'G', 'G',},
+    {"ttt", 'F', 'F', 'a'},
+    {"ttc", 'F', 'F', 'b'},
+    {"tta", 'L', 'L', 'c'},
+    {"ttg", 'L', 'L', 'd'},
+
+    {"tct", 'S', 'S', 'e'},
+    {"tcc", 'S', 'S', 'f'},
+    {"tca", 'S', 'S', 'g'},
+    {"tcg", 'S', 'S', 'h'},
+
+    {"tat", 'Y', 'Y', 'i'},
+    {"tac", 'Y', 'Y', 'j'},
+    {"taa", 0, 0, 'k'},
+    {"tag", 0, 0, 'l'},
+
+    {"tgt", 'C', 'C', 'm'},
+    {"tgc", 'C', 'C', 'n'},
+    {"tga", 0, 'W', 'o'},
+    {"tgg", 'W', 'W', 'p'},
+
+
+    {"ctt", 'L', 'L', 'q'},
+    {"ctc", 'L', 'L', 'r'},
+    {"cta", 'L', 'L', 's'},
+    {"ctg", 'L', 'L', 't'},
+
+    {"cct", 'P', 'P', 'u'},
+    {"ccc", 'P', 'P', 'v'},
+    {"cca", 'P', 'P', 'w'},
+    {"ccg", 'P', 'P', 'x'},
+
+    {"cat", 'H', 'H', 'y'},
+    {"cac", 'H', 'H', 'z'},
+    {"caa", 'Q', 'Q', 'A'},
+    {"cag", 'Q', 'Q', 'B'},
+
+    {"cgt", 'R', 'R', 'C'},
+    {"cgc", 'R', 'R', 'D'},
+    {"cga", 'R', 'R', 'E'},
+    {"cgg", 'R', 'R', 'F'},
+
+
+    {"att", 'I', 'I', 'G'},
+    {"atc", 'I', 'I', 'H'},
+    {"ata", 'I', 'M', 'I'},
+    {"atg", 'M', 'M', 'J'},
+
+    {"act", 'T', 'T', 'K'},
+    {"acc", 'T', 'T', 'L'},
+    {"aca", 'T', 'T', 'M'},
+    {"acg", 'T', 'T', 'N'},
+
+    {"aat", 'N', 'N', 'O'},
+    {"aac", 'N', 'N', 'P'},
+    {"aaa", 'K', 'K', 'Q'},
+    {"aag", 'K', 'K', 'R'},
+
+    {"agt", 'S', 'S', 'S'},
+    {"agc", 'S', 'S', 'T'},
+    {"aga", 'R', 0, 'U'},
+    {"agg", 'R', 0, 'V'},
+
+
+    {"gtt", 'V', 'V', 'W'},
+    {"gtc", 'V', 'V', 'X'},
+    {"gta", 'V', 'V', 'Y'},
+    {"gtg", 'V', 'V', 'Z'},
+
+    {"gct", 'A', 'A', '1'},
+    {"gcc", 'A', 'A', '2'},
+    {"gca", 'A', 'A', '3'},
+    {"gcg", 'A', 'A', '4'},
+
+    {"gat", 'D', 'D', '5'},
+    {"gac", 'D', 'D', '6'},
+    {"gaa", 'E', 'E', '7'},
+    {"gag", 'E', 'E', '8'},
+
+    {"ggt", 'G', 'G', '9'},
+    {"ggc", 'G', 'G', '0'},
+    {"gga", 'G', 'G', '@'},
+    {"ggg", 'G', 'G', '$'},
 };

 /* A table that gives values 0 for t
                              1 for c
                              2 for a
                              3 for g
  * (which is order aa's are in biochemistry codon tables)
  * and gives -1 for all others. */
 int ntVal[256];
 int ntValLower[256];    /* NT values only for lower case. */
 int ntValUpper[256];    /* NT values only for upper case. */
 int ntVal5[256];
 int ntValNoN[256]; /* Like ntVal, but with T_BASE_VAL in place of -1 for nonexistent ones. */
 DNA valToNt[(N_BASE_VAL|MASKED_BASE_BIT)+1];

@@ -273,30 +274,55 @@
 if (!inittedNtVal)
     initNtVal();
 ix = 0;
 for (i=0; i<3; ++i)
     {
     int bv = ntVal[(int)dna[i]];
     if (bv<0)
         return 'X';
     ix = (ix<<2) + bv;
     }
 c = codonTable[ix].mitoCode;
 c = toupper(c);
 return c;
 }

+AA lookupUniqCodon(DNA *dna)
+/* Return the uniqCode character that codonTable assigns to the three
+ * bases starting at dna.  Returns 'X' if ntVal is negative for any of
+ * the three bases.
+ * NOTE(review): 'X' is also the uniqCode of "gtc", so the return value
+ * alone does not distinguish an invalid codon from gtc - confirm callers. */
+{
+int ix;
+int i;
+
+if (!inittedNtVal)
+    initNtVal();
+ix = 0;
+for (i=0; i<3; ++i)
+    {
+    int bv = ntVal[(int)dna[i]];
+    if (bv<0)
+        return 'X';
+    ix = (ix<<2) + bv;
+    }
+/* Do not fold case here: the table uses both 'a'-'z' and 'A'-'Z', so
+ * toupper() would give two different codons the same character. */
+return codonTable[ix].uniqCode;
+}
+
 Codon codonVal(DNA *start)
 /* Return value from 0-63 of codon starting at start.
  * Returns -1 if not a codon. */
 {
 int v1,v2,v3;

 if ((v1 = ntVal[(int)start[0]]) < 0)
     return -1;
 if ((v2 = ntVal[(int)start[1]]) < 0)
     return -1;
 if ((v3 = ntVal[(int)start[2]]) < 0)
     return -1;
 return ((v1<<4) + (v2<<2) + v3);
 }