57d12692815c7c7eca4915efd3be0b6c21358702 braney Thu Sep 25 18:17:32 2014 -0700 add a mode to mafGene that lets it put out a unique character for each codon. diff --git src/hg/lib/mafGene.c src/hg/lib/mafGene.c index 047d49c..c871b0c 100644 --- src/hg/lib/mafGene.c +++ src/hg/lib/mafGene.c @@ -59,47 +59,50 @@ /* is the sequence all dashes ? */ static boolean allDashes(char *seq) { while (*seq) if (*seq++ != '-') return FALSE; return TRUE; } /* translate a nuc sequence into amino acids. If there * are any dashes in any of the three nuc positions * make the AA a dash. */ static aaSeq *doTranslate(struct dnaSeq *inSeq, unsigned offset, - unsigned inSize, boolean stop) + unsigned inSize, boolean stop, boolean doUniq) { aaSeq *seq; DNA *dna = inSeq->dna; AA *pep, aa; int i, lastCodon; int actualSize = 0; assert(offset <= inSeq->size); if ((inSize == 0) || (inSize > (inSeq->size - offset))) inSize = inSeq->size - offset; lastCodon = offset + inSize - 3; AllocVar(seq); seq->dna = pep = needLargeMem(inSize/3+1); for (i=offset; i <= lastCodon; i += 3) { + if (doUniq) + aa = lookupUniqCodon(dna+i); + else aa = lookupCodon(dna+i); if (aa == 'X') { if ((dna[i] == '-') || (dna[i+1] == '-') || (dna[i+2] == '-')) aa = '-'; } if (aa == 0) { if (stop) break; else aa = 'Z'; } @@ -234,31 +237,31 @@ verbose(2, "size %d\n", size); si->size = size; si->nucSequence = needMem(size + 1); memset(si->nucSequence, '-', size); si->aaSequence = needMem(size/3 + 1); hashAdd(siHash, si->name, si); slAddHead(&siList, si); } slReverse(&siList); return siList; } static void outSpeciesExons(FILE *f, char *dbName, struct speciesInfo *si, - struct exonInfo *giList, boolean doBlank, boolean doTable, int numCols) + struct exonInfo *giList, boolean doBlank, boolean doTable, boolean doUniq, int numCols) { int exonNum = 1; struct dnaSeq thisSeq; aaSeq *outSeq; int exonCount = 0; struct exonInfo *gi = giList; for(; gi; gi = gi->next) { if (gi->exonSize > 1) exonCount++; } for(gi = giList; gi; gi = gi->next, 
exonNum++) { @@ -292,31 +295,31 @@ &siTemp->nucSequence[gi->exonStart+1], gi->exonSize - 1); ptr += gi->exonSize - 1; break; } int lastFrame = (gi->frame + gi->exonSize) % 3; if (lastFrame == 1) /* delete the last nucleotide */ --ptr; else if (lastFrame == 2) /* add one more nucleotide from * the next exon */ *ptr++ = siTemp->nucSequence[gi->exonStart + gi->exonSize]; *ptr++ = 0; /* null terminate */ thisSeq.dna = exonBuffer; thisSeq.size = ptr - exonBuffer; - outSeq = doTranslate(&thisSeq, 0, 0, FALSE); + outSeq = doTranslate(&thisSeq, 0, 0, FALSE, doUniq); char buffer[10 * 1024]; safef(buffer, sizeof buffer, "%s_%s_%d_%d %d %d %d %s", gi->name, siTemp->name, exonNum, exonCount, outSeq->size, gi->frame, lastFrame, siTemp->curPosString->name); if (doBlank || !allDashes(outSeq->dna)) { if (doTable) { if (numCols == -1) fprintf(f, "%s ", buffer); @@ -404,31 +407,31 @@ fprintf(f, "\n"); } fprintf(f, "\n"); } fprintf(f, "\n"); } /* translate nuc sequence into an sequence of amino acids */ static void translateProtein(struct speciesInfo *si) { struct dnaSeq thisSeq; aaSeq *outSeq; thisSeq.dna = si->nucSequence; thisSeq.size = si->size; -outSeq = doTranslate(&thisSeq, 0, 0, FALSE); +outSeq = doTranslate(&thisSeq, 0, 0, FALSE, FALSE); si->aaSequence = outSeq->dna; si->aaSize = outSeq->size; } static char *allPos(struct speciesInfo *si) { char *ptr = bigBuffer; struct slName *names = si->posStrings; int size = sizeof bigBuffer; for(; names ; names = names->next) { int sz = safef(ptr, size, "%s", names->name); ptr += sz; size -= sz; @@ -439,38 +442,39 @@ size--; } } return bigBuffer; } /* output a particular species sequence to the file stream */ static void writeOutSpecies(FILE *f, char *dbName, struct speciesInfo *si, struct exonInfo *giList, unsigned options, int numCols) { boolean inExons = options & MAFGENE_EXONS; boolean noTrans = options & MAFGENE_NOTRANS; boolean doBlank = options & MAFGENE_OUTBLANK; boolean doTable = options & MAFGENE_OUTTABLE; +boolean doUniq = 
options & MAFGENE_UNIQUEAA; if (inExons) { if (noTrans) outSpeciesExonsNoTrans(f, dbName, si, giList, doBlank, doTable, numCols); else - outSpeciesExons(f, dbName, si, giList, doBlank, doTable, numCols); + outSpeciesExons(f, dbName, si, giList, doBlank, doTable, doUniq, numCols); return; } struct exonInfo *lastGi; for(lastGi = giList; lastGi->next ; lastGi = lastGi->next) ; if (noTrans) { for(; si ; si = si->next) { if (doBlank || !allDashes(si->nucSequence)) { char buffer[10 * 1024];