57d12692815c7c7eca4915efd3be0b6c21358702
braney
  Thu Sep 25 18:17:32 2014 -0700
add a mode to mafGene that lets it put out a unique character for each codon.

diff --git src/hg/lib/mafGene.c src/hg/lib/mafGene.c
index 047d49c..c871b0c 100644
--- src/hg/lib/mafGene.c
+++ src/hg/lib/mafGene.c
@@ -59,47 +59,50 @@
 /* is the sequence all dashes ? */
 static boolean allDashes(char *seq)
 {
 while (*seq)
     if (*seq++ != '-')
 	return FALSE;
 
 return TRUE;
 }
 
 /* translate a nuc sequence into amino acids. If there
  * are any dashes in any of the three nuc positions
  * make the AA a dash.
  */
 static aaSeq *doTranslate(struct dnaSeq *inSeq, unsigned offset, 
-    unsigned inSize, boolean stop)
+    unsigned inSize, boolean stop, boolean doUniq)
 {
 aaSeq *seq;
 DNA *dna = inSeq->dna;
 AA *pep, aa;
 int i, lastCodon;
 int actualSize = 0;
 
 assert(offset <= inSeq->size);
 if ((inSize == 0) || (inSize > (inSeq->size - offset)))
     inSize = inSeq->size - offset;
 lastCodon = offset + inSize - 3;
 
 AllocVar(seq);
 seq->dna = pep = needLargeMem(inSize/3+1);
 for (i=offset; i <= lastCodon; i += 3)
     {
+    if (doUniq)
+	aa = lookupUniqCodon(dna+i);
+    else
 	aa = lookupCodon(dna+i);
     if (aa == 'X')
 	{
 	if ((dna[i] == '-') ||
 	    (dna[i+1] == '-') ||
 	    (dna[i+2] == '-'))
 	    aa = '-';
 	}
     if (aa == 0)
 	{
         if (stop)
 	    break;
 	else
 	    aa = 'Z';
 	}
@@ -234,31 +237,31 @@
     verbose(2, "size %d\n", size);
     si->size = size;
     si->nucSequence = needMem(size + 1);
     memset(si->nucSequence, '-', size);
     si->aaSequence = needMem(size/3 + 1);
     hashAdd(siHash, si->name, si);
     slAddHead(&siList, si);
     }
 slReverse(&siList);
 
 return siList;
 }
 
 
 static void outSpeciesExons(FILE *f, char *dbName, struct speciesInfo *si, 
-    struct exonInfo *giList, boolean doBlank, boolean doTable, int numCols)
+    struct exonInfo *giList, boolean doBlank, boolean doTable, boolean doUniq, int numCols)
 {
 int exonNum = 1;
 struct dnaSeq thisSeq;
 aaSeq *outSeq;
 int exonCount = 0;
 struct exonInfo *gi = giList;
 
 for(; gi; gi = gi->next)
     {
     if (gi->exonSize > 1)
 	exonCount++;
     }
 
 for(gi = giList; gi; gi = gi->next, exonNum++)
     {
@@ -292,31 +295,31 @@
 		    &siTemp->nucSequence[gi->exonStart+1], gi->exonSize - 1);
 		ptr += gi->exonSize - 1;
 		break;
 	    }
 
 	int lastFrame = (gi->frame + gi->exonSize) % 3;
 	if (lastFrame == 1) /* delete the last nucleotide */
 	    --ptr;
 	else if (lastFrame == 2) /* add one more nucleotide from
 	                          * the next exon */
 	    *ptr++ = siTemp->nucSequence[gi->exonStart + gi->exonSize];
 	*ptr++ = 0;   /* null terminate */
 
 	thisSeq.dna = exonBuffer;
 	thisSeq.size = ptr - exonBuffer;
-	outSeq =  doTranslate(&thisSeq, 0,  0, FALSE);
+	outSeq =  doTranslate(&thisSeq, 0,  0, FALSE, doUniq);
 	char buffer[10 * 1024];
 
 	safef(buffer, sizeof buffer,  "%s_%s_%d_%d %d %d %d %s",
 	    gi->name, 
 	    siTemp->name, exonNum, exonCount, 
 	    outSeq->size,
 	    gi->frame, lastFrame,
 	    siTemp->curPosString->name);
 
 	if (doBlank || !allDashes(outSeq->dna))
 	    {
 	    if (doTable)
 		{
 		if (numCols == -1)
 		    fprintf(f, "%s ", buffer);
@@ -404,31 +407,31 @@
 	fprintf(f, "\n");
 	}
     fprintf(f, "\n");
     }
 fprintf(f, "\n");
 }
 
 /* translate nuc sequence into an sequence of amino acids */
 static void translateProtein(struct speciesInfo *si)
 {
 struct dnaSeq thisSeq;
 aaSeq *outSeq;
 
 thisSeq.dna = si->nucSequence;
 thisSeq.size = si->size;
-outSeq =  doTranslate(&thisSeq, 0,  0, FALSE);
+outSeq =  doTranslate(&thisSeq, 0,  0, FALSE, FALSE);
 si->aaSequence  = outSeq->dna;
 si->aaSize = outSeq->size;
 }
 
 static char *allPos(struct speciesInfo *si)
 {
 char *ptr = bigBuffer;
 struct slName *names = si->posStrings;
 int size = sizeof bigBuffer;
 
 for(; names ; names = names->next)
     {
     int sz = safef(ptr, size, "%s", names->name);
     ptr += sz;
     size -= sz;
@@ -439,38 +442,39 @@
 	size--;
 	}
     }
 
 return bigBuffer;
 }
 
 /* output a particular species sequence to the file stream */
 static void writeOutSpecies(FILE *f, char *dbName, struct speciesInfo *si, 
     struct exonInfo *giList, unsigned options, int numCols)
 {
 boolean inExons = options & MAFGENE_EXONS;
 boolean noTrans = options & MAFGENE_NOTRANS;
 boolean doBlank = options & MAFGENE_OUTBLANK;
 boolean doTable = options & MAFGENE_OUTTABLE;
+boolean doUniq = options & MAFGENE_UNIQUEAA;
 
 if (inExons)
     {
     if (noTrans)
 	outSpeciesExonsNoTrans(f, dbName, si, giList, doBlank, 
 	    doTable, numCols);
     else
-	outSpeciesExons(f, dbName, si, giList, doBlank, doTable, numCols);
+	outSpeciesExons(f, dbName, si, giList, doBlank, doTable, doUniq, numCols);
     return;
     }
 
 struct exonInfo *lastGi;
 
 for(lastGi = giList; lastGi->next ; lastGi = lastGi->next)
     ;
 
 if (noTrans)
     {
     for(; si ; si = si->next)
 	{
 	if (doBlank || !allDashes(si->nucSequence))
 	    {
 	    char buffer[10 * 1024];