2b2874d85add602f6753ae525337a1155d2298ea
braney
  Mon May 25 06:35:18 2020 -0700
allow for fine control of display of non-synonymous changes in mafSnp
display

diff --git src/hg/utils/paSNP/paSNP.c src/hg/utils/paSNP/paSNP.c
index 8ad34c7..2b4478f 100644
--- src/hg/utils/paSNP/paSNP.c
+++ src/hg/utils/paSNP/paSNP.c
@@ -1,105 +1,115 @@
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "phyloTree.h"
 #include "axt.h"
 #include "math.h"
 #include "pa.h"
+#include "axt.h"
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "paSNP - given species list and protein alignments, generate bed file with SNPs\n"
   "usage:\n"
   "   paSNP order.lst alignments.fa out.spec\n"
   "arguments:\n"
   "   order.lst        order of species in fasta\n"
   "   alignments.fa    fasta protein alignments\n"
   "   out.spec         file to put spectra in\n"
   "options:\n"
   "   -binCol          consider only binary columns\n"
   "   -fullCol         consider only full (no dash,X,or Z) columns\n"
+  "   -outScore        output BLOSUM62 score as third field\n"	/* without it countAA() writes the constant 1 there */
   );
 }
 
 static struct optionSpec options[] = {
    {"binCol", OPTION_BOOLEAN},
    {"fullCol", OPTION_BOOLEAN},
+   {"outScore", OPTION_BOOLEAN},	/* boolean flag; main() reads it with optionExists() */
    {NULL, 0},
 };
 
 boolean binCol = FALSE;
 boolean fullCol = FALSE;
+boolean outScore = FALSE;	/* TRUE: countAA() prints the matrix score as third field; FALSE: prints 1 */
 
 
 char **speciesNames;
 int totalCountS[1000];
 int aaCountS[1000][26];
 int aaCount[26];
 int dashCountS[1000];
 int numSpecies;
+struct axtScoreScheme *ss;	/* scoring scheme; assigned in main() from blosumText, read by countAA() */
 
 void countAA( struct alignDetail *detail, int cNum, void *closure)
 {
 int ii;
 FILE *f = (FILE *) closure;
 
 char firstChar = 0;
 char *position = NULL;
 for(ii=0; ii < detail->numSpecies; ii++)
     {
     struct seqBuffer *sb = &detail->seqBuffers[ii];
 
     if (ii == 0)
 	{
 	position = sb->position;
 	firstChar = sb->buffer[cNum];
 	}
     else
 	if ((firstChar != sb->buffer[cNum]) && (sb->buffer[cNum] != '-'))
 	    {
 	    char strand = position[strlen(position) - 1];
-	    fprintf(f, "%s %s 1\n", 
+            int score = ss->matrix[toupper((unsigned char)firstChar)][toupper((unsigned char)sb->buffer[cNum])]; /* via unsigned char: a negative plain char is UB in toupper() and an out-of-range index; both residues upper-cased alike */
+	    fprintf(f, "%s %s %d\n", 
 		getPosString(position, strand, cNum, detail->startFrame, detail->endFrame),
-		 sb->species);
+		 sb->species, outScore ? score : 1);
 		 }
 	
 	
     }
 }
 
 
 void paSnp(char *orderFilename, char *fastaFile, char *outFile)
 {
 FILE *f = mustOpen(outFile, "w");
 
 alignFunc afunc = allColumns;
 columnFunc cfunc = countAA;
 
 if (binCol)
     afunc = binColumns;
 else if (fullCol)
     afunc = fullColumns;
 
 parseAli(orderFilename, fastaFile, afunc, cfunc, f);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 
 if (argc != 4)
     usage();
 
 binCol = optionExists("binCol");
 fullCol = optionExists("fullCol");
 
 if (binCol && fullCol)
     errAbort("cannot set both binCol and fullCol");
 
+outScore = optionExists("outScore");	/* may be combined with -binCol or -fullCol */
+
+ss = axtScoreSchemeFromProteinText(blosumText, "fake");	/* built even without -outScore: countAA() always reads ss; "fake" is presumably a placeholder file name - TODO confirm */
+
 paSnp(argv[1],argv[2],argv[3]);
 return 0;
 }