2b2874d85add602f6753ae525337a1155d2298ea braney Mon May 25 06:35:18 2020 -0700 allow for fine control of display of non-synonomous changes in mafSnp display diff --git src/hg/utils/paSNP/paSNP.c src/hg/utils/paSNP/paSNP.c index 8ad34c7..2b4478f 100644 --- src/hg/utils/paSNP/paSNP.c +++ src/hg/utils/paSNP/paSNP.c @@ -1,105 +1,115 @@ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "phyloTree.h" #include "axt.h" #include "math.h" #include "pa.h" +#include "axt.h" void usage() /* Explain usage and exit. */ { errAbort( "paSNP - given species list and protein alignments, generate bed file with SNPs\n" "usage:\n" " paSNP order.lst alignments.fa out.spec\n" "arguments:\n" " order.lst order of species in fasta\n" " alignments.fa fasta protein alignments\n" " out.spec file to put spectra in\n" "options:\n" " -binCol consider only binary columns\n" " -fullCol consider only full (no dash,X,or Z) columns\n" + " -outScore output BLOSUM62 score as third field\n" ); } static struct optionSpec options[] = { {"binCol", OPTION_BOOLEAN}, {"fullCol", OPTION_BOOLEAN}, + {"outScore", OPTION_BOOLEAN}, {NULL, 0}, }; boolean binCol = FALSE; boolean fullCol = FALSE; +boolean outScore = FALSE; char **speciesNames; int totalCountS[1000]; int aaCountS[1000][26]; int aaCount[26]; int dashCountS[1000]; int numSpecies; +struct axtScoreScheme *ss; void countAA( struct alignDetail *detail, int cNum, void *closure) { int ii; FILE *f = (FILE *) closure; char firstChar = 0; char *position = NULL; for(ii=0; ii < detail->numSpecies; ii++) { struct seqBuffer *sb = &detail->seqBuffers[ii]; if (ii == 0) { position = sb->position; firstChar = sb->buffer[cNum]; } else if ((firstChar != sb->buffer[cNum]) && (sb->buffer[cNum] != '-')) { char strand = position[strlen(position) - 1]; - fprintf(f, "%s %s 1\n", + int score = ss->matrix[(int)toupper(firstChar)][(int)sb->buffer[cNum]]; + fprintf(f, "%s %s %d\n", getPosString(position, strand, cNum, detail->startFrame, detail->endFrame), - sb->species); + sb->species, outScore ? score : 1); } } } void paSnp(char *orderFilename, char *fastaFile, char *outFile) { FILE *f = mustOpen(outFile, "w"); alignFunc afunc = allColumns; columnFunc cfunc = countAA; if (binCol) afunc = binColumns; else if (fullCol) afunc = fullColumns; parseAli(orderFilename, fastaFile, afunc, cfunc, f); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc != 4) usage(); binCol = optionExists("binCol"); fullCol = optionExists("fullCol"); if (binCol && fullCol) errAbort("cannot set both binCol and fullCol"); +outScore = optionExists("outScore"); + +ss = axtScoreSchemeFromProteinText(blosumText, "fake"); + paSnp(argv[1],argv[2],argv[3]); return 0; }