4898794edd81be5285ea6e544acbedeaeb31bf78 max Tue Nov 23 08:10:57 2021 -0800 Fixing pointers to README file for license in all source code files. refs #27614 diff --git src/lib/mafScore.c src/lib/mafScore.c index 6cc2675..bd04cce 100644 --- src/lib/mafScore.c +++ src/lib/mafScore.c @@ -1,186 +1,186 @@ /* Score mafs and subsets of maf. * This module is from Webb Miller at PSU. * Some description of maf scoring is included in hgLoadMaf.c comments*/ /* Copyright (C) 2011 The Regents of the University of California - * See README in this or parent directory for licensing information. */ + * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "maf.h" typedef struct gap_scores { int E; int O; } gap_scores_t; #define CLEN(s) (sizeof((s))-1) #define NACHARS 128 #define SS(c,d) ss[(uchar)c][(uchar)d] #define GAP(w,x,y,z) gop[(gtype[w]<<6)+(gtype[x]<<4)+(gtype[y]<<2)+gtype[z]] #define DASH '-' typedef int ss_t[NACHARS][NACHARS]; typedef unsigned char uchar; static ss_t ss; static gap_scores_t ds; static int gop[256], gtype[128]; static const uchar nchars[] = "ACGT"; static const int simpleScoringMatrix[4][4] = { { 100, -10000, -10000, -10000}, { -10000, 100, -10000, -10000}, { -10000, -10000, 100, -10000}, { -10000, -10000, -10000, 100}, }; static const int HOXD70_sym[4][4] = { { 91, -114, -31, -123 }, {-114, 100, -125, -31 }, { -31, -125, 100, -114 }, {-123, -31, -114, 91 }, }; /* DNA_scores -------------------------- substitution scoring matrix for DNA */ static void DNA_scores(ss_t ss, const int matrix[4][4]) { int i, j, bad, a, b, A, B; for (i = 0; i < NACHARS; ++i) for (j = 0; j < NACHARS; ++j) ss[i][j] = -100; for (i = 0; i < (signed)CLEN(nchars); ++i) { A = nchars[i]; a = tolower(A); for (j = 0; j < (signed)CLEN(nchars); ++j) { B = nchars[j]; b = tolower(B); ss[A][B] = ss[a][B] = ss[A][b] = ss[a][b] = matrix[i][j]; } } bad = -1000; for (i = 0; i < NACHARS; ++i) ss['X'][i] = ss[i]['X'] = ss['x'][i] = ss[i]['x'] = bad; } static void gap_costs(int *gop, int *gtype, int gap_open) { int i, X, D; for (i = 0; i < 128; ++i) gtype[i] = 0; D = DASH; gtype[D] = 1; for (i = 0; i < 256; ++i) gop[i] = 0; X = (uchar)'A'; GAP(X,X,X,D) = gap_open; GAP(X,X,D,X) = gap_open; GAP(X,D,D,X) = gap_open; GAP(D,X,X,D) = gap_open; GAP(D,D,X,D) = gap_open; GAP(D,D,D,X) = gap_open; } static boolean ssInitialized = FALSE; void mafScoreUseSimple() /* use a simple scoring system useful for finding mismatches */ { int i; DNA_scores(ss, simpleScoringMatrix); ds.E = 30; ds.O = 400; for (i = 0; i < 128; ++i) ss[i][DASH] = ss[DASH][i] = 0; ss[DASH][DASH] = 0; gap_costs(gop, gtype, ds.O); /* quasi-natural gap costs */ ssInitialized = TRUE; } void mafScoreUseTraditional() /* use the tradition HOX scoring system */ { int i; DNA_scores(ss, HOXD70_sym); ds.E = 30; ds.O = 400; for (i = 0; i < 128; ++i) ss[i][DASH] = ss[DASH][i] = -ds.E; ss[DASH][DASH] = 0; gap_costs(gop, gtype, ds.O); /* quasi-natural gap costs */ ssInitialized = TRUE; } double mafScoreRangeMultiz(struct mafAli *maf, int start, int size) /* Return score of a subset of an alignment. Parameters are: * maf - the alignment * start - the (zero based) offset to start calculating score * size - the size of the subset * The following relationship should hold: * scoreRange(maf,start,size) = * scoreRange(maf,0,start+size) - scoreRange(maf,0,start) */ { uchar ai, ar, bi, br; int i; double score; struct mafComp *c1, *c2; if (start < 0 || size <= 0 || start+size > maf->textSize) { errAbort( "mafScoreRange: start = %d, size = %d, textSize = %d\n", start, size, maf->textSize); } if (!ssInitialized) mafScoreUseTraditional(); score = 0.0; for (i = start; i < start+size; ++i) { for (c1 = maf->components; c1 != NULL; c1 = c1->next) { if (c1->size == 0) continue; br = c1->text[i]; for (c2 = c1->next; c2 != NULL; c2 = c2->next) { if (c2->size == 0) continue; bi = c2->text[i]; score += SS(br, bi); if (i > 0) { ar = c1->text[i-1]; ai = c2->text[i-1]; score -= GAP(ar,ai,br,bi); } } } } return score; } double mafScoreMultiz(struct mafAli *maf) /* Return score of a maf (calculated rather than what is * stored in the structure. */ { return mafScoreRangeMultiz(maf, 0, maf->textSize); } double mafScoreMultizMaxCol(int species) /* Return maximum possible score for a column. */ { int i, count = 0; for (i=1; i<species; ++i) count += i; return 100.0*count; } void mafColMinMaxScore(struct mafAli *maf, double *retMin, double *retMax) /* Get min/max maf scores for a column. */ { *retMax = mafScoreMultizMaxCol(slCount(maf->components)); *retMin = -*retMax; }