2b59e335f95443402d6f63b4fd8c8494506c64fd hiram Fri Dec 13 14:50:46 2013 -0800
allow parsing of settings file from lastz_D tuning operations refs #12260
diff --git src/lib/axt.c src/lib/axt.c
index 3272d63..3dca3a4 100644
--- src/lib/axt.c
+++ src/lib/axt.c
@@ -685,79 +685,147 @@
 
 readInGulp(fileName, &string, NULL);
 ss = axtScoreSchemeFromProteinText(string, fileName);
 freeMem(string);
 return ss;
 }
 
 struct axtScoreScheme *axtScoreSchemeReadLf(struct lineFile *lf )
 /* Read in scoring scheme from file. Looks like
     A    C    G    T
     91 -114  -31 -123
     -114  100 -125  -31
     -31 -125  100 -114
     -123  -31 -114   91
     O = 400, E = 30
+
+2013-12-13 - upgrading to allow reading of newer format settings file
+from lastz tuning output.  This file has the matrix at the end of
+the file, and other settings before that.  Will include the other settings
+in the 'ss->extra' field *WITHOUT* the O= and E= which should always
+be O=400 and E=30 despite what the tuning settings file says.
+Example settings file:
+#############################################################################
+# (a LASTZ scoring set, created by "LASTZ --infer")
+
+bad_score = X:-1736 # used for sub[X][*] and sub[*][X]
+fill_score = -174 # used when sub[*][*] not otherwise defined
+
+# (score parameters added by expand_scores_file)
+
+T=2
+O=565
+E=43
+X=790
+Y=4865
+K=3000
+L=3000
+
+      A     C     G     T
+A    79   -84   -55  -128
+C   -84   100  -174   -55
+G   -55  -174   100   -84
+T  -128   -55   -84    79
+#############################################################################
+
 */
 {
-char *line, *row[4], *parts[32];
+char *line, *row[6], *parts[32];
 int i,j, partCount;
 struct axtScoreScheme *ss;
 boolean gotO = FALSE, gotE = FALSE;
 static int trans[4] = {'a', 'c', 'g', 't'};
 
 AllocVar(ss);
 ss->extra = NULL;
-if (!lineFileRow(lf, row))
-    shortScoreScheme(lf);
-if (row[0][0] != 'A' || row[1][0] != 'C' || row[2][0] != 'G'
+struct dyString *dyExtra = newDyString(128);
+
+int wordCount = lineFileChopNext(lf, row, ArraySize(row));
+if (!wordCount)
+    shortScoreScheme(lf); // empty file
+boolean done = FALSE;
+while (! done)
+    {
+    // a setting will have an '=' in either first or second word
+    if (stringIn("=",row[0]) || (wordCount > 1 && stringIn("=", row[1])))
+        {
+        // collapse words to eliminate white space confusion
+        struct dyString *dy = newDyString(128);
+        for (i = 0; i < wordCount; ++i)
+            {
+            dyStringPrintf(dy, "%s", row[i]);
+            }
+        char *line=dyStringCannibalize(&dy);
+        // eliminate trailing comments
+        chopSuffixAt(line, '#');
+        // only tag=value is left, extract those two:
+        chopString(line, "=", parts, ArraySize(parts));
+        if (!(sameString(parts[0],"O") || sameString(parts[0],"E")))
+            dyStringPrintf(dyExtra, "%s=%s,", parts[0], parts[1]);
+        freeMem(line);
+        wordCount = lineFileChopNext(lf, row, ArraySize(row));
+        }
+    // not a setting, expecting a matrix definition
+    else if (row[0][0] != 'A' || row[1][0] != 'C' || row[2][0] != 'G'
         || row[3][0] != 'T')
     errAbort("%s doesn't seem to be a score matrix file", lf->fileName);
+    else // have reached the matrix definition at end of the file
+        {
 for (i=0; i<4; ++i)
     {
-    if (!lineFileRow(lf, row))
-        shortScoreScheme(lf);
-    for (j=0; j<4; ++j)
-        ss->matrix[trans[i]][trans[j]] = lineFileNeedNum(lf, row, j);
+    wordCount = lineFileChopNext(lf, row, ArraySize(row));
+    if (!wordCount)
+        shortScoreScheme(lf); // did not find four lines
+    int startColumn = 0;
+    if (5 == wordCount) // skip first column when there are 5
+        startColumn = 1;
+    for (j=startColumn; j<(startColumn+4); ++j)
+        ss->matrix[trans[i]][trans[j-startColumn]] = lineFileNeedNum(lf, row, j);
     }
 if (lineFileNext(lf, &line, NULL))
     {
-    ss->extra = cloneString(line);
+    dyStringPrintf(dyExtra, "%s,", line);
     partCount = chopString(line, " =,\t", parts, ArraySize(parts));
     for (i=0; i<partCount-1; i += 2)
         {
         if (sameString(parts[i], "O"))
             {
             gotO = TRUE;
             ss->gapOpen = atoi(parts[i+1]);
             }
         if (sameString(parts[i], "E"))
             {
             gotE = TRUE;
             ss->gapExtend = atoi(parts[i+1]);
             }
         }
     if (!gotO || !gotE)
         errAbort("Expecting O = and E = in last line of %s", lf->fileName);
     if (ss->gapOpen <= 0 || ss->gapExtend <= 0)
         errAbort("Must have positive gap scores");
     }
 else
     {
     ss->gapOpen = 400;
     ss->gapExtend = 30;
     }
+    done = TRUE;
+        }
+    }
+ss->extra = dyStringCannibalize(&dyExtra);
+if (',' == lastChar(ss->extra))
+    trimLastChar(ss->extra);
 propagateCase(ss);
 return ss;
 }
 
 struct axtScoreScheme *axtScoreSchemeRead(char *fileName)
 /* Read in scoring scheme from file. Looks like
     A    C    G    T
     91 -114  -31 -123
     -114  100 -125  -31
     -31 -125  100 -114
     -123  -31 -114   91
     O = 400, E = 30
 */
 {
 struct lineFile *lf = lineFileOpen(fileName, TRUE);