src/lib/psl.c 1.81
1.81 2009/05/31 07:28:33 markd
don't modify input sequences when converting alignments
Index: src/lib/psl.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/psl.c,v
retrieving revision 1.80
retrieving revision 1.81
diff -b -B -U 4 -r1.80 -r1.81
--- src/lib/psl.c 19 May 2008 18:14:36 -0000 1.80
+++ src/lib/psl.c 31 May 2009 07:28:33 -0000 1.81
@@ -1680,91 +1680,46 @@
lineFileClose(&sf);
return hash;
}
-static int countInitialChars(char *s, char c)
-/* Count number of initial chars in s that match c. */
+static boolean isDelChar(char c)
+/* is this an indel character? */
{
-int count = 0;
-char d;
-while ((d = *s++) != 0)
- {
- if (c == d)
- ++count;
- else
- break;
- }
-return count;
+return (c == '-') || (c == '.') || (c == '=') || (c == '_');
}
-static int countTerminalChars(char *s, char c)
-/* Count number of terminal chars in s that match c. */
-{
-int len = strlen(s), i;
-int count = 0;
-for (i=len-1; i>=0; --i)
- {
- if (c == s[i])
- ++count;
- else
- break;
- }
-return count;
-}
-
-static int countNonInsert(char *s, int size)
-/* Count number of characters in initial part of s that
- * are not '-'. */
-{
-int count = 0;
-int i;
-for (i=0; i<size; ++i)
- if (*s++ != '-')
- ++count;
-return count;
-}
-
-static void trimAlignment(struct psl* psl, char** qStringPtr, char** tStringPtr,
+static void trimAlignment(struct psl* psl, char** qStrPtr, char** tStrPtr,
int* aliSizePtr)
/* remove leading or trailing indels from alignment */
{
-char* qString = *qStringPtr;
-char* tString = *tStringPtr;
+char* qStr = *qStrPtr;
+char* tStr = *tStrPtr;
int aliSize = *aliSizePtr;
-int qStartInsert = countInitialChars(qString, '-');
-int tStartInsert = countInitialChars(tString, '-');
-int qEndInsert = countTerminalChars(qString, '-');
-int tEndInsert = countTerminalChars(tString, '-');
-int startInsert = max(qStartInsert, tStartInsert);
-int endInsert = max(qEndInsert, tEndInsert);
-int qNonCount, tNonCount;
-
-if (startInsert > 0)
- {
- qNonCount = countNonInsert(qString, startInsert);
- tNonCount = countNonInsert(tString, startInsert);
- qString += startInsert;
- tString += startInsert;
- aliSize -= startInsert;
- psl->qStart += qNonCount;
- psl->tStart += tNonCount;
- }
-if (endInsert > 0)
- {
- aliSize -= endInsert;
- qNonCount = countNonInsert(qString+aliSize, endInsert);
- tNonCount = countNonInsert(tString+aliSize, endInsert);
- qString[aliSize] = 0;
- tString[aliSize] = 0;
- psl->qEnd -= qNonCount;
- psl->tEnd -= tNonCount;
+
+// skip leading indels
+while ((aliSize > 0) && (isDelChar(*qStr) || isDelChar(*tStr)))
+ {
+ if (!isDelChar(*qStr))
+ psl->qStart++;
+ else if (!isDelChar(*tStr))
+ psl->tStart++;
+ qStr++;
+ tStr++;
+ aliSize--;
+ }
+
+// skip trailing indels
+while ((aliSize > 0) && (isDelChar(qStr[aliSize-1]) || isDelChar(tStr[aliSize-1])))
+ {
+ if (!isDelChar(qStr[aliSize-1]))
+ psl->qEnd--;
+ else if (!isDelChar(tStr[aliSize-1]))
+ psl->tEnd--;
+ aliSize--;
}
-*qStringPtr = qString;
-*tStringPtr = tString;
+*qStrPtr = qStr;
+*tStrPtr = tStr;
*aliSizePtr = aliSize;
-/* recursive call to handle double gap */
-if (startInsert > 0 || endInsert > 0)
- trimAlignment(psl, qStringPtr, tStringPtr, aliSizePtr);
}
static void addBlock(struct psl* psl, int qs, int qe, int ts, int te,
int *blockSpace)
@@ -1782,9 +1737,9 @@
static void accumCounts(struct psl *psl, char prevQ, char prevT,
char q, char t, unsigned options)
/* accumulate block and base counts */
{
-if ((q != '-') && (t != '-'))
+if (!isDelChar(q) && !isDelChar(t))
{
/* aligned column. */
char qu = toupper(q);
char tu = toupper(t);
@@ -1799,20 +1754,20 @@
}
else
psl->misMatch++;
}
-else if ((q == '-') && (t != '-'))
+else if (isDelChar(q) && !isDelChar(t))
{
/* target insert */
psl->tBaseInsert++;
- if (prevQ != '-')
+ if (!isDelChar(prevQ))
psl->tNumInsert++;
}
-else if ((t == '-') && (q != '-'))
+else if (isDelChar(t) && !isDelChar(q))
{
/* query insert */
psl->qBaseInsert++;
- if (prevT != '-')
+ if (!isDelChar(prevT))
psl->qNumInsert++;
}
}
@@ -1868,24 +1823,24 @@
for (i=0; i<aliSize; ++i)
{
char q = qString[i];
char t = tString[i];
- if ((q == '-') && (t == '-'))
+ if (isDelChar(q) && isDelChar(t))
{
continue; /* nothing in this column, just ignore it */
}
- else if ((q == '-') || (t == '-'))
+ else if (isDelChar(q) || isDelChar(t))
{
/* insert in one of the columns */
if (!eitherInsert)
{
/* end of a block */
addBlock(psl, qs, qe, ts, te, &blockSpace);
eitherInsert = TRUE;
}
- if (q != '-')
+ if (!isDelChar(q))
qe += 1;
- if (t != '-')
+ if (!isDelChar(t))
te += 1;
}
else
{