src/lib/psl.c 1.81

1.81 2009/05/31 07:28:33 markd
don't modify input sequences when converting alignments
Index: src/lib/psl.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/psl.c,v
retrieving revision 1.80
retrieving revision 1.81
diff -b -B -U 4 -r1.80 -r1.81
--- src/lib/psl.c	19 May 2008 18:14:36 -0000	1.80
+++ src/lib/psl.c	31 May 2009 07:28:33 -0000	1.81
@@ -1680,91 +1680,46 @@
 lineFileClose(&sf);
 return hash;
 }
 
-static int countInitialChars(char *s, char c)
-/* Count number of initial chars in s that match c. */
+static boolean isDelChar(char c)
+/* is this an indel character? */
 {
-int count = 0;
-char d;
-while ((d = *s++) != 0)
-    {
-    if (c == d)
-        ++count;
-    else
-        break;
-    }
-return count;
+return (c == '-') || (c == '.') || (c == '=') || (c == '_');
 }
 
-static int countTerminalChars(char *s, char c)
-/* Count number of terminal chars in s that match c. */
-{
-int len = strlen(s), i;
-int count = 0;
-for (i=len-1; i>=0; --i)
-    {
-    if (c == s[i])
-        ++count;
-    else
-        break;
-    }
-return count;
-}
-
-static int countNonInsert(char *s, int size)
-/* Count number of characters in initial part of s that
- * are not '-'. */
-{
-int count = 0;
-int i;
-for (i=0; i<size; ++i)
-    if (*s++ != '-')
-        ++count;
-return count;
-}
-
-static void trimAlignment(struct psl* psl, char** qStringPtr, char** tStringPtr,
+static void trimAlignment(struct psl* psl, char** qStrPtr, char** tStrPtr,
                           int* aliSizePtr)
 /* remove leading or trailing indels from alignment */
 {
-char* qString = *qStringPtr;
-char* tString = *tStringPtr;
+char* qStr = *qStrPtr;
+char* tStr = *tStrPtr;
 int aliSize = *aliSizePtr;
-int qStartInsert = countInitialChars(qString, '-');
-int tStartInsert = countInitialChars(tString, '-');
-int qEndInsert = countTerminalChars(qString, '-');
-int tEndInsert = countTerminalChars(tString, '-');
-int startInsert = max(qStartInsert, tStartInsert);
-int endInsert = max(qEndInsert, tEndInsert);
-int qNonCount, tNonCount;
-
-if (startInsert > 0)
-    {
-    qNonCount = countNonInsert(qString, startInsert);
-    tNonCount = countNonInsert(tString, startInsert);
-    qString += startInsert;
-    tString += startInsert;
-    aliSize -= startInsert;
-    psl->qStart += qNonCount;
-    psl->tStart += tNonCount;
-    }
-if (endInsert > 0)
-    {
-    aliSize -= endInsert;
-    qNonCount = countNonInsert(qString+aliSize, endInsert);
-    tNonCount = countNonInsert(tString+aliSize, endInsert);
-    qString[aliSize] = 0;
-    tString[aliSize] = 0;
-    psl->qEnd -= qNonCount;
-    psl->tEnd -= tNonCount;
+
+// skip leading indels
+while ((aliSize > 0) && (isDelChar(*qStr) || isDelChar(*tStr)))
+    {
+    if (!isDelChar(*qStr))
+        psl->qStart++;
+    else if (!isDelChar(*tStr))
+        psl->tStart++;
+    qStr++;
+    tStr++;
+    aliSize--;
+    }
+
+// skip trailing indels
+while ((aliSize > 0) && (isDelChar(qStr[aliSize-1]) || isDelChar(tStr[aliSize-1])))
+    {
+    if (!isDelChar(qStr[aliSize-1]))
+        psl->qEnd--;
+    else if (!isDelChar(tStr[aliSize-1]))
+        psl->tEnd--;
+    aliSize--;
     }
-*qStringPtr = qString;
-*tStringPtr = tString;
+*qStrPtr = qStr;
+*tStrPtr = tStr;
 *aliSizePtr = aliSize;
-/* recursive call to handle double gap */
-if (startInsert > 0 || endInsert > 0)
-    trimAlignment(psl, qStringPtr, tStringPtr, aliSizePtr);
 }
 
 static void addBlock(struct psl* psl, int qs, int qe, int ts, int te,
                      int *blockSpace)
@@ -1782,9 +1737,9 @@
 static void accumCounts(struct psl *psl, char prevQ, char prevT,
                         char q, char t, unsigned options)
 /* accumulate block and base counts  */
 {
-if ((q != '-') && (t != '-'))
+if (!isDelChar(q) && !isDelChar(t))
     {
     /* aligned column. */
     char qu = toupper(q);
     char tu = toupper(t);
@@ -1799,20 +1754,20 @@
         }
     else
         psl->misMatch++;
     }
-else if ((q == '-') && (t != '-'))
+else if (isDelChar(q) && !isDelChar(t))
     {
     /* target insert */
     psl->tBaseInsert++;
-    if (prevQ != '-')
+    if (!isDelChar(prevQ))
         psl->tNumInsert++;
     }
-else if ((t == '-') && (q != '-'))
+else if (isDelChar(t) && !isDelChar(q))
     {
     /* query insert */
     psl->qBaseInsert++;
-    if (prevT != '-')
+    if (!isDelChar(prevT))
         psl->qNumInsert++;
     }
 }
 
@@ -1868,24 +1823,24 @@
 for (i=0; i<aliSize; ++i)
     {
     char q = qString[i];
     char t = tString[i];
-    if ((q == '-') && (t == '-'))
+    if (isDelChar(q) && isDelChar(t))
         {
         continue; /* nothing in this column, just ignore it */
         }
-    else if ((q == '-') || (t == '-'))
+    else if (isDelChar(q) || isDelChar(t))
         {
         /* insert in one of the columns */
 	if (!eitherInsert)
 	    {
             /* end of a block */
             addBlock(psl, qs, qe, ts, te, &blockSpace);
 	    eitherInsert = TRUE;
 	    }
-	if (q != '-')
+	if (!isDelChar(q))
             qe += 1;
-	if (t != '-')
+	if (!isDelChar(t))
             te += 1;
 	}
     else
         {