src/lib/common.c 1.129

1.129 2009/04/15 17:40:39 kent
Adding numerically aware string compares.
Index: src/lib/common.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/common.c,v
retrieving revision 1.128
retrieving revision 1.129
diff -b -B -U 4 -r1.128 -r1.129
--- src/lib/common.c	17 Mar 2009 11:20:15 -0000	1.128
+++ src/lib/common.c	15 Apr 2009 17:40:39 -0000	1.129
@@ -1267,8 +1267,86 @@
    ++count;
 return count;
 }
 
+int countLeadingDigits(char *s)
+/* Return number of leading decimal digits in s (0 if s is empty or starts
+ * with a non-digit).  s must be a non-NULL, zero-terminated string. */
+{
+int count = 0;
+/* Cast to unsigned char: isdigit() is undefined for negative arguments,
+ * which a plain char can be for bytes above 0x7F. */
+while (isdigit((unsigned char)s[count]))
+   ++count;
+return count;
+}
+
+int countLeadingNondigits(char *s)
+/* Return number of characters in s before the first decimal digit, or the
+ * whole length of s if it contains no digit.  s must be a non-NULL,
+ * zero-terminated string. */
+{
+int count = 0;
+char c;
+/* Cast to unsigned char: isdigit() is undefined for negative arguments,
+ * which a plain char can be for bytes above 0x7F. */
+while ((c = s[count]) != 0 && !isdigit((unsigned char)c))
+   ++count;
+return count;
+}
+
+static int cmpDigitRuns(char *a, int aLen, char *b, int bLen)
+/* Compare two runs of decimal digits by numeric value.  Works on the digits
+ * themselves rather than converting with atoi, so runs too long to fit in an
+ * int are still ordered correctly.  Returns <0, 0 or >0. */
+{
+for (; aLen > 0 && *a == '0'; --aLen)   /* Leading zeros don't change the value. */
+   ++a;
+for (; bLen > 0 && *b == '0'; --bLen)
+   ++b;
+if (aLen != bLen)
+   return aLen - bLen;         /* More significant digits: bigger number. */
+return memcmp(a, b, aLen);    /* Same length: digit order is numeric order. */
+}
+
+int cmpStringsWithEmbeddedNumbers(char *a, char *b)
+/* Compare strings such as gene names that may have embedded numbers,
+ * so that bmp4a comes before bmp14a.  Returns <0, 0 or >0 like strcmp.
+ * Numbers compare by value, so leading zeros are ignored (a07 ties with a7).
+ * Where only one of the strings has a number at the current position, the
+ * string with the number sorts after the other. */
+{
+for (;;)
+   {
+   /* Figure out number of digits at start.  A number on one side only can
+    * never be a tie, so decide right away. */
+   int aNum = countLeadingDigits(a);
+   int bNum = countLeadingDigits(b);
+   if ((aNum > 0) != (bNum > 0))
+       return (aNum > 0) ? 1 : -1;
+
+   /* Both sides have a number, or neither does (in which case this compares
+    * two empty runs and gives 0).  If numbers agree step over numerical part,
+    * otherwise return difference. */
+   int diff = cmpDigitRuns(a, aNum, b, bNum);
+   if (diff != 0)
+       return diff;
+   a += aNum;
+   b += bNum;
+
+   /* Count number of non-digits at start. */
+   int aNonNum = countLeadingNondigits(a);
+   int bNonNum = countLeadingNondigits(b);
+
+   // If different sizes of non-numerical part, then don't match, let strcmp sort out how
+   if (aNonNum != bNonNum)
+       return strcmp(a,b);
+   // If no characters left on either side then they are the same!
+   if (aNonNum == 0)
+       return 0;
+   // Non-numerical part is the same length and non-zero.  See if it is identical.  Return if not.
+   diff = memcmp(a, b, aNonNum);
+   if (diff != 0)
+       return diff;
+   a += aNonNum;
+   b += bNonNum;
+   }
+}
+
+int cmpWordsWithEmbeddedNumbers(char *a, char *b)
+/* Case insensitive version of cmpStringsWithEmbeddedNumbers.  The comparison
+ * is done on cloneString copies of a and b that have been passed through
+ * strUpper; both copies are freed before returning. */
+{
+char *aCopy = cloneString(a), *bCopy = cloneString(b);
+char *aUpper = strUpper(aCopy);
+char *bUpper = strUpper(bCopy);
+int result = cmpStringsWithEmbeddedNumbers(aUpper, bUpper);
+freeMem(aCopy);
+freeMem(bCopy);
+return result;
+}
+
 int countSame(char *a, char *b)
 /* Count number of characters that from start in a,b that are same. */
 {
 char c;