src/hg/encode/validateFiles/validateFiles.c 1.24
1.24 2009/08/10 18:57:18 braney
more mismatch tweaks
Index: src/hg/encode/validateFiles/validateFiles.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/validateFiles/validateFiles.c,v
retrieving revision 1.23
retrieving revision 1.24
diff -b -B -U 4 -r1.23 -r1.24
--- src/hg/encode/validateFiles/validateFiles.c 10 Aug 2009 17:22:49 -0000 1.23
+++ src/hg/encode/validateFiles/validateFiles.c 10 Aug 2009 18:57:18 -0000 1.24
@@ -110,8 +110,19 @@
{"version", OPTION_BOOLEAN},
{NULL, 0},
};
+boolean checkMismatch(int ch1, int ch2)
+// checkMismatch -- return true when ch1 and ch2 should be counted as a mismatch.
+// If ch1 is not 'n' (lowercase only; visible callers pass tolower(seq[i])) that is ch1 != ch2.
+// If ch1 is 'n', ch2 is ignored: it is a mismatch unless nMatch is set.
+{
+if (ch1 != 'n')
+ return ch1 != ch2;
+
+return !nMatch;
+}
+
void initArrays()
// Set up array of chars
// dnaChars: DNA chars ACGTNacgtn, and optionally include colorspace 0-3
// qualChars: fastq quality scores as ascii [!-~] (ord(!)=33, ord(~)=126)
@@ -599,9 +610,9 @@
}
for (i=0 ; i < g->size ; ++i)
{
char c = tolower(seq[i]);
- if (!((nMatch && c == 'n') || c == g->dna[i]))
+ if (checkMismatch(c, g->dna[i]))
++mm;
}
if (mm > mismatches)
{
@@ -645,16 +656,16 @@
mm1 = 0;
for (i=0 ; i < g1->size ; ++i)
{
char c = tolower(seq1[i]);
- if (!((nMatch && c == 'n') || c == g1->dna[i]))
+ if (checkMismatch(c, g1->dna[i]))
++mm1;
}
mm2 = 0;
for (i=0 ; i < g2->size ; ++i)
{
char c = tolower(seq2[i]);
- if (!((nMatch && c == 'n') || c == g2->dna[i]))
+ if (checkMismatch(c, g2->dna[i]))
++mm2;
}
if (mmPerPair)
{