src/hg/encode/validateFiles/validateFiles.c 1.43

1.43 2010/05/26 13:35:31 braney
move BAM specific code inside ifdef
Index: src/hg/encode/validateFiles/validateFiles.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/validateFiles/validateFiles.c,v
retrieving revision 1.42
retrieving revision 1.43
diff -b -B -U 4 -r1.42 -r1.43
--- src/hg/encode/validateFiles/validateFiles.c	21 May 2010 18:05:14 -0000	1.42
+++ src/hg/encode/validateFiles/validateFiles.c	26 May 2010 13:35:31 -0000	1.43
@@ -592,101 +592,8 @@
     }
 return TRUE;
 }
 
-boolean checkCigarMismatches(char *file, int line, char *chrom, unsigned chromStart, char strand, char *seq, unsigned int *cigarPacked, int nCigar)
-{
-static char cacheChrom[1024];
-static struct dnaSeq *cacheSeq = NULL;
-
-if (!genome)
-    errAbort("Need to specify genome if checking BAM files");
-
-if ((line % mmCheckOneInN) != 0)
-    return TRUE; // dont check if this is not one in N
-
-// read the whole chrom
-if ((cacheChrom == NULL) || !sameString(chrom, cacheChrom))
-    {
-    freeDnaSeq(&cacheSeq);
-    int size =  twoBitSeqSize(genome, chrom);
-    cacheSeq = twoBitReadSeqFragLower(genome, chrom, 0, size);
-    strcpy(cacheChrom, chrom);
-    verbose(2, "read in chrom %s size %d\n",cacheChrom, size);
-    }
-
-
-int curPosGenome = chromStart;
-int i, j;
-int mm = 0;
-char dna[10000], *dnaPtr = dna;
-for (i = 0;   (i < nCigar);  i++)
-    {
-    char op;
-    int size = bamUnpackCigarElement(cigarPacked[i], &op);
-    switch (op)
-	{
-	case 'M': // match or mismatch (gapless aligned block)
-            for (j=0 ; j < size; ++j)
-                {
-                *dnaPtr++ = cacheSeq->dna[curPosGenome];
-                curPosGenome++;
-                }
-	    break;
-	case 'I': // inserted in query
-	case 'S': // skipped query bases at beginning or end ("soft clipping")
-            for(j=0; j < size; j++)
-                *dnaPtr++ = '-';
-	    break;
-	case 'D': // deleted from query
-	case 'N': // long deletion from query (intron as opposed to small del)
-	    curPosGenome += size;
-	    break;
-	case 'H': // skipped query bases not stored in record's query sequence ("hard clipping")
-	case 'P': // P="silent deletion from padded reference sequence" -- ignore these.
-	    break;
-	default:
-	    errAbort("unrecognized CIGAR op %c -- update me", op);
-	}
-    }
-
-int length = strlen(seq);
-if (matchFirst && (matchFirst < length))
-    length = matchFirst;
-int checkLength = length;
-
-int start, incr;
-if (strand == '-')
-    {
-    start = strlen(seq) - 1;
-    incr = -1;
-    }
-else
-    {
-    start = 0;
-    incr = 1;
-    }
-
-for (i = start; (strand == '-') ? i <= 0 : i < strlen(seq); i += incr)
-    {
-    char c = tolower(seq[i]);
-    if ((dna[i] == '-') || (checkMismatch(c,  dna[i])))
-        ++mm;
-
-    if (--length == 0)
-        break;
-    }
-
-
-if (mm > mismatches)
-    {
-    warn("Error [file=%s, line=%d]: too many mismatches (found %d/%d, maximum is %d) (%s: %d  query %s  dna %s )\n",
-         file, line, mm, checkLength, mismatches, chrom, chromStart, seq, dna);
-    return FALSE;
-    }
-
-return TRUE;
-}
 
 
 boolean checkMismatchesSeq(char *file, int line, char *chrom, unsigned chromStart, unsigned chromEnd, char strand, char *seq)
 {
@@ -1216,8 +1123,105 @@
     int *errs;
     int count;
     };
 
+boolean checkCigarMismatches(char *file, int line, char *chrom, unsigned chromStart, char strand, char *seq, unsigned int *cigarPacked, int nCigar)
+{
+static char cacheChrom[1024];
+static struct dnaSeq *cacheSeq = NULL;
+
+if (!genome)
+    errAbort("Need to specify genome if checking BAM files");
+
+if ((line % mmCheckOneInN) != 0)
+    return TRUE; // dont check if this is not one in N
+
+// read the whole chrom
+if ((cacheChrom == NULL) || !sameString(chrom, cacheChrom))
+    {
+    freeDnaSeq(&cacheSeq);
+    int size =  twoBitSeqSize(genome, chrom);
+    cacheSeq = twoBitReadSeqFragLower(genome, chrom, 0, size);
+    strcpy(cacheChrom, chrom);
+    verbose(2, "read in chrom %s size %d\n",cacheChrom, size);
+    }
+
+
+int curPosGenome = chromStart;
+int i, j;
+int mm = 0;
+char dna[10000], *dnaPtr = dna;
+for (i = 0;   (i < nCigar);  i++)
+    {
+    char op;
+    int size = bamUnpackCigarElement(cigarPacked[i], &op);
+    switch (op)
+	{
+	case 'M': // match or mismatch (gapless aligned block)
+            //printf("grabbing %d from %d\n",size,curPosGenome);
+            for (j=0 ; j < size; ++j)
+                {
+                *dnaPtr++ = cacheSeq->dna[curPosGenome];
+                curPosGenome++;
+                }
+	    break;
+	case 'I': // inserted in query
+	case 'S': // skipped query bases at beginning or end ("soft clipping")
+            for(j=0; j < size; j++)
+                *dnaPtr++ = '-';
+	    break;
+	case 'D': // deleted from query
+	case 'N': // long deletion from query (intron as opposed to small del)
+	    curPosGenome += size;
+	    break;
+	case 'H': // skipped query bases not stored in record's query sequence ("hard clipping")
+	case 'P': // P="silent deletion from padded reference sequence" -- ignore these.
+	    break;
+	default:
+	    errAbort("unrecognized CIGAR op %c -- update me", op);
+	}
+    }
+
+*dnaPtr = 0;
+
+int length = strlen(seq);
+if (matchFirst && (matchFirst < length))
+    length = matchFirst;
+int checkLength = length;
+
+int start, incr;
+if (strand == '-')
+    {
+    start = strlen(seq) - 1;
+    incr = -1;
+    }
+else
+    {
+    start = 0;
+    incr = 1;
+    }
+
+for (i = start; (strand == '-') ? i <= 0 : i < strlen(seq); i += incr)
+    {
+    char c = tolower(seq[i]);
+    if ((dna[i] == '-') || (checkMismatch(c,  dna[i])))
+        ++mm;
+
+    if (--length == 0)
+        break;
+    }
+
+
+if (mm > mismatches)
+    {
+    warn("Error [file=%s, line=%d]: too many mismatches (found %d/%d, maximum is %d) (%s: %d  query %s  dna %s )\n",
+         file, line, mm, checkLength, mismatches, chrom, chromStart, seq, dna);
+    return FALSE;
+    }
+
+return TRUE;
+}
+
 int parseBamRecord(const bam1_t *bam, void *data)
 /* bam_fetch() calls this on each bam alignment retrieved.  Translate each bam 
  * into a linkedFeatures item, and add it to tg->items. */
 {