src/hg/encode/validateFiles/validateFiles.c 1.33

1.33 2009/12/15 23:50:33 tdreszer
Never do in 1 checkin what can be spread out into 2
Index: src/hg/encode/validateFiles/validateFiles.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/validateFiles/validateFiles.c,v
retrieving revision 1.32
retrieving revision 1.33
diff -b -B -U 4 -r1.32 -r1.33
--- src/hg/encode/validateFiles/validateFiles.c	15 Dec 2009 22:59:06 -0000	1.32
+++ src/hg/encode/validateFiles/validateFiles.c	15 Dec 2009 23:50:33 -0000	1.33
@@ -578,15 +578,26 @@
 static struct dnaSeq *cacheSeq = NULL;
 static char cacheChrom[1024];
 static char bigArr[100 * 1024]; // 100K limit on tagAlign seqLen
 struct dnaSeq ourSeq;
+boolean chrMSizeAjustment=FALSE;
 
 if (!genome)
     return TRUE; // only check if 2bit file specified
 if (line % mmCheckOneInN != 0)
     return TRUE; // dont check if this is not one in N
 if (!isSort)
     {
+    //unsigned end = chromEnd;
+    if(sameString(chrom,"chrM"))
+        {
+        unsigned size =  twoBitSeqSize(genome, chrom);
+        if( chromEnd>size)
+            {
+            chrMSizeAjustment=TRUE;
+            chromEnd=size;
+            }
+        }
     g = twoBitReadSeqFragLower(genome, chrom, chromStart, chromEnd);
     }
 else
     {
@@ -611,13 +622,13 @@
 
 if (strand == '-')
     reverseComplement(g->dna, g->size);
 
-if (g->size != strlen(seq) || g->size != chromEnd-chromStart)
+if ((g->size != strlen(seq) || g->size != chromEnd-chromStart) && !chrMSizeAjustment)
     {
-    warn("Error [file=%s, line=%d]: sequence (%s) length (%d) does not match genomic coords (%d / %d - %s %d %d)",
+    warn("Error [file=%s, line=%d]: sequence (%s) length (%d) does not match genomic coords (%d / %d - %s %d %d %c)",
          file, line, seq, (int)strlen(seq), chromEnd-chromStart, g->size,
-	 chrom, chromStart, chromEnd);
+        chrom, chromStart, chromEnd, strand);
     return FALSE;
     }
 
 int length = g->size;