src/hg/encode/encodeValidate/doEncodeValidate.pl 1.150

1.150 2009/02/13 18:52:09 mikep
fixes to first 21 columns of psl format (commas at end of last 3)
Index: src/hg/encode/encodeValidate/doEncodeValidate.pl
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/encodeValidate/doEncodeValidate.pl,v
retrieving revision 1.149
retrieving revision 1.150
diff -b -B -U 4 -r1.149 -r1.150
--- src/hg/encode/encodeValidate/doEncodeValidate.pl	13 Feb 2009 18:28:08 -0000	1.149
+++ src/hg/encode/encodeValidate/doEncodeValidate.pl	13 Feb 2009 18:52:09 -0000	1.150
@@ -939,15 +939,20 @@
 }
 
 sub validatePsl
 # PSL format (for download) from Wold lab. 
+# EXAMPLE FROM http://genome.ucsc.edu/FAQ/FAQformat#format2
+# This adds 2 columns (sequence,<tab>sequence,) to the standard 21 columns
+# Only the first 21 are validated
+#
 # Sample first 6 lines
 #psLayout version 3
 #
 #match   mis-    rep.    N's     Q gap   Q gap   T gap   T gap   strand  Q               Q       Q       Q       T               T       T       T       block   blockSizes      qStarts  tStarts
 #        match   match           count   bases   count   bases           name            size    start   end     name            size    start   end     count
 #---------------------------------------------------------------------------------------------------------------------------------------------------------------
 #71      3       0       0       0       0       0       0       -       HWI-EAS229_75_30DY0AAXX:4:1:0:743/1     75      1       75      chr2    242951149       184181032       184181106       1  74,      0,      184181032,      agccttttacagcaacacctttacctctgctagatctttctgtagctcgtctgaagccatgggggctgggtcag,     agccttttccagcaacacctttacctcttctagatctttctgtagctcttctgaagccatgggggctgggtcag,
+#72      2       0       0       0       0       0       0       -       HWI-EAS229_75_30DY0AAXX:7:1:0:713/1     75      1       75      chr14   106368585       49540119        49540193        1  74,      0,      49540119,       cgggtgcgggccgagcagttctccgcacctccggtaaaggttcaggaccgggtgatggtctctgcagcagtcag,     ccggtgcgggccgagcagttctccgcacctccggtaaaggtgcaggaccgggtgatggtctctgcagcagtcag,
 {
     my ($path, $file, $type) = @_;
     my $lineNumber = 0;
     doTime("beginning validatePsl") if $opt_timing;
@@ -960,9 +965,9 @@
         next if $lineNumber == 3 and m/^match/;
         next if $lineNumber == 4 and m/^\s+match/;
         next if $lineNumber == 5 and m/^------/;
         die "Failed $type validation, file '$file'; line $lineNumber: line=[$_]\n" 
-	    unless m/^(\d+)\t(\d+)\t(\d+)\t(\d+)(\d+)\t(\d+)\t(\d+)\t(\d+)\t([+-])\t([A-Za-z0-9:>\|\/_-]+)\t(\d+)\t(\d+)\t(\d+)\t(\w+)\t(\d+)\t(\d+)\t(\d+)$/;
+	    unless m/^(\d+)\t(\d+)\t(\d+)\t(\d+)(\d+)\t(\d+)\t(\d+)\t(\d+)\t([+-][+-]?)\t([A-Za-z0-9:>\|\/_-]+)\t(\d+)\t(\d+)\t(\d+)\t(\w+)\t(\d+)\t(\d+)\t(\d+)\t(\d+)\t([0-9,]+)\t([0-9,]+)\t([0-9,]+)/;
         last if($opt_quick && $lineNumber >= $quickCount);
     }
     $fh->close();
     HgAutomate::verbose(2, "File \'$file\' passed $type validation\n");