src/hg/encode/encodeValidate/doEncodeValidate.pl 1.159

1.159 2009/03/05 02:57:16 mikep
3 different flavors of rpkm files (3, 5 or 7 columns); just check that the column count is one of these
Index: src/hg/encode/encodeValidate/doEncodeValidate.pl
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/encodeValidate/doEncodeValidate.pl,v
retrieving revision 1.158
retrieving revision 1.159
diff -b -B -U 4 -r1.158 -r1.159
--- src/hg/encode/encodeValidate/doEncodeValidate.pl	4 Mar 2009 17:55:10 -0000	1.158
+++ src/hg/encode/encodeValidate/doEncodeValidate.pl	5 Mar 2009 02:57:16 -0000	1.159
@@ -918,22 +918,37 @@
 
 sub validateRpkm
 # Wold lab format, has gene name and 2 floats 
 #   Allowing Gene name to be composed of any characters but <tab>
-# Example lines:-
-#HBG2    0.583   1973.85
-#RPS20   0.523   1910.01
-#RPLP0   1.312   1800.51
+#
+# Example format 1 (3 cols):-
+# HBG2    0.583   1973.85
+# RPS20   0.523   1910.01
+# RPLP0   1.312   1800.51
+#
+# Example format 2 (7 cols):- (*.accepted.rpkm)
+# ENSG00000003056 chr12   8989051 8989354 2.43    303     M6PR
+# ENSG00000006015 chr19   18560887        18561077        1.10    190     C19orf60
+# ENSG00000008516 chr16   3047223 3047380 0.61    157     MMP25
+#
+# Example format 3 (5 cols): (*.final.rpkm)
+#GID    gene    len_kb  RPKM    multi/all
+# OTTHUMG00000151214      IGLC2   0.722   3579.34 0.84
+# FAR3664 FAR3664 0.200   3216.32 0.94
+# OTTHUMG00000021144      TMSB4X  3.551   2767.52 0.35
 {
     my ($path, $file, $type) = @_;
     doTime("beginning validateRpkm") if $opt_timing;
     my $lineNumber = 0;
     my $fh = openUtil($path, $file);
     while(<$fh>) {
         chomp;
         $lineNumber++;
+        next if m/^#/;
+	my $cols = split;
         die "Failed $type validation, file '$file'; line $lineNumber: line=[$_]\n"
-            unless m/^([^\t]+)\t(\d+\.\d+)\t(\d+\.\d+)$/;
+	    unless $cols == 3 or $cols == 5 or $cols == 7;
+#            unless m/^([^\t]+)\t(\d+\.\d+)\t(\d+\.\d+)$/;
         last if($opt_quick && $lineNumber >= $quickCount);
     }
     $fh->close();
     HgAutomate::verbose(2, "File \'$file\' passed $type validation\n");