src/hg/encode/encodeValidate/doEncodeValidate.pl 1.169

1.169 2009/03/22 02:37:47 larrym
refactor openUtil into Encode.pm
Index: src/hg/encode/encodeValidate/doEncodeValidate.pl
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/encodeValidate/doEncodeValidate.pl,v
retrieving revision 1.168
retrieving revision 1.169
diff -b -B -U 4 -r1.168 -r1.169
--- src/hg/encode/encodeValidate/doEncodeValidate.pl	15 Mar 2009 02:00:04 -0000	1.168
+++ src/hg/encode/encodeValidate/doEncodeValidate.pl	22 Mar 2009 02:37:47 -0000	1.169
@@ -349,22 +349,8 @@
     psl  => \&validatePsl,
     cBiP => \&validateFreepass,  # TODO: this is a dodge, because bed file is for different species, so chrom violations
     );
 
-sub openUtil
-{
-# Handles opening gzipped, tar gzipped, tar, as well as plain files
-    my ($path, $file) = @_;
-    my $fh = new IO::File;
-    my $filePath = defined($path) ? "$path/$file" : $file;
-    open($fh, Encode::isTar($filePath) ? "/bin/tar -Oxf $filePath |"
-	      : ( Encode::isTarZipped($filePath) ? "/bin/tar -Ozxf $filePath |"
-	          : ( Encode::isZipped($filePath) ? "/bin/gunzip -c $filePath |"
-		      : $filePath ))
-	) or die "Couldn't open file '$file'; error: $!\n";
-    return $fh;
-}
-
 my $floatRegEx = "[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?";
 # my $floatRegEx = "[+-]?(?:\\.\\d+|\\d+(?:\\.\\d+|[eE]{1}?[+-]{1}?\\d+))";  # Tim's attempt
 # my $floatRegEx = "[+-]?(?:\\.\\d+|\\d+(?:\\.\\d+|))";                      # Original
 my %typeMap = (int => "[+-]?\\d+", uint => "\\d+", float => $floatRegEx, string => "\\S+");
@@ -449,9 +435,9 @@
 # open a file and validate each line with $validateList
 # $name is the caller's subroutine name (used in error and debug messages).
     my ($path, $file, $type, $maxRows, $name, $validateList) = @_;
     my $lineNumber = 0;
-    my $fh = openUtil($path, $file);
+    my $fh = Encode::openUtil($file, $path);
     my $regexp = listToRegExp($validateList);
     my $hasChrom = 0;
     for my $rec (@{$validateList}) {
         $hasChrom++ if($rec->{NAME} eq "chrom");
@@ -488,9 +474,9 @@
 sub validateFreepass
 {
     my ($path, $file, $type) = @_;
     doTime("beginning validateFreepass") if $opt_timing;
-    my $fh = openUtil($path, $file);
+    my $fh = Encode::openUtil($file, $path);
     #my $lineNumber = 0;
     #while(<$fh>) {
     #    chomp;
     #    $lineNumber++;
@@ -531,9 +517,9 @@
 # Validate each line of a bed 5 or greater file.
     my ($path, $file, $type) = @_;
     my $lineNumber = 0;
     doTime("beginning validateBed") if $opt_timing;
-    my $fh = openUtil($path, $file);
+    my $fh = Encode::openUtil($file, $path);
     while(<$fh>) {
         chomp;
         $lineNumber++;
         next if m/^#/; # allow comment lines, consistent with lineFile and hgLoadBed
@@ -576,9 +562,9 @@
 # Validate each line of a bedGraph file.
     my ($path, $file, $type) = @_;
     my $lineNumber = 0;
     doTime("beginning validateBedGraph") if $opt_timing;
-    my $fh = openUtil($path, $file);
+    my $fh = Encode::openUtil($file, $path);
     while(<$fh>) {
         chomp;
         $lineNumber++;
         next if m/^#/; # allow comment lines, consistent with lineFile and hgLoadBed
@@ -781,9 +767,9 @@
     # T01301010111200210102321210100112312
 
     my ($path, $file, $type) = @_;
     doTime("beginning validateCsfasta") if $opt_timing;
-    my $fh = openUtil($path, $file);
+    my $fh = Encode::openUtil($file, $path);
     my $line = 0;
     my $state = 'header';
     my $seqName;
     my $states = {header => {REGEX => "^>\\d+_\\d+_\\d+_\.\\d+.*", NEXT => 'seq'},
@@ -822,9 +808,9 @@
     # 16 8 5 12 20 24 19 8 13 17 11 23 8 24 8 7 17 4 20 8 29 7 3 16 3 4 8 20 17 9
 
     my ($path, $file, $type) = @_;
     doTime("beginning validateCsqual") if $opt_timing;
-    my $fh = openUtil($path, $file);
+    my $fh = Encode::openUtil($file, $path);
     my $line = 0;
     my $state = 'header';
     my $seqName;
     my $states = {header => {REGEX => "^>\\d+_\\d+_\\d+_\.\\d+", NEXT => 'qual'},
@@ -863,9 +849,9 @@
     doTime("beginning validateFasta") if $opt_timing;
     HgAutomate::verbose(2, "validateFasta($path,$file,$type)\n");
     return () if $opt_skipValidateFastQ;
     doTime("beginning validateFasta") if $opt_timing;
-    my $fh = openUtil($path, $file);
+    my $fh = Encode::openUtil($file, $path);
     my $line = 0;
     my $state = 'firstLine';
     my $seqName;
     my $seqNameRegEx = "[A-Za-z0-9_.:/-]+";
@@ -912,9 +898,9 @@
 {
     my ($path, $file, $type) = @_;
     doTime("beginning validateRpkm") if $opt_timing;
     my $lineNumber = 0;
-    my $fh = openUtil($path, $file);
+    my $fh = Encode::openUtil($file, $path);
     while(<$fh>) {
         chomp;
         $lineNumber++;
         next if m/^#/;
@@ -942,9 +928,9 @@
     my ($path, $file, $type) = @_;
     doTime("beginning validateBowtie") if $opt_timing;
     my $lineNumber = 0;
     doTime("beginning validateBedGraph") if $opt_timing;
-    my $fh = openUtil($path, $file);
+    my $fh = Encode::openUtil($file, $path);
     while(<$fh>) {
         chomp;
         $lineNumber++;
         next if m/^#/; # allow comment lines, consistent with lineFile and hgLoadBed
@@ -975,9 +961,9 @@
 {
     my ($path, $file, $type) = @_;
     my $lineNumber = 0;
     doTime("beginning validatePsl") if $opt_timing;
-    my $fh = openUtil($path, $file);
+    my $fh = Encode::openUtil($file, $path);
     while(<$fh>) {
         chomp;
         $lineNumber++;
         next if $lineNumber == 1 and m/^psLayout version \d+/; # check first line