src/hg/encode/encodeValidate/doEncodeValidate.pl 1.189

1.189 2009/08/28 21:49:23 tdreszer
Added support for 'validationSettings' in the DAF
Index: src/hg/encode/encodeValidate/doEncodeValidate.pl
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/encodeValidate/doEncodeValidate.pl,v
retrieving revision 1.188
retrieving revision 1.189
diff -b -B -U 4 -r1.188 -r1.189
--- src/hg/encode/encodeValidate/doEncodeValidate.pl	23 Jul 2009 18:08:57 -0000	1.188
+++ src/hg/encode/encodeValidate/doEncodeValidate.pl	28 Aug 2009 21:49:23 -0000	1.189
@@ -659,9 +659,10 @@
 sub validateTagAlign
 {
     my ($path, $file, $type) = @_;
     # validate chroms, chromSize, etc. Assume hg18 like elsewhere
-    my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt -chromDb=hg18 -type=tagAlign $file"]);
+    my $paramList = validationSettings("validateFiles","tagAlign");
+    my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt $paramList -chromDb=hg18 -type=tagAlign $file"]);
     if(my $err = $safe->exec()) {
 	print STDERR  "ERROR: failed validateTagAlign : " . $safe->stderr() . "\n";
 	# don't show end-user pipe error(s)
 	return("failed validateTagAlign for '$file'");
@@ -673,9 +674,10 @@
 # This is like tag align but with two additional sequence fields appended; seq1 and seq2
 {
     my ($path, $file, $type) = @_;
     # validate chroms, chromSize, etc. Assume hg18 like elsewhere
-    my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt -chromDb=hg18 -type=pairedTagAlign $file"]);
+    my $paramList = validationSettings("validateFiles","pairedTagAlign");
+    my $safe = SafePipe->new(CMDS => ["validateFiles $paramList $quickOpt -chromDb=hg18 -type=pairedTagAlign $file"]);
     if(my $err = $safe->exec()) {
 	print STDERR  "ERROR: failed validatePairedTagAlign : " . $safe->stderr() . "\n";
 	# don't show end-user pipe error(s)
 	return("failed validatePairedTagAlign for '$file'");
@@ -702,9 +704,10 @@
 sub validateBroadPeak
 {
     my ($path, $file, $type) = @_;
     # validate chroms, chromSize, etc. Assume hg18 like elsewhere
-    my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt -chromDb=hg18 -type=broadPeak $file"]);
+    my $paramList = validationSettings("validateFiles","broadPeak");
+    my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt $paramList -chromDb=hg18 -type=broadPeak $file"]);
     if(my $err = $safe->exec()) {
 	print STDERR  "ERROR: failed validateBroadPeak : " . $safe->stderr() . "\n";
 	# don't show end-user pipe error(s)
 	return("failed validateBroadPeak for '$file'");
@@ -746,9 +749,10 @@
     # - fastq produced directly from Solexa has a 'solexa' quality score
     # - fastq defined by Sanger has a 'PHRED' quality score
     # - The 2 urls above show how to convert between both
     my ($path, $file, $type) = @_;
-    my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt -type=fastq $file"]);
+    my $paramList = validationSettings("validateFiles","fastq");
+    my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt $paramList -type=fastq $file"]);
     if(my $err = $safe->exec()) {
 	print STDERR  "ERROR: failed validateFastQ : " . $safe->stderr() . "\n";
 	# don't show end-user pipe error(s)
 	return("failed validateFastQ for '$file'");
@@ -774,9 +778,10 @@
     # T01301010111200210102321210100112312
     my ($path, $file, $type) = @_;
     doTime("beginning validateCsfasta") if $opt_timing;
     HgAutomate::verbose(2, "validateCsfasta($path,$file,$type)\n");
-    my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt -type=csfasta $file"]);
+    my $paramList = validationSettings("validateFiles","csfasta");
+    my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt $paramList -type=csfasta $file"]);
     if(my $err = $safe->exec()) {
 	print STDERR  "ERROR: failed validateCsfasta : " . $safe->stderr() . "\n";
 	# don't show end-user pipe error(s)
 	return("failed validateCsfasta for '$file'");
@@ -799,9 +804,10 @@
     # 16 8 5 12 20 24 19 8 13 17 11 23 8 24 8 7 17 4 20 8 29 7 3 16 3 4 8 20 17 9
     my ($path, $file, $type) = @_;
     doTime("beginning validateCsqual") if $opt_timing;
     HgAutomate::verbose(2, "validateCsqual($path,$file,$type)\n");
-    my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt -type=csqual $file"]);
+    my $paramList = validationSettings("validateFiles","csqual");
+    my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt $paramList -type=csqual $file"]);
     if(my $err = $safe->exec()) {
 	print STDERR  "ERROR: failed validateCsqual : " . $safe->stderr() . "\n";
 	# don't show end-user pipe error(s)
 	return("failed validateCsqual for '$file'");
@@ -821,9 +827,10 @@
 {
     my ($path, $file, $type) = @_;
     doTime("beginning validateFasta") if $opt_timing;
     HgAutomate::verbose(2, "validateFasta($path,$file,$type)\n");
-    my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt -type=fasta $file"]);
+    my $paramList = validationSettings("validateFiles","fasta");
+    my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt $paramList -type=fasta $file"]);
     if(my $err = $safe->exec()) {
 	print STDERR  "ERROR: failed validateFasta : " . $safe->stderr() . "\n";
 	# don't show end-user pipe error(s)
 	return("failed validateFasta for '$file'");
@@ -1080,8 +1087,41 @@
     print OUT_FILE "type bed 3\n";
     print OUT_FILE "wgEncode 1\n\n";
 }
 
+sub validationSettings {
+    # Look up one entry of the DAF 'validationSettings' line, e.g.
+    #   "validationSettings allowReloads;validateFiles.tagAlign:mmCheckOnInN=100,mismatches=3"
+    # $type "validateFiles" plus $fileType: returns the matching params as " -p1 -p2"
+    #   (ready to splice into a validateFiles command line), or "" when none are set.
+    # Any other $type is a plain flag: returns 1 when it is listed, otherwise 0.
+    # NOTE(review): params are interpolated into a command string unescaped -- confirm DAF is trusted.
+    my ($type, $fileType) = @_;
+
+    # A DAF without validationSettings is normal: use "" rather than split() on undef.
+    my $settings = defined($daf->{validationSettings}) ? $daf->{validationSettings} : "";
+    my @set = grep { length } map { s/^\s+|\s+$//g; $_ } split(/;/, $settings);
+    if($type ne "validateFiles") {
+        for my $setting (@set) {
+            next unless $setting eq $type;
+            HgAutomate::verbose(2, "validationSettings $type found\n");
+            return 1;
+        }
+        return 0;
+    }
+    return "" unless defined($fileType);
+    for my $setting (@set) {
+        # "validateFiles.<fileType>:<param>,<param>..." ; the ":<params>" part may be absent
+        my ($key, $paramStr) = split(/\s*:\s*/, $setting, 2);
+        next unless $key eq "validateFiles.$fileType";
+        my @params = grep { length } map { s/^\s+|\s+$//g; $_ } split(/,/, defined($paramStr) ? $paramStr : "");
+        my $paramList = join("", map { " -$_" } @params);
+        HgAutomate::verbose(2, "validationSettings $type $fileType params: $paramList\n");
+        return $paramList;
+    }
+    return "";
+}
+
 ############################################################################
 # Main
 
 my @ddfHeader;		# list of field names on the first line of DDF file
@@ -1239,8 +1279,23 @@
 if($hasReplicates) {
     $fields->{replicate}{required} = 1;
 }
 
+# Flags in the DAF's validationSettings set the matching $opt_* variables (skipOutput sets three)
+if(validationSettings("allowReloads")) {
+    $opt_allowReloads = 1;
+}
+if(validationSettings("skipAutoCreation")) {
+    $opt_skipAutoCreation = 1;
+}
+if(validationSettings("skipValidateFiles")) {
+    $opt_skipValidateFiles = 1;
+}
+if(validationSettings("skipOutput")) {
+    $opt_skipAutoCreation = $opt_skipOutput = $opt_skipValidateFiles = 1;
+}
+
+
 # Open dataset descriptor file (DDF)
 my @glob = glob "*.DDF";
 push(@glob, glob "*.ddf");
 my $ddfFile = Encode::newestFile(@glob);