src/hg/encode/encodeValidate/doEncodeValidate.pl 1.189
1.189 2009/08/28 21:49:23 tdreszer
Added support for 'validationSettings' in the DAF
Index: src/hg/encode/encodeValidate/doEncodeValidate.pl
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/encodeValidate/doEncodeValidate.pl,v
retrieving revision 1.188
retrieving revision 1.189
diff -b -B -U 4 -r1.188 -r1.189
--- src/hg/encode/encodeValidate/doEncodeValidate.pl 23 Jul 2009 18:08:57 -0000 1.188
+++ src/hg/encode/encodeValidate/doEncodeValidate.pl 28 Aug 2009 21:49:23 -0000 1.189
@@ -659,9 +659,10 @@
sub validateTagAlign
{
my ($path, $file, $type) = @_;
# validate chroms, chromSize, etc. Assume hg18 like elsewhere
- my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt -chromDb=hg18 -type=tagAlign $file"]);
+ my $paramList = validationSettings("validateFiles","tagAlign");
+ my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt $paramList -chromDb=hg18 -type=tagAlign $file"]);
if(my $err = $safe->exec()) {
print STDERR "ERROR: failed validateTagAlign : " . $safe->stderr() . "\n";
# don't show end-user pipe error(s)
return("failed validateTagAlign for '$file'");
@@ -673,9 +674,10 @@
# This is like tag align but with two additional sequence fields appended; seq1 and seq2
{
my ($path, $file, $type) = @_;
# validate chroms, chromSize, etc. Assume hg18 like elsewhere
- my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt -chromDb=hg18 -type=pairedTagAlign $file"]);
+ my $paramList = validationSettings("validateFiles","pairedTagAlign");
+ my $safe = SafePipe->new(CMDS => ["validateFiles $paramList $quickOpt -chromDb=hg18 -type=pairedTagAlign $file"]);
if(my $err = $safe->exec()) {
print STDERR "ERROR: failed validatePairedTagAlign : " . $safe->stderr() . "\n";
# don't show end-user pipe error(s)
return("failed validatePairedTagAlign for '$file'");
@@ -702,9 +704,10 @@
sub validateBroadPeak
{
my ($path, $file, $type) = @_;
# validate chroms, chromSize, etc. Assume hg18 like elsewhere
- my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt -chromDb=hg18 -type=broadPeak $file"]);
+ my $paramList = validationSettings("validateFiles","broadPeak");
+ my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt $paramList -chromDb=hg18 -type=broadPeak $file"]);
if(my $err = $safe->exec()) {
print STDERR "ERROR: failed validateBroadPeak : " . $safe->stderr() . "\n";
# don't show end-user pipe error(s)
return("failed validateBroadPeak for '$file'");
@@ -746,9 +749,10 @@
# - fastq produced directly from Solexa has a 'solexa' quality score
# - fastq defined by Sanger has a 'PHRED' quality score
# - The 2 urls above show how to convert between both
my ($path, $file, $type) = @_;
- my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt -type=fastq $file"]);
+ my $paramList = validationSettings("validateFiles","fastq");
+ my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt $paramList -type=fastq $file"]);
if(my $err = $safe->exec()) {
print STDERR "ERROR: failed validateFastQ : " . $safe->stderr() . "\n";
# don't show end-user pipe error(s)
return("failed validateFastQ for '$file'");
@@ -774,9 +778,10 @@
# T01301010111200210102321210100112312
my ($path, $file, $type) = @_;
doTime("beginning validateCsfasta") if $opt_timing;
HgAutomate::verbose(2, "validateCsfasta($path,$file,$type)\n");
- my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt -type=csfasta $file"]);
+ my $paramList = validationSettings("validateFiles","csfasta");
+ my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt $paramList -type=csfasta $file"]);
if(my $err = $safe->exec()) {
print STDERR "ERROR: failed validateCsfasta : " . $safe->stderr() . "\n";
# don't show end-user pipe error(s)
return("failed validateCsfasta for '$file'");
@@ -799,9 +804,10 @@
# 16 8 5 12 20 24 19 8 13 17 11 23 8 24 8 7 17 4 20 8 29 7 3 16 3 4 8 20 17 9
my ($path, $file, $type) = @_;
doTime("beginning validateCsqual") if $opt_timing;
HgAutomate::verbose(2, "validateCsqual($path,$file,$type)\n");
- my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt -type=csqual $file"]);
+ my $paramList = validationSettings("validateFiles","csqual");
+ my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt $paramList -type=csqual $file"]);
if(my $err = $safe->exec()) {
print STDERR "ERROR: failed validateCsqual : " . $safe->stderr() . "\n";
# don't show end-user pipe error(s)
return("failed validateCsqual for '$file'");
@@ -821,9 +827,10 @@
{
my ($path, $file, $type) = @_;
doTime("beginning validateFasta") if $opt_timing;
HgAutomate::verbose(2, "validateFasta($path,$file,$type)\n");
- my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt -type=fasta $file"]);
+ my $paramList = validationSettings("validateFiles","fasta");
+ my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt $paramList -type=fasta $file"]);
if(my $err = $safe->exec()) {
print STDERR "ERROR: failed validateFasta : " . $safe->stderr() . "\n";
# don't show end-user pipe error(s)
return("failed validateFasta for '$file'");
@@ -1080,8 +1087,41 @@
print OUT_FILE "type bed 3\n";
print OUT_FILE "wgEncode 1\n\n";
}
+sub validationSettings {
+    # Look up an entry of the DAF 'validationSettings' line, a ';'-separated list such as
+    #   "validationSettings allowReloads;validateFiles.tagAlign:mmCheckOnInN=100,mismatches=3"
+    # validationSettings("validateFiles",$fileType): returns the ','-separated params of the
+    #   "validateFiles.$fileType:" entry as a " -p1 -p2" option string ("" if none).
+    # validationSettings($flag): returns 1 if the bare entry $flag is present, else 0.
+    # NOTE(review): params reach the validateFiles command line unescaped; confirm DAF is trusted.
+    my ($type, $fileType) = @_;
+    my $settings = $daf->{validationSettings};
+    my @set = defined($settings) ? split(/;/, $settings) : (); # no validationSettings line => empty list
+    if($type ne "validateFiles") {
+        for my $setting (@set) {
+            next unless $setting eq $type;
+            HgAutomate::verbose(2, "validationSettings $type found\n");
+            return 1;
+        }
+        return 0;
+    }
+    return "" unless defined($fileType);
+    for my $setting (@set) {
+        next unless $setting =~ /^validateFiles\./;
+        my ($name, $values) = split(/:/, $setting, 2);
+        my (undef, $subType) = split(/\./, $name, 2);
+        next unless defined($subType) && $subType eq $fileType;
+        # An entry without ':' has no params; empty params (e.g. from ',,') are dropped
+        my @params = grep { length } split(/,/, defined($values) ? $values : "");
+        my $paramList = join("", map { " -$_" } @params);
+        HgAutomate::verbose(2, "validationSettings $type $fileType params: $paramList\n");
+        return $paramList;
+    }
+    return "";
+}
+
############################################################################
# Main
my @ddfHeader; # list of field names on the first line of DDF file
@@ -1239,8 +1279,23 @@
if($hasReplicates) {
$fields->{replicate}{required} = 1;
}
+# DAF validationSettings may enable allowReloads and the skip* options
+if(validationSettings("allowReloads")) {
+ $opt_allowReloads = 1;
+}
+if(validationSettings("skipAutoCreation")) {
+ $opt_skipAutoCreation = 1;
+}
+if(validationSettings("skipValidateFiles")) {
+ $opt_skipValidateFiles = 1;
+}
+if(validationSettings("skipOutput")) {
+ $opt_skipAutoCreation = $opt_skipOutput = $opt_skipValidateFiles = 1;
+}
+
+
# Open dataset descriptor file (DDF)
my @glob = glob "*.DDF";
push(@glob, glob "*.ddf");
my $ddfFile = Encode::newestFile(@glob);