src/hg/encode/encodeValidate/doEncodeValidate.pl 1.225
1.225 2010/05/21 17:38:00 braney
add checking for sex of cell type for BAMs
Index: src/hg/encode/encodeValidate/doEncodeValidate.pl
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/encodeValidate/doEncodeValidate.pl,v
retrieving revision 1.224
retrieving revision 1.225
diff -b -B -U 4 -r1.224 -r1.225
--- src/hg/encode/encodeValidate/doEncodeValidate.pl 11 May 2010 20:25:02 -0000 1.224
+++ src/hg/encode/encodeValidate/doEncodeValidate.pl 21 May 2010 17:38:00 -0000 1.225
@@ -180,9 +180,9 @@
sub validateFiles {
# Validate array of filenames, ordered by part
# Check files exist and are of correct data format
- my ($files, $type, $track, $daf) = @_;
+ my ($files, $type, $track, $daf, $cell) = @_;
my @newFiles;
my @errors;
my $regex = "\`\|\\\|\|\"\|\'";
doTime("beginning validateFiles") if $opt_timing;
@@ -212,9 +212,9 @@
pushError(\@errors, "File \'$file\' is empty");
} elsif(!(-r $file)) {
pushError(\@errors, "File \'$file\' is un-readable");
} else {
- pushError(\@errors, checkDataFormat($daf->{TRACKS}{$track}{type}, $file));
+ pushError(\@errors, checkDataFormat($daf->{TRACKS}{$track}{type}, $file, $cell));
}
}
$files = \@newFiles;
doTime("done validateFiles") if $opt_timing;
@@ -758,13 +758,29 @@
}
sub validateBam
{
- my ($path, $file, $type) = @_;
+ my ($path, $file, $type, $cell) = @_;
doTime("beginning validateBam") if $opt_timing;
HgAutomate::verbose(2, "validateBam($path,$file,$type)\n");
my $paramList = validationSettings("validateFiles","bam");
- my $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt $paramList -type=BAM -chromDb=$daf->{assembly} $file"]);
+ my $sex = $terms{'Cell Line'}->{$cell}->{'sex'};
+ my $downloadDir = "/hive/groups/encode/dcc/pipeline/downloads/$assembly/referenceSequences";
+ my $infoFile = "$downloadDir/female.$assembly.chrom.sizes";
+ my $twoBitFile = "$downloadDir/female.$assembly.2bit";
+ if ($sex ne "F") {
+ $infoFile = "$downloadDir/male.$assembly.chrom.sizes";
+ $twoBitFile = "$downloadDir/male.$assembly.2bit";
+ }
+
+ # index the BAM file
+ my $safe = SafePipe->new(CMDS => ["samtools index $file"]);
+ if(my $err = $safe->exec()) {
+ print STDERR "ERROR: failed samtools index : " . $safe->stderr() . "\n";
+ # don't show end-user pipe error(s)
+ return("failed validateBam for '$file'");
+ }
+ $safe = SafePipe->new(CMDS => ["validateFiles $quickOpt $paramList -type=BAM -chromInfo=$infoFile -genome=$twoBitFile $file"]);
if(my $err = $safe->exec()) {
print STDERR "ERROR: failed validateBam : " . $safe->stderr() . "\n";
# don't show end-user pipe error(s)
return("failed validateBam for '$file'");
@@ -951,21 +967,21 @@
# Misc subroutines
sub validateDdfField {
# validate value for type of field
- my ($type, $val, $track, $daf) = @_;
+ my ($type, $val, $track, $daf, $cell) = @_;
$type =~ s/ /_/g;
HgAutomate::verbose(4, "Validating $type: " . (defined($val) ? $val : "") . "\n");
if($validators{$type}) {
- return $validators{$type}->($val, $type, $track, $daf);
+ return $validators{$type}->($val, $type, $track, $daf, $cell);
} else {
return $validators{'default'}->($val, $type, $track, $daf); # Considers the term controlled vocab
}
}
sub checkDataFormat {
# validate file type
- my ($format, $file) = @_;
+ my ($format, $file, $cell) = @_;
HgAutomate::verbose(3, "Checking data format for $file: $format\n");
my $type = $format;
if ($format =~ m/(bed) (\d+)/) {
$format = $1;
@@ -973,9 +989,9 @@
if ($format =~ m/(bedGraph) (\d+)/) {
$format = $1;
}
$formatCheckers{$format} || return "Data format \'$format\' is unknown\n";
- return $formatCheckers{$format}->($submitPath, $file, $type);
+ return $formatCheckers{$format}->($submitPath, $file, $type, $cell);
HgAutomate::verbose(3, "Done checking data format for $file: $format\n");
}
sub ddfKey
@@ -1464,9 +1480,10 @@
}
$line{files} = \@filenames;
my @metadataErrors;
for my $field (keys %line) {
- push(@metadataErrors, validateDdfField($field, $line{$field}, $view, $daf));
+ my $cell = $line{cell};
+ push(@metadataErrors, validateDdfField($field, $line{$field}, $view, $daf, $cell));
}
if(@metadataErrors) {
pushError(\@errors, @metadataErrors);
} else {