src/hg/encode/encodeMkGeoPkg/encodeMkGeoPkg 1.5
1.5 2010/05/13 09:15:46 krish
files, types, and checksums, oh my
Index: src/hg/encode/encodeMkGeoPkg/encodeMkGeoPkg
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/encodeMkGeoPkg/encodeMkGeoPkg,v
retrieving revision 1.4
retrieving revision 1.5
diff -b -B -U 4 -r1.4 -r1.5
--- src/hg/encode/encodeMkGeoPkg/encodeMkGeoPkg 13 May 2010 08:32:16 -0000 1.4
+++ src/hg/encode/encodeMkGeoPkg/encodeMkGeoPkg 13 May 2010 09:15:46 -0000 1.5
@@ -158,9 +158,9 @@
"verbose=i"
);
# parse options
usage() if (!$ok);
-usage() if (scalar(@ARGV) < 2);
+usage() if (scalar(@ARGV) < 3);
# get options or set defaults
if (not defined $opt_instance) {
$opt_instance = "prod";
}
@@ -183,16 +183,20 @@
HgAutomate::verbose(4, "Config directory path: \'$configPath\'\n");
my $database = $ARGV[0];
my $compositeName = $ARGV[1];
+my $instrument = $ARGV[2];
# some counters we use
my $i;
my $j;
my $c;
my $f;
my $o;
+# directory holding this composite's pipeline files (checksums are computed on files under it)
+my $compositeDir = "/hive/groups/encode/dcc/analysis/ftp/pipeline/$database/$compositeName";
+
# read the cv.ra file
my %cvTerms = Encode::getControlledVocab($configPath);
# connect to the database and read the metadata table for the obj
@@ -299,9 +303,9 @@
my %cellLineInfo = %{$cellLines{$cell}};
my $organism = $cellLineInfo{"organism"};
my $provider = $cellLineInfo{"vendorName"};
my $growthProtocolUrl = "http://genome.ucsc.edu/ENCODE/protocols/cell/" . $cellLineInfo{"protocol"};
- my $growthProtocol = "Cells were grown according to the approved ENCODE cell culture protocols: $growthProtocolUrl";
+ my $growthProtocol = "Cells were grown according to ENCODE cell culture protocols: $growthProtocolUrl";
my $extractProtocolUrl = "http://genome.ucsc.edu/cgi-bin/hgTrackUi?db=$database&g=$compositeName";
my $extractProtocol = "For extraction protocol details see: $extractProtocolUrl";
@@ -342,23 +346,41 @@
}
}
}
print "!Sample_biomaterial_provider = $provider" . "\n";
- #for $i (@currentMetadata) {
- # my %metadata = %{$i};
- # if ($metadata{"objType"} eq "file") {
- # my $file = $metadata{"fileName"};
- # print "!Sample_supplementary_file = $file" . "\n";
- # }
- #}
print "!Sample_growth_protocol = $growthProtocol" . "\n";
print "!Sample_molecule = $molecule" . "\n";
print "!Sample_extract_protocol = $extractProtocol" . "\n";
print "!Sample_data_processing = $dataProcessing" . "\n";
print "!Sample_library_strategy = $libraryStrategy" . "\n";
print "!Sample_library_source = $ibrarySource" . "\n";
print "!Sample_library_selection = $ibrarySelection" . "\n";
- print "!Sample_instrument_model = [required]" . "\n";
+ print "!Sample_instrument_model = $instrument" . "\n";
+ my $rawCount = 1;
+ for $i (@currentMetadata) {
+ my %metadata = %{$i};
+ my $filename = $metadata{"fileName"};
+ if ($metadata{"view"} eq "RawData") {
+ my ($name, $type, $compression) = split(/\./, $filename);
+ my $checksum = (split/\W+/, `md5sum $compositeDir/$filename`)[0];
+ print "!Sample_raw_file_$rawCount = $filename" . "\n";
+ print "!Sample_raw_file_type_$rawCount = $type" . "\n";
+ print "!Sample_raw_file_checksum_$rawCount = $checksum" . "\n";
+ ++$rawCount;
+ }
+ }
+ my $supplementCount = 1;
+ for $i (@currentMetadata) {
+ my %metadata = %{$i};
+ my $filename = $metadata{"fileName"};
+ if ($metadata{"view"} ne "RawData") {
+ my $checksum = (split/\W+/, `md5sum $compositeDir/$filename`)[0];
+ print "!Sample_supplementary_file_$supplementCount = $filename" . "\n";
+ print "!Sample_supplementary_file_checksum_$supplementCount = $checksum" . "\n";
+ print "!Sample_supplementary_file_build_$supplementCount = $database" . "\n";
+ ++$supplementCount;
+ }
+ }
print "\n";
exit;
}