src/hg/encode/encodeMkGeoPkg/encodeMkGeoPkg 1.5

1.5 2010/05/13 09:15:46 krish
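files, types, and checksums, oh my: require the instrument model as a third argument and list raw and supplementary files with their md5 checksums in each sample record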
Index: src/hg/encode/encodeMkGeoPkg/encodeMkGeoPkg
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/encodeMkGeoPkg/encodeMkGeoPkg,v
retrieving revision 1.4
retrieving revision 1.5
diff -b -B -U 4 -r1.4 -r1.5
--- src/hg/encode/encodeMkGeoPkg/encodeMkGeoPkg	13 May 2010 08:32:16 -0000	1.4
+++ src/hg/encode/encodeMkGeoPkg/encodeMkGeoPkg	13 May 2010 09:15:46 -0000	1.5
@@ -158,9 +158,9 @@
                     "verbose=i"
                     );
 # parse options
 usage() if (!$ok);
-usage() if (scalar(@ARGV) < 2);
+usage() if (scalar(@ARGV) < 3);
 # get options or set defaults
 if (not defined $opt_instance) {
     $opt_instance = "prod";
 }
@@ -183,16 +183,20 @@
 HgAutomate::verbose(4, "Config directory path: \'$configPath\'\n");
 
 my $database  = $ARGV[0];
 my $compositeName = $ARGV[1];
+my $instrument = $ARGV[2];
 
 # some counters we use
 my $i;
 my $j;
 my $c;
 my $f;
 my $o;
 
+# directory holding this composite's files (hard-coded pipeline ftp path); md5sum reads the files from here
+my $compositeDir = "/hive/groups/encode/dcc/analysis/ftp/pipeline/$database/$compositeName";
+
 # read the cv.ra file
 my %cvTerms = Encode::getControlledVocab($configPath);
 
 # connect to the database and read the metadata table for the obj
@@ -299,9 +303,9 @@
     my %cellLineInfo = %{$cellLines{$cell}};
     my $organism = $cellLineInfo{"organism"};
     my $provider = $cellLineInfo{"vendorName"};
     my $growthProtocolUrl = "http://genome.ucsc.edu/ENCODE/protocols/cell/" . $cellLineInfo{"protocol"};
-    my $growthProtocol = "Cells were grown according to the approved ENCODE cell culture protocols: $growthProtocolUrl";
+    my $growthProtocol = "Cells were grown according to ENCODE cell culture protocols: $growthProtocolUrl";
 
     my $extractProtocolUrl = "http://genome.ucsc.edu/cgi-bin/hgTrackUi?db=$database&g=$compositeName";
     my $extractProtocol    = "For extraction protocol details see: $extractProtocolUrl";
 
@@ -342,23 +346,41 @@
             }
         }
     }
     print "!Sample_biomaterial_provider = $provider" . "\n";
-    #for $i (@currentMetadata) {
-    #    my %metadata = %{$i};
-    #    if ($metadata{"objType"} eq "file") {
-    #        my $file = $metadata{"fileName"};
-    #        print "!Sample_supplementary_file = $file" . "\n";
-    #    }
-    #}
     print "!Sample_growth_protocol = $growthProtocol" . "\n";
     print "!Sample_molecule = $molecule" . "\n";
     print "!Sample_extract_protocol = $extractProtocol" . "\n";
     print "!Sample_data_processing = $dataProcessing" . "\n";
     print "!Sample_library_strategy = $libraryStrategy" . "\n";
     print "!Sample_library_source = $ibrarySource" . "\n";
     print "!Sample_library_selection = $ibrarySelection" . "\n";
-    print "!Sample_instrument_model = [required]" . "\n";
+    print "!Sample_instrument_model = $instrument" . "\n";
+    my $rawCount = 1;
+    for $i (@currentMetadata) {
+        my %metadata = %{$i};
+        my $filename = $metadata{"fileName"};
+        if ($metadata{"view"} eq "RawData") {
+            my ($name, $type, $compression) = split(/\./, $filename);
+            my $checksum = (split/\W+/, `md5sum $compositeDir/$filename`)[0];
+            print "!Sample_raw_file_$rawCount = $filename" . "\n";
+            print "!Sample_raw_file_type_$rawCount = $type" . "\n";
+            print "!Sample_raw_file_checksum_$rawCount = $checksum" . "\n";
+            ++$rawCount;
+        }
+    }
+    my $supplementCount = 1;
+    for $i (@currentMetadata) {
+        my %metadata = %{$i};
+        my $filename = $metadata{"fileName"};
+        if ($metadata{"view"} ne "RawData") {
+            my $checksum = (split/\W+/, `md5sum $compositeDir/$filename`)[0];
+            print "!Sample_supplementary_file_$supplementCount = $filename" . "\n";
+            print "!Sample_supplementary_file_checksum_$supplementCount = $checksum" . "\n";
+            print "!Sample_supplementary_file_build_$supplementCount = $database" . "\n";
+            ++$supplementCount;
+        }
+    }
     print "\n";
     
     exit;
 }