src/hg/encode/encodeLoad/doEncodeUnload.pl 1.5

1.5 2010/03/15 22:21:53 krish
added script to generate changes notes for ENCODE tracks
Index: src/hg/encode/encodeLoad/doEncodeUnload.pl
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/encodeLoad/doEncodeUnload.pl,v
retrieving revision 1.4
retrieving revision 1.5
diff -b -B -U 1000000 -r1.4 -r1.5
--- src/hg/encode/encodeLoad/doEncodeUnload.pl	8 Oct 2008 00:06:46 -0000	1.4
+++ src/hg/encode/encodeLoad/doEncodeUnload.pl	15 Mar 2010 22:21:53 -0000	1.5
@@ -1,129 +1,161 @@
 #!/usr/bin/env perl
 
 # doEncodeUnload.pl - unload ENCODE data submission generated by the
 #                       automated submission pipeline
 # Reads out/unload.ra for information about what to do
 
 # Writes error or log information to STDOUT
 # Returns 0 if unload succeeds.
 
 # DO NOT EDIT the /cluster/bin/scripts copy of this file -- 
 # edit the CVS'ed source at: ~/kent/src/hg/encode/encodeLoad/doEncodeUnload.pl
 #
 # $Id$
 
 use warnings;
 use strict;
 
 use Getopt::Long;
+use Cwd;
 use File::Basename;
 
 use lib "/cluster/bin/scripts";
 use Encode;
+use RAFile;
 use HgDb;
 use HgAutomate;
 
-use vars qw/$opt_verbose/;
+use vars qw/$opt_verbose $opt_configDir/;
 my $PROG = basename $0;
 
 # Print a usage summary to STDERR and exit with status 1.
 # Called when option parsing fails or when the number of positional
 # arguments is not exactly two.
 sub usage
 {
     print STDERR <<END;
     usage: doEncodeUnload.pl [options] submission_type project_submission_dir
     options:
         -configDir=dir  config directory (default: project_submission_dir/../config)
         -verbose=num    verbosity level (default: 1)
 END
     exit(1);
 }
 
 # Unload a track whose only resource is its database table.
 # Takes the same argument list as unloadWig (assembly, db handle, table
 # name); the assembly is not needed here.
 # Returns whatever the handle's dropTableIfExist() returns.
 sub genericUnload
 {
     shift;                      # assembly: unused
     my $dbHandle = shift;
     my $table    = shift;
     return $dbHandle->dropTableIfExist($table);
 }
 
 # Unload a wiggle track: drop its table and remove the matching .wib
 # symlink under /gbdb.
 #   $assembly  - assembly name; selects the /gbdb/<assembly>/wib directory
 #   $db        - handle providing dropTableIfExist()
 #   $tableName - table to drop; also the base name of the .wib symlink
 # Dies if the symlink is present but cannot be removed.
 sub unloadWig
 {
     my ($assembly, $db, $tableName) = @_;
     $db->dropTableIfExist($tableName);
 
     # remove symlink
     my $file = "/gbdb/$assembly/wib/$tableName.wib";
     # -e follows symlinks and is false for a dangling link, so test -l as
     # well; otherwise a link whose target is already gone is left behind.
     if(-l $file || -e $file) {
         HgAutomate::verbose(3, "removing wib '$file'\n");
         # unlink keeps the path away from a shell and gives us $! on failure
         unlink($file) or die "unexpected error removing symlink $file: $!";
     }
 }
  
 ############################################################################
 # Main
 
 # Parse command-line options and the two positional arguments, then work
 # out where the config directory is.  (The chdir into the submission
 # directory happens further down, after the table suffix is computed.)
 
-GetOptions("verbose=i") || usage();
 # Remember the starting directory so that a relative -configDir can be
 # resolved against it.
+my $wd = cwd();
+
 # No destination variables are passed, so Getopt::Long stores the values
 # in the package variables $opt_configDir and $opt_verbose.
+GetOptions("configDir=s", "verbose=i") || usage();
 $opt_verbose = 1 if (!defined $opt_verbose);
 if(@ARGV != 2) {
     usage();
 }
 
 my $submitType = $ARGV[0];	# currently not used
 my $submitDir = $ARGV[1];	# directory where data files are
 # Config directory: an absolute -configDir is used as given, a relative
 # one is taken relative to the starting directory, and without -configDir
 # it defaults to <submitDir>/../config.
+my $configPath;
+if (defined $opt_configDir) {
+    if ($opt_configDir =~ /^\//) {
+        $configPath = $opt_configDir;
+    } else {
+        $configPath = "$wd/$opt_configDir";
+    }
+} else {
+    $configPath = "$submitDir/../config"
+}
 
 # Tables from non-production loads get a suffix so they cannot overwrite
 # existing tables.  The instance name is whatever follows the first "_" in
 # the submission directory's parent path (e.g. "beta" from
 # encinstance_beta):
 #   - no "_" in the parent path -> "_<submission dir name>"
 #   - instance is "prod"         -> no suffix
 #   - any other instance         -> "_<instance>_<submission dir name>"
 
 my ($instance) = dirname($submitDir) =~ /_(.*)/;
 my $tableSuffix = "";
 if (!defined $instance) {
     $tableSuffix = "_" . basename($submitDir);
 } elsif ($instance ne 'prod') {
     $tableSuffix = "_${instance}_" . basename($submitDir);
 }
 
 # From here on, relative paths such as out/unload.ra are resolved inside
 # the submission directory.
 chdir($submitDir) || die "Couldn't chdir to '$submitDir'";
 
 # Without out/unload.ra there is nothing to unload; exit with success.
 my $unloadRa = 'out/unload.ra';
 if(!(-e $unloadRa)) {
     HgAutomate::verbose(2, "Skipping unload b/c '$unloadRa' doesn't exist\n");
     exit(0);
 }
 
 HgAutomate::verbose(2, "Unloading project in directory $submitDir\n");
 
 # Look up the download directory for this submission through the Encode
 # module (grants + fields -> DAF -> download directory).
 # NOTE(review): the process has already chdir'd into $submitDir, yet
 # $submitDir and the default $configPath built from it are passed on
 # unchanged.  If $submitDir was given as a relative path these lookups
 # would resolve against the wrong directory -- confirm callers always pass
 # an absolute path.
+my $grants = Encode::getGrants($configPath);
+my $fields = Encode::getFields($configPath);
+my $daf = Encode::getDaf($submitDir, $grants, $fields);
+my $downloadDir = Encode::downloadDir($daf);
+
 # Unload resources listed in unload.ra
 # Each stanza is handled in turn: log it, drop its table (plus the .wib
 # symlink for wig tracks), then delete its download file.
 # presumably %ra is keyed by each stanza's 'tablename' field -- verify
 # against RAFile::readRaFile.
 my %ra = RAFile::readRaFile($unloadRa, 'tablename');
 my $db;
 for my $key (keys %ra) {
     my $h = $ra{$key};
     my $tablename = $h->{tablename} . $tableSuffix;
     # NOTE(review): split warns if the stanza has no 'files' field
     # ($files undef) -- confirm the field is always present.
+    my $files = $h->{files};
+    my @files = split(/\s+/, $files);
 
     # Build a summary of the stanza for the verbose (level 3) log.
     my $str = "\nkeyword: $key\n";
     for my $field (qw(tablename type assembly files)) {
         if($h->{$field}) {
             $str .= "$field: " . $h->{$field} . "\n";
         }
     }
     $str .= "\n";
     HgAutomate::verbose(3, $str);
 
     # The DB handle is created once, from the assembly of the first stanza
     # processed, and reused for every later stanza.
     # NOTE(review): a stanza naming a different assembly would still use
     # that first handle -- confirm a submission only ever has one assembly.
     my $assembly = $h->{assembly};
     if(!defined($db)) {
         $db = HgDb->new(DB => $assembly);
     }
 
     HgAutomate::verbose(2, "Dropping table '$tablename'\n");
 
     # Lookup set of the type names in @Encode::extendedTypes (rebuilt on
     # every iteration).
     my %extendedTypes = map { $_ => 1 } @Encode::extendedTypes;
     my $type = $h->{type};
     # Dispatch on the stanza's type; downloadOnly stanzas have no table, so
     # that branch is intentionally empty.
     # NOTE(review): the error message below says load.ra, but the file read
     # here is $unloadRa (out/unload.ra).
     if (exists($h->{downloadOnly}) and $h->{downloadOnly}) { # dont unload stuff which is never loaded
     } elsif($type eq "genePred" || $type =~ /^bed/ || $type eq "gtf" || $extendedTypes{$type}) {
         genericUnload($assembly, $db, $tablename);
     } elsif ($type eq "wig") {
         unloadWig($assembly, $db, $tablename);
     } else {
         die "ERROR: unknown type: $h->{type} in load.ra ($PROG)\n";
     }
+
     # The download file is <downloadDir>/<tablename>.<type>.gz, or the same
     # name under <downloadDir>/raw when the stanza lists exactly one file
     # and that file name starts with $Encode::autoCreatedPrefix.
     # NOTE(review): the raw/ directory is created if missing just before a
     # file inside it is unlinked; this looks carried over from the load
     # path -- confirm the mkdir is wanted during unload.
     # NOTE(review): unlink's result is not checked, so a missing or
     # undeletable file is silently ignored.
+    # delete the download files
+    my $target = "$downloadDir/$tablename.$type.gz";
+    if(@files == 1 && $files[0] =~ /^$Encode::autoCreatedPrefix/) {
+        $target = "$downloadDir/raw/$tablename.$type.gz";
+        if (! -d "$downloadDir/raw") {
+            mkdir "$downloadDir/raw" or die "Could not create dir [$downloadDir/raw] error: [$!]\n";
+            }
+    }
+    $target =~ s/ //g;  # removes space in ".bed 5.gz" for example
+    unlink $target;
 }
 
 exit(0);