src/hg/encode/encodeValidate/doEncodeValidate.pl 1.155

1.155 2009/02/19 21:44:31 larrym
deal with +/- in cell type
Index: src/hg/encode/encodeValidate/doEncodeValidate.pl
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/encodeValidate/doEncodeValidate.pl,v
retrieving revision 1.154
retrieving revision 1.155
diff -b -B -U 4 -r1.154 -r1.155
--- src/hg/encode/encodeValidate/doEncodeValidate.pl	13 Feb 2009 23:46:02 -0000	1.154
+++ src/hg/encode/encodeValidate/doEncodeValidate.pl	19 Feb 2009 21:44:31 -0000	1.155
@@ -66,8 +66,10 @@
 our $time0 = time;
 our $timeStart = time;
 our %chromInfo;         # chromInfo from assembly for chrom validation
 our $maxBedRows=50_000_000; # number of rows to allow in a bed-type file
+our %tableNamesUsed;    # table names generated so far in this run; generating the same name twice is fatal
+our ($grants, $fields, $daf); # file scope (assigned later in the main flow) so subs like dieTellWrangler can read $grants and $daf
 
 
 sub usage {
     print STDERR <<END;
@@ -122,8 +124,19 @@
     warn("# $msg : $t secs".($lines>0 ? "  ($lines lines, ".(int($lines/$t))." lines/sec)" : ""));
     $time0 = time;
 }
 
+sub dieTellWrangler
+{
+    # die with $msg followed by a "Please contact your wrangler" line; adds the wrangler's email when the DAF's grant has one.
+    my ($msg) = @_;
+    # $grants/$daf are globals assigned in the main flow; guard so an early call neither autovivifies them nor warns on an undef key.
+    my $grantInfo = ($grants && $daf && defined($daf->{grant})) ? $grants->{$daf->{grant}} : undef;
+    my $email = $grantInfo ? $grantInfo->{wranglerEmail} : undef;
+    $msg .= "Please contact your wrangler" . ($email ? " at $email" : "") . "\n";
+    die $msg;
+}
+
 ############################################################################
 # Validators for DDF columns -- extend when adding new metadata fields
 #
 # validators should return list of errors encountered (empty list means no errors were found).
@@ -1227,10 +1240,10 @@
     }
 }
 
 # labs is now in fact the list of grants (labs are w/n grants, and are not currently validated).
-my $grants = Encode::getGrants($configPath);
-my $fields = Encode::getFields($configPath);
+$grants = Encode::getGrants($configPath);
+$fields = Encode::getFields($configPath);
 
 if($opt_validateDaf) {
     if(-f $submitDir) {
         Encode::parseDaf($submitDir, $grants, $fields);
@@ -1240,9 +1253,9 @@
     print STDERR "DAF is valid\n";
     exit(0);
 }
 
-my $daf = Encode::getDaf($submitDir, $grants, $fields);
+$daf = Encode::getDaf($submitDir, $grants, $fields);
 
 my $db = HgDb->new(DB => $daf->{assembly});
 $db->getChromInfo(\%chromInfo);
 
@@ -1587,8 +1600,11 @@
         my %hash = map { $_ => $ddfLine->{$_} } @variables;
         for my $var (@variables) {
             my $val = $hash{$var};
             $val = ucfirst(lc($val));
+            # trailing + => Pos, - => Neg (e.g. H9ES-AFP+)
+            $val =~ s/\+$/Pos/;
+            $val =~ s/\-$/Neg/;
             $tableName = $tableName . $val;
         }
 
         my $shortSuffix = "";
@@ -1653,8 +1669,12 @@
     $tableName =~ s/[^A-Za-z0-9]//g;
 
     die "Table name [$tableName] too long, must be <= 64 chars, got [".length($tableName)."]\n" if length($tableName) > 64;
 
+    if($tableNamesUsed{$tableName}++) {
+        dieTellWrangler("System Error: identical tableName '$tableName' was generated by multiple data set\n");
+    }
+
     if(!$opt_allowReloads) {
         if($db->quickQuery("select count(*) from trackDb where tableName = ?", $tableName)) {
             die "view '$view' has already been loaded as track '$tableName'\nPlease contact your wrangler if you need to reload this data\n";
         }