6ab4a88e1c9d7de5e15ecc3a6699c53bb4c60935
angie
  Wed Feb 3 13:45:17 2021 -0800
Use Date::Parse to handle more data formats that are finding their way into BioSample.

diff --git src/hg/utils/otto/sarscov2phylo/gbMetadataAddBioSample.pl src/hg/utils/otto/sarscov2phylo/gbMetadataAddBioSample.pl
index db3798a..037e12b 100755
--- src/hg/utils/otto/sarscov2phylo/gbMetadataAddBioSample.pl
+++ src/hg/utils/otto/sarscov2phylo/gbMetadataAddBioSample.pl
@@ -1,56 +1,54 @@
 #!/usr/bin/env perl
 
 use warnings;
 use strict;
+use Date::Parse;
+
 
 sub usage() {
   print STDERR "usage: $0 biosample.tab [gbMetadata.tab]\n";
   exit 1;
 }
 
 # Read in and store distilled BioSample metadata; stream through GenBank metadata
 # (from NCBI Virus / NCBI Datasets) and add in collection date and isolate name
 # from BioSample when missing from GenBank.  Report any conflicting dates.
 
 my @months = qw(jan feb mar apr may jun jul aug sep oct nov dec);
 
 sub normalizeDate($) {
   # Convert "25-Jan-2020" to 2020-01-25, "19-MAR-2020" to 2020-03-19...
   my ($dateIn) = @_;
+  $dateIn =~ s/-00//g if (defined $dateIn);  # drop "-00" placeholder month/day parts
+  if (! $dateIn) {
+    return "";  # nothing to normalize
+  } elsif ($dateIn =~ /^\d\d\d\d(-\d\d)*$/) {
+    return $dateIn;  # already YYYY, YYYY-MM or YYYY-MM-DD
+  } else {
+    my ($ss,$mm,$hh,$day,$month,$year,$zone) = strptime($dateIn);  # unparsed fields stay undef
     my $dateOut = "";
-  if ($dateIn) {
-    if ($dateIn =~ /^\d\d\d\d(-\d\d)?(-\d\d)?$/) {
-      $dateOut = $dateIn;
-    } elsif ($dateIn =~ /^((\d\d?)-)?(\w\w\w)-(\d\d\d\d)$/) {
-      my ($day, $month, $year) = ($2, lc($3), $4);
-      my ($mIx) = grep { $months[$_] eq $month } (0 .. @months-1);
-      if (! defined $mIx) {
-        die "Unrecognized month '$month' in '$dateIn'";
-      }
-      $month = $mIx + 1;
     if ($day) {
-        $dateOut = sprintf("%04d-%02d-%02d", $year, $month, $day);
-      } else {
-        $dateOut = sprintf("%04d-%02d", $year, $month);
-      }
-    } else {
-      die "Unrecognized date format '$dateIn'";
-    }
+      $dateOut = sprintf("%04d-%02d-%02d", $year+1900, $month+1, $day);
+    } elsif (defined $month) {  # month is 0-based: test defined-ness so January (0) is kept
+      $dateOut = sprintf("%04d-%02d", $year+1900, $month+1);
+    } elsif (defined $year) {  # year is an offset from 1900, so 0 is a valid value
+      $dateOut = sprintf("%04d", $year+1900);  # sprintf, not printf: return the string, don't print it
     }
     return $dateOut;
   }
+}
 
 my $biosampleFile = shift @ARGV;
 
 open(my $BIOSAMPLE, "<$biosampleFile") || die "Can't open $biosampleFile: %!\n";
 
 my %b2Name = ();
 my %b2Date = ();
 my %b2Country = ();
 while (<$BIOSAMPLE>) {
   my (undef, $bAcc, $name, $date, undef, undef, $country) = split("\t");
   $b2Name{$bAcc} = $name;
   $b2Date{$bAcc} = $date;
   $b2Country{$bAcc} = $country;
 }
 close($BIOSAMPLE);