d4caa9d01a63434e9859ed47971217ff9a20edb9
angie
  Sat Dec 23 19:14:20 2023 -0800
Add country and year to isolate names that don't have them, analogous to fixNcbiFastaNames.pl.

diff --git src/hg/utils/otto/sarscov2phylo/gbMetadataAddBioSample.pl src/hg/utils/otto/sarscov2phylo/gbMetadataAddBioSample.pl
index 88dca79..df6b74b 100755
--- src/hg/utils/otto/sarscov2phylo/gbMetadataAddBioSample.pl
+++ src/hg/utils/otto/sarscov2phylo/gbMetadataAddBioSample.pl
@@ -65,30 +65,37 @@
         $gbDate = $bDate;
       } elsif ($bDate && $gbDate ne $bDate) {
         print STDERR join("\t", "dateMismatch", $gbAcc, $gbName, $gbDate, $bAcc, $bName, $bDate) .
           "\n";
       }
       if (! $gbName) {
         $gbName = $bName;
       } elsif (($gbName eq '1' || $gbName eq 'NA') && length($bName) > length($gbName)) {
         $gbName = $bName;
       } elsif ($gbName eq 'nasopharyngeal' && $bName =~ m/\d/) {
         $gbName = $bName;
       }
       if (! $gbGeo) {
         $gbGeo = $bCountry;
       }
+      if ($gbName !~ m@/@ && $gbGeo ne "" && $gbDate =~ /^\d{4}/) {
+        my $country = $gbGeo;
+        $country =~ s/:.*//;  $country =~ s/ //g;
+        my $year = $gbDate;
+        $year =~ s/^(\d{4}).*/$1/;
+        $gbName = "$country/$gbName/$year";
+      }
       print join("\t", $gbAcc, $bAcc, $gbDate, $gbGeo, $host, $gbName, $completeness, $len);
     } else {
       # BioSample file doesn't have info for this BioSample accession
       print STDERR "Missing BioSample info for $bAcc\n";
       $missingCount++;
       if ($missingCount > $maxMissing) {
         die "Too many missing BioSamples (> $maxMissing), quitting.\n";
       }
       # Pass through as-is
       print;
     }
   } else {
     # No associated BioSample, just pass through as-is
     print;
   }