5308f161d60d310ffe5a80e7755e835ff7ea7a9f
hiram
  Tue Sep 13 12:47:32 2022 -0700
OK to allow duplicate names no redmine

diff --git src/hg/utils/automation/asmHubChromAlias.pl src/hg/utils/automation/asmHubChromAlias.pl
index 0f5318d..6fbdfba 100755
--- src/hg/utils/automation/asmHubChromAlias.pl
+++ src/hg/utils/automation/asmHubChromAlias.pl
@@ -96,32 +96,36 @@
   foreach my $seqName (sort keys %chromIndex) {
     printf "%s\t%s\n", $seqName, $chromIndex{$seqName};
   }
 }
 
 #  given an alias and a sequence name, add to result or verify identical
 #  to previous add
 sub addAlias($$$) {
   my ($source, $alias, $sequence) = @_;
   if ($alias eq "na") {
     return;
   }
   if ($sequence eq "na") {
     return;
   }
+  # it is OK to allow duplicate names, different naming authorities could
+  # have the same name, found for example in GCF_006542625.1_Asia_NLE_v1
+  # which has UCSC names identical to 'assembly' names and the hub has been
+  # built with UCSC names
   # do not need to add the sequence name itself
-  return if ($alias eq $sequence);
+  #  return if ($alias eq $sequence);
   if (!defined($aliasOut{$source})) {
      my %h;	# hash: key: alias name, value 'native' chrom name
      $aliasOut{$source} = \%h;
 #     printf STDERR "# creating aliasOut{'%s'}\n", $source;
   }
   my $hashPtr = $aliasOut{$source};
   # already done, verify it is equivalent to previous request
   if (defined($hashPtr->{$alias})) {
      if ($sequence ne $hashPtr->{$alias}) {
         printf STDERR "ERROR: additional alias '%s:%s' does not match previous '%s'\n", $alias, $sequence, $hashPtr->{$alias};
         exit 255;
      }
      return;
   }
   $hashPtr->{$alias} = $sequence;
@@ -296,42 +300,48 @@
   chomp $line;
   ++$dbgCount;
   my ($asmName, $gbkName, $refSeqName) = split('\t', $line);
   $asmName =~ s/ /_/g;	# some assemblies have spaces in chr names ...
   $asmName =~ s/:/_/g;	# one assembly had : in chr name
   if (defined($dupToSequence{$asmName})) {   # avoid duplicates
      printf STDERR "# skipping duplicate name $asmName\n";
      next;
   } elsif (defined($dupToSequence{$gbkName})) {   # avoid duplicates
      printf STDERR "# skipping duplicate name $gbkName\n";
      next;
   } elsif (defined($dupToSequence{$refSeqName})) {   # avoid duplicates
      printf STDERR "# skipping duplicate name $refSeqName\n";
      next;
   }
-  printf STDERR "# asmRpt: '%s'\t'%s'\t'%s'\n", $asmName, $gbkName, $refSeqName if ($dbgCount < 5);
 #  next if ($refSeqName eq "na");	# may not be any RefSeq name
 #  next if ($gbkName eq "na");	# may not be any GenBank name
   # fill in ncbiToUcsc for potentially the 'other' NCBI name
   if (defined($ncbiToUcsc{$refSeqName}) && !defined($ncbiToUcsc{$gbkName})) {
     $ncbiToUcsc{$gbkName} = $ncbiToUcsc{$refSeqName};
     $ucscToNcbi{$ncbiToUcsc{$refSeqName}} = $gbkName;
   }
   if (defined($ncbiToUcsc{$gbkName}) && !defined($ncbiToUcsc{$refSeqName})) {
     $ncbiToUcsc{$refSeqName} = $ncbiToUcsc{$gbkName};
     $ucscToNcbi{$ncbiToUcsc{$gbkName}} = $refSeqName;
   }
+  if (defined($ncbiToUcsc{$gbkName})) {
+     printf STDERR "# asmRpt: '%s'\t'%s'\t'%s'\t'%s'\n", $asmName, $gbkName, $refSeqName, $ncbiToUcsc{$gbkName} if ($dbgCount < 5);
+  } elsif (defined($ncbiToUcsc{$refSeqName})) {
+     printf STDERR "# asmRpt: '%s'\t'%s'\t'%s'\t'%s'\n", $asmName, $gbkName, $refSeqName, $ncbiToUcsc{$refSeqName} if ($dbgCount < 5);
+  } else {
+     printf STDERR "# asmRpt: '%s'\t'%s'\t'%s'\tno UCSC name\n", $asmName, $gbkName, $refSeqName if ($dbgCount < 5);
+  }
   if ($refSeqName ne "na") {
     my $seqName = $refSeqName;
     if (! $isRefSeq) {
       $seqName = $gbkName;
     }
     if ($ucscNames) {
        $seqName = $ncbiToUcsc{$seqName};
     }
     if (!defined($seqName)) {
        if (defined($aliasOut{"refseq"})) {
          if (defined($aliasOut{"refseq"}->{$refSeqName})) {
             $seqName = $aliasOut{"refseq"}->{$refSeqName};
          }
        }
     }