0f9627d32b1a95d56c1078595924d387375e487d
hiram
  Tue Sep 13 14:37:39 2022 -0700
the chromAlias needs to know about the NCBI asmId refs #29811

diff --git src/hg/utils/automation/asmHubChromAlias.pl src/hg/utils/automation/asmHubChromAlias.pl
index 6fbdfba..ffa9187 100755
--- src/hg/utils/automation/asmHubChromAlias.pl
+++ src/hg/utils/automation/asmHubChromAlias.pl
@@ -1,43 +1,44 @@
 #!/usr/bin/env perl
 
 use strict;
 use warnings;
 
 my $argc = scalar(@ARGV);
-if ($argc != 1) {
-  printf STDERR "usage: asmHubChromAlias.pl asmId > asmId.chromAlias.tab\n\n";
+if ($argc != 2) {
+  printf STDERR "usage: asmHubChromAlias.pl asmId ncbiAsmId > asmId.chromAlias.tab\n\n";
   printf STDERR "where asmId is something like: GCF_000001735.4_TAIR10.1\n";
   printf STDERR "Outputs a tab file suitable for processing with ixIxx to\n";
   printf STDERR "create an index file to use in an assembly hub.\n";
   printf STDERR "This command assumes it is in a work directory in the\n";
   printf STDERR "assembly hub: .../asmId/trackData/chromAlias/\n";
   printf STDERR "and ../../download/ and ../../sequence/ have\n";
   printf STDERR "been created and processed for the hub build.\n";
   exit 255;
 }
 
 my %ucscToRefSeq;	# key is UCSC sequence name, value is RefSeq name
 my %ucscToGenbank;	# key is UCSC sequence name, value is GenBank name
 my $ucscNames = 0;	# == 1 if sequence is UCSC names, == 0 if NCBI names
 my $dupCount = 0;
 my %dupToSequence;	# key is duplicate name, value is target sequence name
 			# to manage duplicates coming in from assembly report
 my $sequenceCount = 0;
 my %sequenceSizes;	# key is sequence name, value is sequence size
 
 my $asmId = shift;
+my $ncbiAsmId = shift;
 
 my %aliasOut;	# key is source name, value is a hash pointer where
 		# key is alias name and value is chrom name
 	# typical source names: ucsc, refseq, genbank, ensembl, assembly, ncbi
 
 # the argument is the name of the 'native' source name
 # where 'native' are the names in the assembly itself, thus the
 # assembly names.  The other sources are aliases to that.
 sub showAlias($) {
   my ($native) = @_;
   # second line is the column headers
   printf "# %s", $native;
   foreach my $source (sort keys %aliasOut) {
     printf "\t%s", $source if ($source ne $native);
   }
@@ -123,31 +124,31 @@
   # already done, verify it is equivalent to previous request
   if (defined($hashPtr->{$alias})) {
      if ($sequence ne $hashPtr->{$alias}) {
         printf STDERR "ERROR: additional alias '%s:%s' does not match previous '%s'\n", $alias, $sequence, $hashPtr->{$alias};
         exit 255;
      }
      return;
   }
   $hashPtr->{$alias} = $sequence;
   return;
 }
 
 # asmSource - is this a genbank or refseq assembly
 my $asmSource = "genbank";
 my $isRefSeq = 0; #       == 0 for Genbank assembly, == 1 for RefSeq assembly
-if ($asmId =~ m/^GCF/) {
+if ($ncbiAsmId =~ m/^GCF/) {
 #  printf STDERR "# processing a RefSeq assembly\n";
   $isRefSeq = 1;
   $asmSource = "refseq";
 } else {
 #  printf STDERR "# processing a GenBank assembly\n";
 }
 
 my $twoBit = "../../$asmId.2bit";
 
 open (FH, "twoBitInfo $twoBit stdout|") or die "can not twoBitInfo $twoBit stdout";
 while (my $line = <FH>) {
   chomp $line;
   my ($name, $size) = split('\s+', $line);
   $sequenceSizes{$name} = $size;
   ++$sequenceCount;