5ab456ddf76e0e021fe6a7f5fb821888edbf1c2f
hiram
  Mon Nov 28 13:27:24 2022 -0800
more generic handling of input file names: strip any leading "<prefix>." (not only "ucsc.") when deriving the alias source name; refs #30326

diff --git src/hg/utils/automation/chromAlias.pl src/hg/utils/automation/chromAlias.pl
index 6d30abd..9ba2119 100755
--- src/hg/utils/automation/chromAlias.pl
+++ src/hg/utils/automation/chromAlias.pl
@@ -1,75 +1,75 @@
 #!/usr/bin/env perl
 
 use strict;
 use warnings;
 use File::Basename;
 
 my $argc = scalar(@ARGV);
 if ($argc < 1) {
   printf STDERR "usage: chromAlias.pl <ucsc.refseq.tab> <ucsc.genbank.tab> \\\n\t<ucsc.ensembl.tab> <ucsc.others.tab> > <db>.chromAlias.tab\n";
   printf STDERR "must have at least one of these input files, others when available\n";
   printf STDERR "the names of the input files must be of this pattern so\n";
   printf STDERR "the name of the alias can be identified\n";
   exit 255;
 }
 
 my %names;  # key is name identifier (refseq, genbank, ensembl, flybase, etc...)
 		#  value is a hash with key identifer name, value ucsc chr name
 my %chrNames;	# key is UCSC chrom name, value is number of times seen
 
 while (my $file = shift @ARGV) {
   my $name = $file;
-  $name =~ s/ucsc.//;
+  $name =~ s/^[^.]+.//;
   $name =~ s/.tab//;
   printf STDERR "# working: %s\n", $name;
   my $namePtr;
   if (exists($names{$name})) {
      $namePtr = $names{$name};
   } else {
      my %nameHash;
      $namePtr = \%nameHash;
      $names{$name} = $namePtr;
   }
   open (FH, "<$file") or die "can not read $file";
   while (my $line = <FH>) {
      chomp $line;
      my ($chr, $other) = split('\t+', $line);
      if (exists($namePtr->{$chr})) {
        printf STDERR "# warning, identical UCSC chrom $chr in $name for $other\n";
        $namePtr->{$chr} = sprintf("%s\t%s", $namePtr->{$chr}, $other);
      } else {
        $namePtr->{$chr} = $other;
      }
      $chrNames{$chr} += 1;
    }
    close (FH);
 }
 
 foreach my $chr (sort keys %chrNames) {
   my %outNames;	# key is other identifier, value is csv list of sources
   foreach my $name (sort keys %names) {
     my $namePtr = $names{$name};
     if (exists($namePtr->{$chr})) {
       my $otherId = $namePtr->{$chr};
       if (! $otherId) {
         die "namePtr->chr exists but is |$otherId| for chr |$chr| (tab-sep?)";
       }
       my @a;
       if ($otherId =~ m/\t/) {
 	  @a = split('\t', $otherId);
       } else {
 	  $a[0] = $otherId;
       }
       for (my $i = 0; $i < scalar(@a); ++$i) {
          if (exists($outNames{$a[$i]})) {
 	    $outNames{$a[$i]} = sprintf("%s,%s", $outNames{$a[$i]}, $name);
          } else {
 	    $outNames{$a[$i]} = $name;
          }
       }
     }
   }
   foreach my $otherName (sort keys %outNames) {
 	printf "%s\t%s\t%s\n", $otherName, $chr, $outNames{$otherName};
   }
 }