#!/usr/bin/env perl
#
# chromAlias.pl - merge per-source alias tables into a single chromAlias table.
#
# Each input file holds tab-separated lines: <UCSC chrom name> <alias name>.
# The alias source (refseq, genbank, ensembl, ...) is taken from the file
# name: "<prefix>.<source>.tab" yields "<source>".
#
# Output, on stdout, is one line per (alias, UCSC chrom) pair:
#   <alias name> <TAB> <UCSC chrom name> <TAB> <comma-separated sources>
# Progress notes and warnings go to stderr.
#
# Provenance of this revision:
#   5ab456ddf76e0e021fe6a7f5fb821888edbf1c2f hiram Mon Nov 28 13:27:24 2022 -0800
#   more generic use of file names refs #30326

use strict;
use warnings;
use File::Basename;

my $argc = scalar(@ARGV);

if ($argc < 1) {
    print STDERR "usage: chromAlias.pl <ucsc.refseq.tab> <ucsc.genbank.tab> \\\n\t<ucsc.ensembl.tab> <ucsc.others.tab> > <db>.chromAlias.tab\n";
    print STDERR "must have at least one of these input files, others when available\n";
    print STDERR "the names of the input files must be of this pattern so\n";
    print STDERR "the name of the alias can be identified\n";
    exit 255;
}

my %names;      # key is name identifier (refseq, genbank, ensembl, flybase, etc...)
                # value is a hash ref: key UCSC chr name, value alias name(s),
                # tab-joined when one chrom has several aliases in a source
my %chrNames;   # key is UCSC chrom name, value is number of times seen

# Pass 1: read every input file into %names, grouped by alias source.
# Iterating the list (rather than testing the truth of shift) means a file
# named "0" is not mistaken for the end of the arguments.
foreach my $file (@ARGV) {
    # Derive the source name from the file name only, so that a directory
    # part such as "./" or "../dir.v2/" can not confuse the prefix match.
    my $name = basename($file);
    $name =~ s/^[^.]+\.//;      # drop leading "<prefix>." (literal dot)
    $name =~ s/\.tab$//;        # drop trailing ".tab" only, not "stab" etc.
    printf STDERR "# working: %s\n", $name;

    # The same source may be given in more than one file; share its table.
    $names{$name} = {} unless exists($names{$name});
    my $namePtr = $names{$name};

    # Three-argument open keeps characters in the file name from being
    # interpreted as an open mode.
    open(my $fh, '<', $file) or die "can not read $file: $!";
    while (my $line = <$fh>) {
        chomp $line;
        next if ($line =~ m/^\s*$/);    # blank lines carry no alias
        my ($chr, $other) = split(/\t+/, $line);
        # Fail here, before any output is written, when a line has no alias
        # column; the usual cause is input that is not tab separated.
        if (!defined($other) || $other eq '') {
            die "no alias name for chr |$chr| at $file line $. (tab-sep?)\n";
        }
        if (exists($namePtr->{$chr})) {
            # Data goes through %s so a '%' in a name can not be taken as
            # a printf conversion.
            printf STDERR "# warning, identical UCSC chrom %s in %s for %s\n",
                $chr, $name, $other;
            $namePtr->{$chr} = join("\t", $namePtr->{$chr}, $other);
        } else {
            $namePtr->{$chr} = $other;
        }
        $chrNames{$chr} += 1;
    }
    close($fh);
}

# Pass 2: for each UCSC chrom, invert the per-source tables into
# alias -> list of sources and print one line per alias.
foreach my $chr (sort keys %chrNames) {
    my %outNames;   # key is other identifier, value is csv list of sources
    foreach my $name (sort keys %names) {
        my $namePtr = $names{$name};
        next unless (exists($namePtr->{$chr}));
        # Stored values are never empty (checked while reading), so each
        # tab-separated field is one alias name; a lone "0" is a valid alias.
        foreach my $otherId (split(/\t/, $namePtr->{$chr})) {
            if (exists($outNames{$otherId})) {
                $outNames{$otherId} = join(",", $outNames{$otherId}, $name);
            } else {
                $outNames{$otherId} = $name;
            }
        }
    }
    foreach my $otherName (sort keys %outNames) {
        printf "%s\t%s\t%s\n", $otherName, $chr, $outNames{$otherName};
    }
}