86fd61c5e8efe6d7ac42baa32b891fc1cf9e61bf hiram Thu Sep 22 13:57:04 2022 -0700 add the option of using a customNames.tsv file to add arbitrary listings refs #29982 diff --git src/hg/utils/automation/asmHubChromAlias.pl src/hg/utils/automation/asmHubChromAlias.pl index ffa9187..64fe006 100755 --- src/hg/utils/automation/asmHubChromAlias.pl +++ src/hg/utils/automation/asmHubChromAlias.pl @@ -145,30 +145,51 @@ } my $twoBit = "../../$asmId.2bit"; open (FH, "twoBitInfo $twoBit stdout|") or die "can not twoBitInfo $twoBit stdout"; while (my $line = <FH>) { chomp $line; my ($name, $size) = split('\s+', $line); $sequenceSizes{$name} = $size; ++$sequenceCount; } close (FH); # printf STDERR "# counted %d sequence names in the twoBit file\n", $sequenceCount; +my %customName; # key is native sequence name, value is a custom alias +my $customNameCount = 0; + +if ( -s "customNames.tsv" ) { + open (FH, "<customNames.tsv") or die "can not read customNames.tsv"; + while (my $line = <FH>) { + chomp $line; + my ($native, $alias) = split('\s+', $line); + if (!defined($sequenceSizes{$native})) { + printf STDERR "ERROR: processing customNames.tsv given native name\n"; + printf STDERR " '%s' that does not exist (alias: %s)\n", $native, $alias; + exit 255; + } + $customName{$native} = $alias; + ++$customNameCount; + addAlias("custom", $alias, $native); + } + close (FH); + printf STDERR "# read %d custom alias names from customNames.tsv\n", $customNameCount; +} + my $nameCount = 0; my %ncbiToUcsc; # key is NCBI sequence name, value is 'chr' UCSC chromosome name my %ucscToNcbi; # key is 'chr' UCSC name, value is NCBI sequence name open (FH, "cat ../../sequence/*.names|") or die "can not cat ../../sequence/*.names"; while (my $line = <FH>) { chomp $line; my ($ucscName, $seqName) = split('\s+', $line); $ncbiToUcsc{$seqName} = $ucscName; $ucscToNcbi{$ucscName} = $seqName; ++$nameCount; $ucscNames = 1 if (defined($sequenceSizes{$ucscName})); if ($isRefSeq) { $ucscToRefSeq{$ucscName} = $seqName; } else { $ucscToGenbank{$ucscName} = $seqName;