57a2bac4726971e593dc92902803e55667782882 hiram Mon May 22 13:34:49 2023 -0700 adding Ensembl relations in chromAlias and new extended ensGene track to host Rapid Release ensGene refs #31332 diff --git src/hg/utils/automation/asmHubChromAlias.pl src/hg/utils/automation/asmHubChromAlias.pl index 524146a..b910503 100755 --- src/hg/utils/automation/asmHubChromAlias.pl +++ src/hg/utils/automation/asmHubChromAlias.pl @@ -174,30 +174,51 @@ chomp $line; my ($native, $alias) = split('\s+', $line); if (!defined($sequenceSizes{$native})) { printf STDERR "ERROR: processing customNames.tsv given native name\n"; printf STDERR " '%s' that does not exist (alias: %s)\n", $native, $alias; exit 255; } $customName{$native} = $alias; ++$customNameCount; addAlias("custom", $alias, $native); } close (FH); printf STDERR "# read %d custom alias names from customNames.tsv\n", $customNameCount; } +my %ensemblName; # key is native sequence name, value is an Ensembl alias +my $ensemblNameCount = 0; + +if ( -s "ensemblNames.tsv" ) { + open (FH, "<ensemblNames.tsv") or die "can not read ensemblNames.tsv"; + while (my $line = <FH>) { + chomp $line; + my ($native, $alias) = split('\s+', $line); + if (!defined($sequenceSizes{$native})) { + printf STDERR "ERROR: processing ensemblNames.tsv given native name\n"; + printf STDERR " '%s' that does not exist (alias: %s)\n", $native, $alias; + exit 255; + } + $ensemblName{$native} = $alias; + ++$ensemblNameCount; + addAlias("ensembl", $alias, $native); + } + close (FH); + printf STDERR "# read %d ensembl alias names from ensemblNames.tsv\n", $ensemblNameCount; +} + my $nameCount = 0; my %ncbiToUcsc; # key is NCBI sequence name, value is 'chr' UCSC chromosome name my %ucscToNcbi; # key is 'chr' UCSC name, value is NCBI sequence name open (FH, "cat ../../sequence/*.names|") or die "can not cat ../../sequence/*.names"; while (my $line = <FH>) { chomp $line; my ($ucscName, $seqName) = split('\s+', $line); $ncbiToUcsc{$seqName} = $ucscName; 
$ucscToNcbi{$ucscName} = $seqName; ++$nameCount; $ucscNames = 1 if (defined($sequenceSizes{$ucscName})); if ($isRefSeq) { $ucscToRefSeq{$ucscName} = $seqName; } else { $ucscToGenbank{$ucscName} = $seqName;