be4311c07e14feb728abc6425ee606ffaa611a58 markd Fri Jan 22 06:46:58 2021 -0800 merge with master diff --git src/hg/utils/automation/asmHubChromAlias.pl src/hg/utils/automation/asmHubChromAlias.pl index db4c464..d43d5aa 100755 --- src/hg/utils/automation/asmHubChromAlias.pl +++ src/hg/utils/automation/asmHubChromAlias.pl @@ -152,31 +152,33 @@ # foreach my $sequence (sort keys %chrNames) { # printf "%s\t%s\n", $chrNames{$sequence}, $sequence; # } ### next set of names are the equivalents declared by NCBI ### if they exist my %chr2acc; # key is sequence name, value is NCBI chromosome name my $asmStructCount = `ls ../../download/${asmId}_assembly_structure/*/*/chr2acc 2> /dev/null | wc -l`; chomp $asmStructCount; if ( $asmStructCount > 0 ) { printf STDERR "# second name set processing chr2acc files\n"; open (FH, "grep -h -v '^#' ../../download/${asmId}_assembly_structure/*/*/chr2acc|") or die "can not grep chr2acc files"; while (my $line = <FH>) { chomp $line; - my ($alias, $seqName) = split('\s+', $line); + my ($alias, $seqName) = split('\t', $line); + $alias =~ s/ /_/g; # some assemblies have spaces in chr names ... + $alias =~ s/:/_/g; # one assembly had : in a chr name $chr2acc{$seqName} = $alias; if ($ucscNames) { $seqName = $ncbiToUcsc{$seqName}; } addAlias($alias, $seqName); } close (FH); } # foreach my $sequence (sort keys %chr2acc) { # printf "%s\t%s\n", $chr2acc{$sequence}, $sequence; # } my %gbk2acc; # key is refseq name, value is genbank accession printf STDERR "# third set processing assembly_report\n";