b2b24fc1972e32973020518288369e2582a7ffcf hiram Wed Sep 30 12:39:12 2020 -0700 fixup errors in building a hub without UCSC names refs #24386 diff --git src/hg/utils/automation/asmHubChromAlias.pl src/hg/utils/automation/asmHubChromAlias.pl index 68a5561..31409c1 100755 --- src/hg/utils/automation/asmHubChromAlias.pl +++ src/hg/utils/automation/asmHubChromAlias.pl @@ -8,35 +8,36 @@ printf STDERR "usage: asmHubChromAlias.pl asmId > asmId.chromAlias.tab\n\n"; printf STDERR "where asmId is something like: GCF_000001735.4_TAIR10.1\n"; printf STDERR "Outputs a tab file suitable for processing with ixIxx to\n"; printf STDERR "create an index file to use in an assembly hub.\n"; printf STDERR "This command assumes it is in a work directory in the\n"; printf STDERR "assembly hub: .../asmId/trackData/chromAlias/\n"; printf STDERR "and .../asmId/downloads/ and .../asmId/sequence/ have\n"; printf STDERR "been created and processed for the hub build.\n"; exit 255; } my %ucscToRefSeq; # key is UCSC sequence name, value is RefSeq name my %ucscToGenbank; # key is UCSC sequence name, value is GenBank name my $ucscNames = 0; # == 1 if sequence is UCSC names, == 0 if NCBI names my $dupCount = 0; +my $asmId = shift; my %aliasOut; # key is alias name, value is sequence name in this assembly sub showAlias() { - printf "# sequenceName\talias names\n"; + printf "# sequenceName\talias names\tassembly: %s\n", $asmId; my %chromIndex; # key is sequence name in assembly, value # is a tab separated list of aliases foreach my $alias (sort keys %aliasOut) { my $name = $aliasOut{$alias}; next if ($alias eq "na"); if (defined($chromIndex{$name})) { $chromIndex{$name} .= "\t" . $alias; } else { $chromIndex{$name} = $alias; } } foreach my $name (sort keys %chromIndex) { printf "%s\t%s\n", $name, $chromIndex{$name}; } } @@ -47,32 +48,30 @@ my ($alias, $sequence) = @_; # do not need to add the sequence name itself return if ($alias eq $sequence); # already done, verify it is equivalent to previous request if (defined($aliasOut{$alias})) { if ($sequence ne $aliasOut{$alias}) { printf STDERR "ERROR: additional alias '%s:%s' does not match previous '%s'\n", $alias, $sequence, $aliasOut{$alias}; exit 255; } return; } $aliasOut{$alias} = $sequence; return; } -my $asmId = shift; - my $refSeq = 0; # == 0 for Genbank assembly, == 1 for RefSeq assembly $refSeq = 1 if ($asmId =~ m/^GCF/); my $twoBit = "../../$asmId.2bit"; my $sequenceCount = 0; my %sequenceSizes; # key is sequence name, value is sequence size open (FH, "twoBitInfo $twoBit stdout|") or die "can not twoBitInfo $twoBit stdout"; while (my $line = <FH>) { chomp $line; my ($name, $size) = split('\s+', $line); $sequenceSizes{$name} = $size; ++$sequenceCount; }