324671bac877d29cafa50dbc8e944c8c554581aa hiram Mon Aug 24 15:04:28 2020 -0700 output bed file with every possible name in a database genome refs #24396 diff --git src/hg/utils/automation/chromAliasBedTest.pl src/hg/utils/automation/chromAliasBedTest.pl new file mode 100755 index 0000000..b6a7894 --- /dev/null +++ src/hg/utils/automation/chromAliasBedTest.pl @@ -0,0 +1,66 @@ +#!/usr/bin/env perl + +use strict; +use warnings; + +my $argc = scalar(@ARGV); + +if ($argc != 1) { + printf STDERR "usage: chromAliasBedTest.pl > chromAlias..bed\n"; + printf STDERR "writes to STDOUT a bed file that has every chrom name\n"; + exit 255; +} +# +----------+------------------+------+-----+---------+-------+ +# | Field | Type | Null | Key | Default | Extra | +# +----------+------------------+------+-----+---------+-------+ +# | chrom | varchar(255) | NO | PRI | NULL | | +# | size | int(10) unsigned | NO | | NULL | | +# | fileName | varchar(255) | YES | | NULL | | +# +----------+------------------+------+-----+---------+-------+ + +my $db = shift; + +my %chromSizes; # key is chrom value is size +open (FH, "hgsql -N -e 'select * from chromInfo;' $db|") or die "can not hgsql select from chromInfo.$db"; +while (my $line = ) { + chomp $line; + my ($chrom, $size, $fileName) = split('\s+', $line); + $chromSizes{$chrom} = $size; +} +close (FH); + +# foreach my $chr (sort keys %chromSizes) { +# printf "%s\t%d\n", $chr, $chromSizes{$chr}; +# } + +my %chromAlias; # key is external name value is UCSC name + +open (FH, "hgsql -N -e 'select * from chromAlias;' $db|") or die "can not hgsql select from chromAlias.$db"; +while (my $line = ) { + chomp $line; + my ($external, $ucsc) = split('\s+', $line); + $chromAlias{$external} = $ucsc; +} +close (FH); + +# output bed item for each external name +my %ucscUsed; # key is UCSC name, value is csv list of external names +foreach my $external (sort keys %chromAlias) { + my $ucscName = $chromAlias{$external}; + printf "%s\t0\t%s\t%s\n", $external, $chromSizes{$ucscName}, $ucscName; + if (defined($ucscUsed{$ucscName})) { + $ucscUsed{$ucscName} .= "," . $external; + } else { + $ucscUsed{$ucscName} = $external; + } +} + +# catch up for those UCSC names not mentioned in chromAlias +foreach my $ucscName (sort keys %chromSizes) { + if (!defined($ucscUsed{$ucscName})) { + printf "%s\t0\t%s\tnoAlias\n", $ucscName, $chromSizes{$ucscName}; + } else { + printf "%s\t0\t%s\t%s\n", $ucscName, $chromSizes{$ucscName}, $ucscUsed{$ucscName}; + } +} +