2113f4ab5376a69e9169e34db99d99a6ce20f8d5 hiram Fri Mar 25 12:05:18 2022 -0700 better name for refSeq variable refs #29111 diff --git src/hg/utils/automation/asmHubChromAlias.pl src/hg/utils/automation/asmHubChromAlias.pl index e9d632a..798f776 100755 --- src/hg/utils/automation/asmHubChromAlias.pl +++ src/hg/utils/automation/asmHubChromAlias.pl @@ -112,74 +112,74 @@ my $hashPtr = $aliasOut{$source}; # already done, verify it is equivalent to previous request if (defined($hashPtr->{$alias})) { if ($sequence ne $hashPtr->{$alias}) { printf STDERR "ERROR: additional alias '%s:%s' does not match previous '%s'\n", $alias, $sequence, $hashPtr->{$alias}; exit 255; } return; } $hashPtr->{$alias} = $sequence; return; } # asmSource - is this a genbank or refseq assembly my $asmSource = "genbank"; -my $refSeq = 0; # == 0 for Genbank assembly, == 1 for RefSeq assembly +my $isRefSeq = 0; # == 0 for Genbank assembly, == 1 for RefSeq assembly if ($asmId =~ m/^GCF/) { # printf STDERR "# processing a RefSeq assembly\n"; - $refSeq = 1; + $isRefSeq = 1; $asmSource = "refseq"; } else { # printf STDERR "# processing a GenBank assembly\n"; } my $twoBit = "../../$asmId.2bit"; open (FH, "twoBitInfo $twoBit stdout|") or die "can not twoBitInfo $twoBit stdout"; while (my $line = <FH>) { chomp $line; my ($name, $size) = split('\s+', $line); $sequenceSizes{$name} = $size; ++$sequenceCount; } close (FH); # printf STDERR "# counted %d sequence names in the twoBit file\n", $sequenceCount; my $nameCount = 0; my %ncbiToUcsc; # key is NCBI sequence name, value is 'chr' UCSC chromosome name my %ucscToNcbi; # key is 'chr' UCSC name, value is NCBI sequence name open (FH, "cat ../../sequence/*.names|") or die "can not cat ../../sequence/*.names"; while (my $line = <FH>) { chomp $line; my ($ucscName, $seqName) = split('\s+', $line); $ncbiToUcsc{$seqName} = $ucscName; $ucscToNcbi{$ucscName} = $seqName; ++$nameCount; $ucscNames = 1 if (defined($sequenceSizes{$ucscName})); - if ($refSeq) { + if ($isRefSeq) { $ucscToRefSeq{$ucscName} = $seqName; } else { $ucscToGenbank{$ucscName} = $seqName; } } close (FH); # when not a UCSC named assembly, add the UCSC names as aliases if (! $ucscNames) { - if ($refSeq) { + if ($isRefSeq) { foreach my $ucscName (sort keys %ucscToRefSeq) { addAlias("ucsc", $ucscName, $ucscToRefSeq{$ucscName}); } } else { foreach my $ucscName (sort keys %ucscToGenbank) { addAlias("ucsc", $ucscName, $ucscToGenbank{$ucscName}); } } } my $dupsNotFound = 0; my $dupsList = "../../download/$asmId.dups.txt.gz"; if ( -s "$dupsList" ) { open (FH, "zcat $dupsList | awk '{print \$1, \$3}'|") or die "can not read $dupsList"; while (my $line = <FH>) { @@ -262,57 +262,57 @@ my $dbgCount = 0; # printf STDERR "# third set processing assembly_report\n"; # column 1 is the 'assembly' name # column 5 is the GenBank-Accn, column 7 is the RefSeq-Accn open (FH, "grep -v '^#' ../../download/${asmId}_assembly_report.txt | cut -d\$'\t' -f1,5,7|") or die "can not grep assembly_report"; while (my $line = <FH>) { chomp $line; ++$dbgCount; my ($asmName, $gbkName, $refSeqName) = split('\s+', $line); printf STDERR "# '%s'\t'%s'\t'%s'\n", $asmName, $gbkName, $refSeqName if ($dbgCount < 5); # next if ($refSeqName eq "na"); # may not be any RefSeq name # next if ($gbkName eq "na"); # may not be any GenBank name if ($refSeqName ne "na") { my $seqName = $refSeqName; - if (! $refSeq) { + if (! $isRefSeq) { $seqName = $gbkName; } if ($ucscNames) { $seqName = $ncbiToUcsc{$seqName}; } if (!defined($seqName)) { if (defined($aliasOut{"refseq"})) { if (defined($aliasOut{"refseq"}->{$refSeqName})) { $seqName = $aliasOut{"refseq"}->{$refSeqName}; } } } if (defined($seqName)) { if (defined($dupToSequence{$seqName})) { addAlias("refseq", $refSeqName, $dupToSequence{$seqName}); addAlias("assembly", $asmName, $dupToSequence{$seqName}); } else { addAlias("refseq", $refSeqName, $seqName); addAlias("assembly", $asmName, $seqName); } } } # if ($refSeqName ne "na") if ($gbkName ne "na") { my $seqName = $gbkName; - if ($refSeq) { + if ($isRefSeq) { $seqName = $refSeqName; } if ($ucscNames) { $seqName = $ncbiToUcsc{$seqName}; } if (!defined($seqName)) { if (defined($aliasOut{"genbank"})) { if (defined($aliasOut{"genbank"}->{$gbkName})) { $seqName = $aliasOut{"genbank"}->{$gbkName}; } } } if (defined($seqName)) { if (defined($dupToSequence{$seqName})) { addAlias("genbank", $gbkName, $dupToSequence{$seqName});