b1852a4dd2bbc53c29c822f0d6a0e40d1af7a713 hiram Wed Sep 23 10:11:32 2020 -0700 accommodate duplicate sequences refs #24396 diff --git src/hg/utils/automation/asmHubChromAlias.pl src/hg/utils/automation/asmHubChromAlias.pl index c8bfe1c..68a5561 100755 --- src/hg/utils/automation/asmHubChromAlias.pl +++ src/hg/utils/automation/asmHubChromAlias.pl @@ -91,32 +91,32 @@ $ucscNames = 1 if (defined($sequenceSizes{$ucscName})); if ($refSeq) { $ucscToRefSeq{$ucscName} = $seqName; } else { $ucscToGenbank{$ucscName} = $seqName; } } close (FH); my $dupsNotFound = 0; my $dupsList = "../../download/$asmId.dups.txt.gz"; if ( -s "$dupsList" ) { open (FH, "zcat $dupsList | awk '{print \$1, \$3}'|") or die "can not read $dupsList"; while (my $line = <FH>) { chomp $line; -# my ($dupAlias, $dupTarget) = split('\s+', $line); - my ($dupTarget, $dupAlias) = split('\s+', $line); + my ($dupAlias, $dupTarget) = split('\s+', $line); +### early version my ($dupTarget, $dupAlias) = split('\s+', $line); if ($ucscNames) { if (!defined($ncbiToUcsc{$dupTarget})) { printf STDERR "# ERROR: can not find dupTarget: $dupTarget in ncbiToUcsc for dupAlias: $dupAlias\n"; $dupsNotFound += 1; } else { addAlias($dupAlias, $ncbiToUcsc{$dupTarget}); } } elsif (defined($ucscToNcbi{$dupTarget})) { addAlias($dupAlias, $ucscToNcbi{$dupTarget}); } else { printf STDERR "# ERROR: can not find duplicate name $dupAlias for sequence $dupTarget\n"; $dupsNotFound += 1; } ++$dupCount; }