fa023b2cfad8f6c0b0482dc7d89849d6d645077f hiram Wed Oct 13 12:22:54 2021 -0700 attempt to better manage NCBI names no redmine diff --git src/hg/utils/automation/gpToIx.pl src/hg/utils/automation/gpToIx.pl index 8450c2d..0c7c598 100755 --- src/hg/utils/automation/gpToIx.pl +++ src/hg/utils/automation/gpToIx.pl @@ -16,35 +16,53 @@ printf STDERR "then run ixIxx on ix.txt:\n"; printf STDERR " ixIxx ix.txt out.ix out.ixx\n"; exit 255; } my $gpFile = shift; if ($gpFile =~ m/.gz$/) { open (FH, "zcat $gpFile|") or die "ERROR: gpToIx.pl can not read '$gpFile'"; } else { open (FH, "<$gpFile") or die "ERROR: gpToIx.pl can not read '$gpFile'"; } while (my $line = <FH>) { next if ($line =~ m/^#/); chomp ($line); - my ($name, $chrom, $strand, $txStart, $txEnd, $cdsStart, $cdsEnd, $exonCount, $exonStarts, $exonEnds, $score, $name2, $cdsStartStat, $cdsEndStat, $exonFrames) = split('\s+', $line); + my ($itemName, $chrom, $strand, $txStart, $txEnd, $cdsStart, $cdsEnd, $exonCount, $exonStarts, $exonEnds, $score, $name2, $cdsStartStat, $cdsEndStat, $exonFrames) = split('\s+', $line); + my $name = $itemName; my $extraNames = ""; + my $ncbiName = ""; + my $isNcbiName = 0; + $isNcbiName = 1 if ($name =~ m/\|/); + if ($isNcbiName) { + my @ncbiParts = split('\|', $name); + if (defined($ncbiParts[2])) { + $name = $ncbiParts[2]; + $name =~ s/^mrna.//; + $extraNames = $name; + } + } my $noSuffix=$name; $noSuffix =~ s/\.[0-9][0-9]*$//; - $extraNames = $noSuffix if (($noSuffix ne $name) && (length($noSuffix) > 0)); + if (($noSuffix ne $name) && (length($noSuffix) > 0)) { + if (length($extraNames) > 0) { + $extraNames .= "\t" . $noSuffix; + } else { + $extraNames = $noSuffix; + } + } if (defined($name2)) { if ($name !~ m/\Q$name2\E/i) { if (length($extraNames) > 0) { $extraNames .= "\t" . $name2; } else { $extraNames = $name2; } $noSuffix = $name2; $noSuffix =~ s/\.[0-9][0-9]*$//; $extraNames .= "\t" . $noSuffix if (($noSuffix ne $name2) && (length($noSuffix) > 0)); } } - printf "%s\t%s\n", $name, $extraNames if (length($extraNames) > 0); + printf "%s\t%s\n", $itemName, $extraNames if (length($extraNames) > 0); } close (FH);