fa023b2cfad8f6c0b0482dc7d89849d6d645077f
hiram
  Wed Oct 13 12:22:54 2021 -0700
attempt to better manage NCBI names; no redmine

diff --git src/hg/utils/automation/gpToIx.pl src/hg/utils/automation/gpToIx.pl
index 8450c2d..0c7c598 100755
--- src/hg/utils/automation/gpToIx.pl
+++ src/hg/utils/automation/gpToIx.pl
@@ -16,35 +16,53 @@
   printf STDERR "then run ixIxx on ix.txt:\n";
   printf STDERR " ixIxx ix.txt out.ix out.ixx\n";
   exit 255;
 }
 
 my $gpFile = shift;
 
 if ($gpFile =~ m/.gz$/) {
   open (FH, "zcat $gpFile|") or die "ERROR: gpToIx.pl can not read '$gpFile'";
 } else {
   open (FH, "<$gpFile") or die "ERROR: gpToIx.pl can not read '$gpFile'";
 }
 while (my $line = <FH>) {
   next if ($line =~ m/^#/);
   chomp ($line);
-  my ($name, $chrom, $strand, $txStart, $txEnd, $cdsStart, $cdsEnd, $exonCount, $exonStarts, $exonEnds, $score, $name2, $cdsStartStat, $cdsEndStat, $exonFrames) = split('\s+', $line);
+  my ($itemName, $chrom, $strand, $txStart, $txEnd, $cdsStart, $cdsEnd, $exonCount, $exonStarts, $exonEnds, $score, $name2, $cdsStartStat, $cdsEndStat, $exonFrames) = split('\s+', $line);
+  my $name = $itemName;
   my $extraNames = "";
+  my $ncbiName = "";
+  my $isNcbiName = 0;
+  $isNcbiName = 1 if ($name =~ m/\|/);
+  if ($isNcbiName) {
+    my @ncbiParts = split('\|', $name);
+    if (defined($ncbiParts[2])) {
+      $name = $ncbiParts[2];
+      $name =~ s/^mrna.//;
+      $extraNames = $name;
+    }
+  }
   my $noSuffix=$name;
   $noSuffix =~ s/\.[0-9][0-9]*$//;
-  $extraNames = $noSuffix if (($noSuffix ne $name) && (length($noSuffix) > 0));
+  if (($noSuffix ne $name) && (length($noSuffix) > 0)) {
+      if (length($extraNames) > 0) {
+         $extraNames .= "\t" . $noSuffix;
+      } else {
+         $extraNames = $noSuffix;
+      }
+  }
   if (defined($name2)) {
     if ($name !~ m/\Q$name2\E/i) {
       if (length($extraNames) > 0) {
          $extraNames .= "\t" . $name2;
       } else {
          $extraNames = $name2;
       }
       $noSuffix = $name2;
       $noSuffix =~ s/\.[0-9][0-9]*$//;
       $extraNames .= "\t" . $noSuffix if (($noSuffix ne $name2) && (length($noSuffix) > 0));
     }
   }
-  printf "%s\t%s\n", $name, $extraNames if (length($extraNames) > 0);
+  printf "%s\t%s\n", $itemName, $extraNames if (length($extraNames) > 0);
 }
 close (FH);