05219e2ffe569b3403687b96701006e2ff16116d
hiram
  Tue Jul 23 13:13:17 2019 -0700
creating index from genePred file for assembly ncbiGene track searching refs #23734

diff --git src/hg/utils/automation/gpToIx.pl src/hg/utils/automation/gpToIx.pl
new file mode 100755
index 0000000..0bde5b4
--- /dev/null
+++ src/hg/utils/automation/gpToIx.pl
@@ -0,0 +1,50 @@
+#!/usr/bin/env perl
+
+# construct ix.txt index from a genePred file
+# index key is the name of the gene from column 1 of the genePred
+# alias names for that are constructed:
+#   the name without any .nnn suffix
+#   name2 if present, and name2 without .nnn suffix
+
+use strict;
+use warnings;
+
+my $argc = scalar(@ARGV);
+
+if ($argc != 1 ) {
+  printf STDERR "usage: gpToIx.pl <genePred.gp> | sort -u > ix.txt\n";
+  printf STDERR "then run ixIxx on ix.txt:\n";
+  printf STDERR " ixIxx ix.txt out.ix out.ixx\n";
+  exit 255;
+}
+
+my $gpFile = shift;
+
+# Open the genePred with a lexical handle and explicit open modes.  The list
+# form of the pipe open runs zcat without a shell, so a file name containing
+# spaces or shell metacharacters is handed to zcat as one literal argument.
+my $fh;
+if ($gpFile =~ m/\.gz$/) {
+  open ($fh, "-|", "zcat", $gpFile) or die "ERROR: gpToIx.pl can not read '$gpFile': $!";
+} else {
+  open ($fh, "<", $gpFile) or die "ERROR: gpToIx.pl can not read '$gpFile': $!";
+}
+while (my $line = <$fh>) {
+  # skip comment lines, and blank lines since they carry no gene name
+  next if (($line =~ m/^#/) || ($line !~ m/\S/));
+  chomp ($line);
+  # only name (column 1) and name2 (column 12) take part in the index
+  my ($name, $name2) = (split('\s+', $line))[0, 11];
+  my @extraNames;
+  (my $noSuffix = $name) =~ s/\.[0-9][0-9]*$//;
+  push @extraNames, $noSuffix if (($noSuffix ne $name) && (length($noSuffix) > 0));
+  # name2 becomes an alias only when it does not already occur within name
+  if (defined($name2) && ($name !~ m/\Q$name2\E/i)) {
+    push @extraNames, $name2;
+    ($noSuffix = $name2) =~ s/\.[0-9][0-9]*$//;
+    push @extraNames, $noSuffix if (($noSuffix ne $name2) && (length($noSuffix) > 0));
+  }
+  printf "%s\t%s\n", $name, join("\t", @extraNames) if (scalar(@extraNames) > 0);
+}
+# close on a pipe returns false when zcat failed (e.g. missing or corrupt file)
+close ($fh) or die "ERROR: gpToIx.pl failed reading '$gpFile' (status $?) $!";