60cbd8384a71282078c4651971c1c8857aa3be23
hiram
Thu Nov 10 11:25:22 2022 -0800
not necessary to include both Case and non case strings ix is all lower case no redmine

diff --git src/hg/utils/automation/xenoRefGeneIx.pl src/hg/utils/automation/xenoRefGeneIx.pl
index cdd67ee..1e7ea49 100755
--- src/hg/utils/automation/xenoRefGeneIx.pl
+++ src/hg/utils/automation/xenoRefGeneIx.pl
@@ -41,60 +41,51 @@ if ($argc != 1 ) {
   printf STDERR "usage: xenoRefGeneIx.pl | sort -u > ix.txt\n";
   printf STDERR "then run ixIxx on ix.txt:\n";
   printf STDERR " ixIxx ix.txt out.ix out.ixx\n";
   exit 255;
 }
 # some of the Arabidopsis thaliana genes are coming in as pairs
 # of different names separated by '; ', process both names
 sub processOne($$) {
   my ($nameHash, $someName) = @_;
   if ($someName =~ m/; /) {
     my ($name1, $name2) = split('; ', $someName);
-    $nameHash->{$name1} += 1;
-    $nameHash->{$name2} += 1;
     $nameHash->{lc($name1)} += 1;
     $nameHash->{lc($name2)} += 1;
     my $noSuffix = $name1;
     $noSuffix =~ s/\.[0-9][0-9]*$//;
-    $nameHash->{$noSuffix} += 1;
     $nameHash->{lc($noSuffix)} += 1;
     $noSuffix = $name2;
     $noSuffix =~ s/\.[0-9][0-9]*$//;
-    $nameHash->{$noSuffix} += 1;
     $nameHash->{lc($noSuffix)} += 1;
   } elsif ($someName =~ m/\s/) {
-    $nameHash->{$someName} += 1;
     $nameHash->{lc($someName)} += 1;
     my @names = split('\s+', $someName);
     foreach my $name (@names) {
-      $nameHash->{$name} += 1;
       $nameHash->{lc($name)} += 1;
       my $noSuffix = $name;
       $noSuffix =~ s/\.[0-9][0-9]*$//;
-      $nameHash->{$noSuffix} += 1;
       $nameHash->{lc($noSuffix)} += 1;
     }
   } else {
-    $nameHash->{$someName} += 1;
     $nameHash->{lc($someName)} += 1;
     my $noSuffix = $someName;
     $noSuffix =~ s/\.[0-9][0-9]*$//;
-    $nameHash->{$noSuffix} += 1;
     $nameHash->{lc($noSuffix)} += 1;
   }
 }
 my $gpFile = shift;
 if ($gpFile =~ m/.gz$/) {
   open (FH, "zcat $gpFile|") or die "ERROR: xenoRefGeneIx.pl can not read '$gpFile'";
 } else {
   open (FH, "<$gpFile") or die "ERROR: xenoRefGeneIx.pl can not read '$gpFile'";
 }
 while (my $line = <FH>) {
   next if ($line =~ m/^#/);
   chomp ($line);
   my ($chrom, $chromStart, $chromEnd, $name, $score, $strand, $thickStart, $thickEnd, $reserved, $blockCount, $blockSizes, $chromStarts, $name2, $cdsStartStat, $cdsEndStat, $exonFrames, $type, $geneName, $geneName2, $geneType) = split('\t', $line);