src/hg/archaeStuff/scripts/extract-genome-info 1.4
1.4 2009/03/26 00:04:27 pchan
add altdb option for alternate centraldb
Index: src/hg/archaeStuff/scripts/extract-genome-info
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/archaeStuff/scripts/extract-genome-info,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 1000000 -r1.3 -r1.4
--- src/hg/archaeStuff/scripts/extract-genome-info 25 Mar 2009 21:51:26 -0000 1.3
+++ src/hg/archaeStuff/scripts/extract-genome-info 26 Mar 2009 00:04:27 -0000 1.4
@@ -1,223 +1,234 @@
#! /usr/bin/perl -w
# extract-genome-info [options] > <output file>
use strict;
use Getopt::Long;
use archaeBrowser::Utils;
use archaeBrowser::Constant;
use archaeBrowser::Organism;
use archaeBrowser::GenomeInfo;
# Print the usage summary and stop when the script is started without any
# command-line arguments.
if (scalar(@ARGV) <= 0) {
    die "Usage: extract-genome-info [options] > <output file>\n",
        "Options:\n",
+       "--altdb=<name> alternative central db (eg. public for centraldb_public)\n",
        "--file list of space-delimited fields in Genome-info-db to be included\n",
        "--db list of space-delimited fields in centraldb to be included\n",
        "--dbonly limit output to genomes included in browser database\n\n";
}
# Options
# GetOptions below is called without destination references, so Getopt::Long
# stores each option "foo" in the package variable $opt_foo; the declarations
# here give those variables their defaults.
+our $opt_altdb = "";
our $opt_file = "";
our $opt_db = "";
our $opt_dbonly = 0;
# NOTE(review): the return value of GetOptions is not checked, so the script
# keeps running after an unrecognised option.
-Getopt::Long::GetOptions("file=s", "db=s", "dbonly");
+Getopt::Long::GetOptions("altdb=s", "file=s", "db=s", "dbonly");
# Global constants
our $global_constants = archaeBrowser::Constant->new;
# Utilities and error handling
our $utils = archaeBrowser::Utils->new;
# Organisms
# %organism_AR is read by read_genome_info below; nothing in this file writes
# to these variables, so presumably Read_genome_info fills them through
# %global_vars -- TODO confirm.
our $organism;
our %organism_AR;
our %org_abbr_AR;
our %domain_ct_AR;
our @org_list;
# Module-sharing variable hash: references to the variables above, handed to
# Read_genome_info and read_centraldb.
our %global_vars = (global_constants => $global_constants,
utils => $utils,
organism => \$organism,
organism_AR => \%organism_AR,
org_abbr_AR => \%org_abbr_AR,
domain_ct_AR => \%domain_ct_AR
);
# Read_genome_info (capital R) is not defined in this file; presumably it is
# exported by one of the archaeBrowser modules -- TODO confirm. It is a
# different routine from the local read_genome_info called further down.
&Read_genome_info($global_constants->genome_info(), \%global_vars); # default genome information
&Read_genome_info($global_constants->genome_info_euk(), \%global_vars); # presumably eukaryote genome info (old comment duplicated the "mods" one) -- TODO confirm
&Read_genome_info($global_constants->genome_info_vir(), \%global_vars); # presumably virus genome info (old comment duplicated the "mods" one) -- TODO confirm
&Read_genome_info($global_constants->genome_info_mods(), \%global_vars); # manual changes to genome info
# Query the central db first: read_genome_info uses its result to apply the
# --dbonly filter.
-my $db_genomes = &read_centraldb();
+my $db_genomes = &read_centraldb(\%global_vars);
my $file_genomes = &read_genome_info($db_genomes);
# Emit the tab-delimited table on STDOUT.
&write_output($db_genomes, $file_genomes);
exit;
# end main
# read_centraldb(\%global_vars)
#
# Runs one query against the dbDb and genomeClade tables of the central
# database through the hgsql command-line client and returns a reference to
# a hash keyed by the value of the "name" column; each value is a hash
# reference mapping column header to that row's value.
#
# The database is $global_constants->default_central_db(), with "_<altdb>"
# appended when --altdb is set.  The selected columns come from the
# space-delimited --db list ("clade" is taken from genomeClade, every other
# field from dbDb); dbDb.name is appended when it was not requested so that
# rows can always be keyed.  With an empty --db list no query is run and the
# returned hash is empty.
#
# Dies if hgsql cannot be started or finishes with a non-zero exit status.
sub read_centraldb
{
+    my ($global_vars) = @_;
+    my $global_constants = $global_vars->{global_constants};
    my %genomes = ();
    my @headers = ();
    my $name_col = 0;
+
+    my $central_db = $global_constants->default_central_db();
+    if ($opt_altdb ne "")
+    {
+        $central_db .= "_$opt_altdb";
+    }
    my $cmd = "select ";
    my @fields = split(/ /, $opt_db);
    if (scalar(@fields) > 0)
    {
        for (my $i = 0; $i < scalar(@fields); $i++)
        {
            if ($i > 0) { $cmd .= ","; }
            if ($fields[$i] =~ /^clade$/i) { $cmd .= "b.clade"; }
            else { $cmd .= "a.$fields[$i]"; }
        }
        # Compare whole field names: a substring test would treat any field
        # that merely contains "name" as if the key column had been requested.
        if (!grep { $_ eq "name" } @fields) { $cmd .= ",a.name"; }
        $cmd .= " from dbDb a, genomeClade b where a.genome = b.genome;";
-        my $result = `hgsql "centraldb" -e "$cmd"`;
+        # List-form pipe open starts hgsql directly, without a shell, so the
+        # database name and the SQL text are passed through verbatim.
+        open(my $hgsql, "-|", "hgsql", $central_db, "-e", $cmd)
+            or die "Cannot run hgsql on $central_db: $!\n";
        my $seen_header = 0;
        while (my $row = <$hgsql>)
        {
            chomp($row);
            next if ($row eq "");
            if (!$seen_header)
            {
                # First line of hgsql output: the column headers.
                @headers = split(/\t/, $row);
                for (my $i = 0; $i < scalar(@headers); $i++)
                {
                    if ($headers[$i] eq "name")
                    {
                        $name_col = $i;
                        last;
                    }
                }
                $seen_header = 1;
                next;
            }
            # LIMIT -1 keeps trailing empty columns; without it a row whose
            # last values are empty would leave those headers undefined.
            my @cols = split(/\t/, $row, -1);
            my $genome = {};
            for (my $col = 0; $col < scalar(@cols); $col++)
            {
                $genome->{$headers[$col]} = $cols[$col];
            }
            $genomes{$cols[$name_col]} = $genome;
        }
+        # close() on a pipe returns false when the command exited non-zero.
+        close($hgsql)
+            or die "hgsql query on $central_db failed ("
+                . ($! ? "error closing pipe: $!" : "exit status " . ($? >> 8)) . ")\n";
    }
    return \%genomes;
}
# read_genome_info(\%db_genomes)
#
# Collects the fields listed in --file (space-delimited, in $opt_file) from
# every record in %organism_AR and returns a reference to a hash of hash
# references: outer key is the record's db_name, or its org_name when it has
# no db_name; inner keys are the requested field names.
#
# Field rules:
#   tax_ID  - only the text before the first ':' is kept
#   tax     - falls back to the record's full tax_ID when the record has no
#             "tax" value of its own
#   others  - copied as is
# A value the record does not have is stored as "".
#
# With --dbonly ($opt_dbonly) a record is included only if its db_name is a
# key of %$db_genomes.  With an empty --file list the returned hash is empty.
sub read_genome_info
{
    my ($db_genomes) = @_;
    my %genomes = ();
    my @fields = split(/ /, $opt_file);

    # Nothing was requested from the genome info records.
    return \%genomes if (scalar(@fields) == 0);

    foreach my $abbr (keys %organism_AR)
    {
        my $org = $organism_AR{$abbr};
        my $db_name = $org->{db_name};

        # A record without a db_name can never be "in the browser database";
        # testing that first avoids using undef as a hash key.
        if ($opt_dbonly)
        {
            next unless (defined $db_name && defined $db_genomes->{$db_name});
        }

        my $genome = {};
        foreach my $field (@fields)
        {
            if ($field eq "tax_ID")
            {
                my $tax_id = "";
                if (defined $org->{tax_ID})
                {
                    ($tax_id) = split(/:/, $org->{tax_ID});
                    # split yields nothing for an empty string
                    $tax_id = "" unless (defined $tax_id);
                }
                $genome->{$field} = $tax_id;
            }
            elsif (defined $org->{$field})
            {
                $genome->{$field} = $org->{$field};
            }
            elsif ($field eq "tax")
            {
                $genome->{tax} = defined $org->{tax_ID} ? $org->{tax_ID} : "";
            }
            else
            {
                $genome->{$field} = "";
            }
        }

        my $key = defined $db_name ? $db_name : $org->{org_name};
        $genomes{$key} = $genome;
    }
    return \%genomes;
}
# write_output(\%db_genomes, \%file_genomes)
#
# Prints a tab-delimited table on STDOUT.  The header line lists the --db
# fields ($opt_db) followed by the --file fields ($opt_file).
#
# When --db fields were requested there is one row per key of %$db_genomes,
# in sorted order: the db values first, then the matching values from
# %$file_genomes.  A file value that is not defined for that genome is
# reported on STDERR as "<genome>\t <field>" and printed as an empty column.
# Without --db fields there is one row per key of %$file_genomes instead.
#
# Undefined values are always printed as empty columns, and neither input
# hash is modified.
sub write_output
{
    my ($db_genomes, $file_genomes) = @_;
    my @db_fields = split(/ /, $opt_db);
    my @file_fields = split(/ /, $opt_file);

    print STDOUT join("\t", @db_fields, @file_fields) . "\n";

    if (scalar(@db_fields) > 0)
    {
        foreach my $org (sort keys %$db_genomes)
        {
            my $db_rec = $db_genomes->{$org} || {};
            # Look the genome up without creating an entry for it in the
            # caller's hash when it is absent.
            my $file_rec = $file_genomes->{$org} || {};

            my @row = ();
            foreach my $col (@db_fields)
            {
                push(@row, defined $db_rec->{$col} ? $db_rec->{$col} : "");
            }
            foreach my $col (@file_fields)
            {
                if (defined $file_rec->{$col})
                {
                    push(@row, $file_rec->{$col});
                }
                else
                {
                    print STDERR "$org\t $col\n";
                    push(@row, "");
                }
            }
            print STDOUT join("\t", @row) . "\n";
        }
    }
    else
    {
        foreach my $org (sort keys %$file_genomes)
        {
            my $file_rec = $file_genomes->{$org} || {};
            my @row = map { defined $file_rec->{$_} ? $file_rec->{$_} : "" } @file_fields;
            print STDOUT join("\t", @row) . "\n";
        }
    }
}
\ No newline at end of file