src/hg/archaeStuff/scripts/extract-genome-info 1.4

1.4 2009/03/26 00:04:27 pchan
add altdb option for alternate centraldb
Index: src/hg/archaeStuff/scripts/extract-genome-info
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/archaeStuff/scripts/extract-genome-info,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 1000000 -r1.3 -r1.4
--- src/hg/archaeStuff/scripts/extract-genome-info	25 Mar 2009 21:51:26 -0000	1.3
+++ src/hg/archaeStuff/scripts/extract-genome-info	26 Mar 2009 00:04:27 -0000	1.4
@@ -1,223 +1,234 @@
 #! /usr/bin/perl -w
 
 # extract-genome-info [options] > <output file>
 
 use strict;
 use Getopt::Long;
 use archaeBrowser::Utils;
 use archaeBrowser::Constant;
 use archaeBrowser::Organism;
 use archaeBrowser::GenomeInfo;
 
 if (scalar(@ARGV) <= 0) {
     die "Usage: extract-genome-info [options] > <output file>\n",
         "Options:\n",
+        "--altdb=<name> alternative central db (eg. public for centraldb_pulic)\n",
         "--file   list of space-delimited fields in Genome-info-db to be included\n",
         "--db     list of space-delimited fields in centraldb to be included\n",
         "--dbonly limit output to genomes included in browser database\n\n";
 }
 
 #options
+our $opt_altdb = "";
 our $opt_file = "";
 our $opt_db = "";
 our $opt_dbonly = 0;
 
-Getopt::Long::GetOptions("file=s", "db=s", "dbonly");
+Getopt::Long::GetOptions("altdb=s", "file=s", "db=s", "dbonly");
 
 # Global constants
 our $global_constants = archaeBrowser::Constant->new;
 
 # Utilities and error handling
 our $utils = archaeBrowser::Utils->new;
 
 # Organisms
 our $organism;
 our %organism_AR;
 our %org_abbr_AR;
 our %domain_ct_AR;
 our @org_list;
 
 # Module-sharing variable hash
 our %global_vars = (global_constants => $global_constants,
                     utils => $utils,
                     organism => \$organism,
                     organism_AR => \%organism_AR,
                     org_abbr_AR => \%org_abbr_AR,
                     domain_ct_AR => \%domain_ct_AR
                    );
 
 &Read_genome_info($global_constants->genome_info(), \%global_vars);       # default genome information
 &Read_genome_info($global_constants->genome_info_euk(), \%global_vars);   # manual changes to genome info
 &Read_genome_info($global_constants->genome_info_vir(), \%global_vars);   # manual changes to genome info
 &Read_genome_info($global_constants->genome_info_mods(), \%global_vars);  # manual changes to genome info
 
-my $db_genomes = &read_centraldb();
+my $db_genomes = &read_centraldb(\%global_vars);
 my $file_genomes = &read_genome_info($db_genomes);
 &write_output($db_genomes, $file_genomes);
 
 exit;
 
 # end main
 
 
 sub read_centraldb
 {
+    my ($global_vars) = @_;
+    my $global_constants = $global_vars->{global_constants};
     my %genomes = ();
     my $genome = {};
     my $col = 0;
     my @cols = ();
     my @headers = ();
     my $name_col = 0;
+    
+    my $central_db = $global_constants->default_central_db();
+    if ($opt_altdb ne "")
+    {
+        $central_db .= "_$opt_altdb";
+    }
     my $cmd = "select ";
     my @fields = split(/ /, $opt_db);
     
     if (scalar(@fields) > 0)
     {
         for (my $i = 0; $i < scalar(@fields); $i++)
         {
             if ($i > 0) { $cmd .= ","; }
             if ($fields[$i] =~ /^clade$/i) { $cmd .= "b.clade"; }
             else { $cmd .= "a.$fields[$i]"; }
         }
         if ($opt_db !~ /name/) { $cmd .= ",a.name"; }
         $cmd .= " from dbDb a, genomeClade b where a.genome = b.genome;";
-        my $result = `hgsql "centraldb" -e "$cmd"`;
+        $cmd = "hgsql \"$central_db\" -e \"$cmd\"";
+        my $result = `$cmd`;
         my @results = split(/\n/, $result);
 
         for (my $line = 0; $line < scalar(@results); $line++)
         {
             chomp($line);
             if ($line == 0)
             {
                 @headers = split(/\t/, $results[$line]);
                 for (my $i = 0; $i < scalar(@headers); $i++)
                 {
                     if ($headers[$i] eq "name")
                     {
                         $name_col = $i;
                         last;
                     }
                 }
             }
             else
             {
                 @cols = split(/\t/, $results[$line]);
                 $genome = {};
                 for ($col = 0; $col < scalar(@cols); $col++)
                 {
                     $genome->{$headers[$col]} = $cols[$col];
                 }
                 $genomes{$cols[$name_col]} = $genome;
             }
         }
     }
     
     return \%genomes;
 }
 
 sub read_genome_info
 {
     my ($db_genomes) = @_;
     my %genomes = ();
     my $genome = {};
     my $field = "";
     my $dummy = "";
     my @fields = split(/ /, $opt_file);
     if (scalar(@fields) > 0)
     {
         foreach my $abbr (keys %organism_AR)
         {
             if (($opt_dbonly && defined($db_genomes->{$organism_AR{$abbr}->{db_name}})) || !$opt_dbonly)
             {
                 $genome = {};
                 foreach $field (@fields)
                 {
                     if ($field eq "tax_ID")
                     {
                         ($genome->{$field}, $dummy) = split(/:/, $organism_AR{$abbr}->{$field});
                     }
                     elsif (defined $organism_AR{$abbr}->{$field})
                     {
                         $genome->{$field} = $organism_AR{$abbr}->{$field};
                     }
                     elsif ($field eq "tax")
                     {
                         $genome->{tax} = $organism_AR{$abbr}->{tax_ID};
                     }
                     else
                     {
                         $genome->{$field} = "";
                     }
                 }
                 if (defined $organism_AR{$abbr}->{db_name})
                 {
                     $genomes{$organism_AR{$abbr}->{db_name}} = $genome;
                 }
                 else
                 {
                     $genomes{$organism_AR{$abbr}->{org_name}} = $genome;
                 }
             }
         }
     }
     return \%genomes;
 }
 
 sub write_output
 {
     my ($db_genomes, $file_genomes) = @_;
     my $ct = 0;
     my $col = "";
     
     my @db_fields = split(/ /, $opt_db);
     foreach $col (@db_fields)
     {
         if ($ct > 0) { print STDOUT "\t"; }
         print STDOUT $col;
         $ct++;
     }
     my @file_fields = split(/ /, $opt_file);
     foreach $col (@file_fields)
     {
         if ($ct > 0) { print STDOUT "\t"; }
         print STDOUT $col;
         $ct++;
     }
     print STDOUT "\n";
     
     if (scalar(@db_fields) > 0)
     {
         foreach my $org (sort keys %$db_genomes)
         {
             $ct = 0;
             foreach $col (@db_fields)
             {
                 if ($ct > 0) { print STDOUT "\t"; }
                 print STDOUT $db_genomes->{$org}->{$col};
                 $ct++;
             }
             foreach $col (@file_fields)
             {
                 if ($ct > 0) { print STDOUT "\t"; }
                 if (!defined $file_genomes->{$org}->{$col}) { print STDERR "$org\t $col\n"; }
                 print STDOUT $file_genomes->{$org}->{$col};
                 $ct++;
             }
             print STDOUT "\n";
         }
     }
     else
     {
         foreach my $org (sort keys %$file_genomes)
         {
             $ct = 0;
             foreach $col (@file_fields)
             {
                 if ($ct > 0) { print STDOUT "\t"; }
                 print STDOUT $file_genomes->{$org}->{$col};
                 $ct++;
             }
             print STDOUT "\n";
         }        
     }
 }
\ No newline at end of file