af7a004c8f3fa909cd8c2cfc2e5bea60e3421cd1
hiram
  Fri Feb 25 11:02:46 2022 -0800
cleaner determination of parameters no redmine

diff --git src/hg/utils/phyloTrees/chainNetCompositeTrackDb.pl src/hg/utils/phyloTrees/chainNetCompositeTrackDb.pl
index 6c8e06c..ea767d2 100755
--- src/hg/utils/phyloTrees/chainNetCompositeTrackDb.pl
+++ src/hg/utils/phyloTrees/chainNetCompositeTrackDb.pl
@@ -1,82 +1,109 @@
 #!/usr/bin/env perl
 
 use strict;
 use warnings;
 
 my $argc = scalar(@ARGV);
 
 if ($argc < 4) {
- printf STDERR "usage: compositeChainNet.pl [net|syn|rbest] <name> <db>  <clade1.list> \\\n\t[clade2.list ...etc...] > nameChainNet.ra
+ printf STDERR "usage: compositeChainNet.pl [net|syn|rbest] <name> <db> <nameList> <clade1.list> \\\n\t[clade2.list ...etc...] > nameChainNet.ra
 [net|syn|rbest] select one of these for lowest level chainNet\n\tor syntenic or reciprocal best chainNet
 name is the name of the composite track, examples:
 \tplacental, mammal, vertebrate
 db is the name of the database to construct the composite for
+nameList relates a genome name db/asmId to a common name to handle
+   assembly hubs
 The clade lists are lists of species dbs to put
 together into a view.  Additional clade lists for more views.
 The short and long lables will need attention in the result.
 And the default on/off visibilities.\n";
    exit 255;
 }
 
 my $netType = shift;
 my $trackName = shift;
 my $thisDb = shift;
+my $nameList = shift;
 printf STDERR "# net type: '%s'\n", $netType;
 printf STDERR "# track name: '%s'\n", $trackName;
 printf STDERR "# thisDb '%s'\n", $thisDb;
+printf STDERR "# nameList '%s'\n", $nameList;
+my $trackType = "";
+$trackType = "Syn" if ($netType =~ m/syn/);
+$trackType = "RBest" if ($netType =~ m/rbest/);
+my $trackLabel = "";
+$trackLabel = "Syntenic" if ($netType =~ m/syn/);
+$trackLabel = "RecipBest" if ($netType =~ m/rbest/);
+my $shortLabel = "";
+$shortLabel = "sy" if ($netType =~ m/syn/);
+$shortLabel = "rb" if ($netType =~ m/rbest/);
 my $dbPrefix = $thisDb;
 $dbPrefix =~ s/[0-9]+$//;
 my @cladeLists;
 my @cladeNames;
 my %dbList;	# key is cladeName, value is array of db names
 my %commonNames;	# key is db, value is common name
 my %speciesOrder;	# key is db, value is sNNN to get species order
 my %cladeOrder;		# key is clade name, value is cNNN to get clade order
 my %dbClade;		# key is db, value is clade
 my %rrActive;		# key is db, value is 1 for active on RR
-my %chainNetHg;		# chainNet tracks on hg38, hg19, key is db, value is 1
+my %chainNetHg;		# chainNet tracks found on RR; key is lcfirst table name minus chain/Link (presumably the other db), value is 1
+my %nameList;	# key is genome db or asmId, value is common name
+my %existingTableList;	# key is table name, value is 1
+my $thisTableCount = 0;
 
+printf STDERR "# reading nameList: %s\n", $nameList;
+open (FH, "<$nameList") or die "can not read $nameList";
+while (my $line = <FH>) {
+  chomp $line;
+  my ($key, $value) = split('\t', $line);
+  $value =~ s/ /_/g;
+  $nameList{$key} = $value;
+  my $tableName = sprintf("chain%s%s", $trackType, $key);
+  $existingTableList{$tableName} = 1;
+  ++$thisTableCount;
+}
+close (FH);
 my @rrDbList;
 
-open (FH, "HGDB_CONF=$ENV{'HOME'}/.hg.mysqlrr.conf hgsql -N -e 'show databases;' hg19|") or die "can not hgsql show databases hg19";
+printf STDERR "# show databases for RR\n";
+# open (FH, "HGDB_CONF=$ENV{'HOME'}/.hg.hgw1.conf hgsql -N -e 'show databases;' hg19|") or die "can not hgsql show databases hg19";
+open (FH, "ssh qateam\@hgw1 'HGDB_CONF=`pwd`/.hg.local.conf /home/qateam/bin/x86_64/hgsql -N -e \"show databases;\" hg19'|") or die "can not hgsql show databases hg19";
 while (my $db = <FH>) {
   chomp $db;
   if ($db =~ m/^$dbPrefix/) {
     push @rrDbList, $db;
   }
 }
 close (FH);
 
-my %existingTableList;	# key is table name, value is 1
-my $thisTableCount = 0;
-
 printf STDERR "# show tables on db '%s'\n", $thisDb;
 open (FH, "HGDB_CONF=$ENV{'HOME'}/.hg.conf hgsql -N -e 'show tables;' $thisDb|") or die "can not hgsql show tables $thisDb";
 while (my $table = <FH>) {
   chomp $table;
   $existingTableList{$table} = 1;
   ++$thisTableCount;
 #  printf STDERR "# %s\n", $table;
 }
 close (FH);
 
 printf STDERR "# database %s table count: %s\n", $thisDb, $thisTableCount;
 
 foreach my $db (@rrDbList) {
   printf STDERR "# reading tables from %s\n", $db;
-  open (FH, "HGDB_CONF=$ENV{'HOME'}/.hg.mysqlrr.conf hgsql -e 'show tables;' $db | grep -i chain | grep Link | egrep -i -v 'self|patch' | sed -e 's/chain//; s/Link//;'|") or die "can not hgsql show tables $db";
+  open (FH, "ssh qateam\@hgw1 'HGDB_CONF=`pwd`/.hg.local.conf /home/qateam/bin/x86_64/hgsql -e \"show tables;\" $db | grep -i chain | grep Link | egrep -i -v \"self|patch\" | sed -e \"s/chain//; s/Link//;\"'|") or die "can not hgsql show tables $db";
   while (my $line = <FH>) {
     chomp $line;
   #  printf STDERR "# %s\n", lcfirst($line);
     $chainNetHg{lcfirst($line)} = 1;
   }
   close (FH);
 }
 
 # foreach my $db (sort keys %chainNetHg) {
 #   printf STDERR "# %s\n", $db;
 # }
 # exit 255;
 
 open (FH, "hgsql -N -hgenome-centdb -e 'select name from dbDb where active=1' hgcentral | sort|") or die "can not hgsql select name from hgcentral.dbDb";
 while (my $line = <FH>) {
@@ -108,52 +135,49 @@
 
 my $subGroup2 = "species Species";
 my $subGroup3 = "clade Clade";
 my $speciesCount = 0;
 my $cladeCount = 0;
 foreach my $clade (@cladeNames) {
    printf STDERR "# %s\n", $clade;
    $subGroup3 .= sprintf(" c%02d=%s", $cladeCount, $clade);
    $cladeOrder{$clade} = sprintf("c%02d", $cladeCount);
    ++$cladeCount;
    my $listPtr = $dbList{$clade};
    foreach my $db (@$listPtr) {
      my $commonName = `hgsql -N -e 'select organism from dbDb where name=\"$db\";' hgcentraltest`;
      chomp $commonName;
      $commonName =~ s/ /_/g;
-     $commonName = $db if (length($commonName) < 1);
+#     $commonName = $db if (length($commonName) < 1);
+     if (length($commonName) < 1) {
+       if (defined($nameList{$db})) {
+         $commonName = $nameList{$db};
+       } else {
+         $commonName = $db;
+       }
+     }
      printf STDERR "# %s\t%s\ts%03d=%s\n", $clade, $db, $speciesCount, $commonName;
      $speciesOrder{$db} = sprintf("s%03d", $speciesCount);
      $subGroup2 .= sprintf(" s%03d=%s", $speciesCount, $commonName);
      $dbClade{$db} = $clade;
      ++$speciesCount;
    }
 }
 
 printf STDERR "# %s\n", $subGroup2;
 printf STDERR "# %s\n", $subGroup3;
 
-my $trackType = "";
-$trackType = "Syn" if ($netType =~ m/syn/);
-$trackType = "RBest" if ($netType =~ m/rbest/);
-my $trackLabel = "";
-$trackLabel = "Syntenic" if ($netType =~ m/syn/);
-$trackLabel = "RecipBest" if ($netType =~ m/rbest/);
-my $shortLabel = "";
-$shortLabel = "sy" if ($netType =~ m/syn/);
-$shortLabel = "rb" if ($netType =~ m/rbest/);
-
 printf 'track %s%sChainNet
 compositeTrack on
 shortLabel %s %s Chain/Net
 longLabel %s %s Chain and Net Alignments
 subGroup1 view Views chain=Chains net=Nets
 ', $trackName, $trackType, ucfirst($trackName), $trackLabel, ucfirst($trackName), $trackLabel;
 
 printf "subGroup2 %s\nsubGroup3 %s\n", $subGroup2, $subGroup3;
 
 printf 'dragAndDrop subTracks
 visibility hide
 group compGeno
 noInherit on
 color 0,0,0
 altColor 255,255,0