af7a004c8f3fa909cd8c2cfc2e5bea60e3421cd1 hiram Fri Feb 25 11:02:46 2022 -0800 cleaner determination of parameters no redmine diff --git src/hg/utils/phyloTrees/chainNetCompositeTrackDb.pl src/hg/utils/phyloTrees/chainNetCompositeTrackDb.pl index 6c8e06c..ea767d2 100755 --- src/hg/utils/phyloTrees/chainNetCompositeTrackDb.pl +++ src/hg/utils/phyloTrees/chainNetCompositeTrackDb.pl @@ -1,82 +1,109 @@ #!/usr/bin/env perl use strict; use warnings; my $argc = scalar(@ARGV); if ($argc < 4) { - printf STDERR "usage: compositeChainNet.pl [net|syn|rbest] <name> <db> <clade1.list> \\\n\t[clade2.list ...etc...] > nameChainNet.ra + printf STDERR "usage: compositeChainNet.pl [net|syn|rbest] <name> <db> <nameList> <clade1.list> \\\n\t[clade2.list ...etc...] > nameChainNet.ra [net|syn|rbest] select one of these for lowest level chainNet\n\tor syntenic or reciprocal best chainNet name is the name of the composite track, examples: \tplacental, mammal, vertebrate db is the name of the database to construct the composite for +nameList relates a genome name db/asmId to a common name to handle + assembly hubs The clade lists are lists of species dbs to put together into a view. Additional clade lists for more views. The short and long lables will need attention in the result. And the default on/off visibilities.\n"; exit 255; } my $netType = shift; my $trackName = shift; my $thisDb = shift; +my $nameList = shift; printf STDERR "# net type: '%s'\n", $netType; printf STDERR "# track name: '%s'\n", $trackName; printf STDERR "# thisDb '%s'\n", $thisDb; +printf STDERR "# nameList '%s'\n", $nameList; +my $trackType = ""; +$trackType = "Syn" if ($netType =~ m/syn/); +$trackType = "RBest" if ($netType =~ m/rbest/); +my $trackLabel = ""; +$trackLabel = "Syntenic" if ($netType =~ m/syn/); +$trackLabel = "RecipBest" if ($netType =~ m/rbest/); +my $shortLabel = ""; +$shortLabel = "sy" if ($netType =~ m/syn/); +$shortLabel = "rb" if ($netType =~ m/rbest/); my $dbPrefix = $thisDb; $dbPrefix =~ s/[0-9]+$//; my @cladeLists; my @cladeNames; my %dbList; # key is cladeName, value is array of db names my %commonNames; # key is db, value is common name my %speciesOrder; # key is db, value is sNNN to get species order my %cladeOrder; # key is clade name, value is cNNN to get clade order my %dbClade; # key is db, value is clade my %rrActive; # key is db, value is 1 for active on RR -my %chainNetHg; # chainNet tracks on hg38, hg19, key is db, value is 1 +my %chainNetHg; # chainNet tracks on RR for given db +my %nameList; # key is genome db or asmId, value is common name +my %existingTableList; # key is table name, value is 1 +my $thisTableCount = 0; +printf STDERR "# reading nameList: %s\n", $nameList; +open (FH, "<$nameList") or die "can not read $nameList"; +while (my $line = <FH>) { + chomp $line; + my ($key, $value) = split('\t', $line); + $value =~ s/ /_/g; + $nameList{$key} = $value; + my $tableName = sprintf("chain%s%s", $trackType, $key); + $existingTableList{$tableName} = 1; + ++$thisTableCount; +} +close (FH); my @rrDbList; -open (FH, "HGDB_CONF=$ENV{'HOME'}/.hg.mysqlrr.conf hgsql -N -e 'show databases;' hg19|") or die "can not hgsql show databases hg19"; +printf STDERR "# show databases for RR\n"; +# open (FH, "HGDB_CONF=$ENV{'HOME'}/.hg.hgw1.conf hgsql -N -e 'show databases;' hg19|") or die "can not hgsql show databases hg19"; +open (FH, "ssh qateam\@hgw1 'HGDB_CONF=`pwd`/.hg.local.conf /home/qateam/bin/x86_64/hgsql -N -e \"show databases;\" hg19'|") or die "can not hgsql show databases hg19"; while (my $db = <FH>) { chomp $db; if ($db =~ m/^$dbPrefix/) { push @rrDbList, $db; } } close (FH); -my %existingTableList; # key is table name, value is 1 -my $thisTableCount = 0; - printf STDERR "# show tables on db '%s'\n", $thisDb; open (FH, "HGDB_CONF=$ENV{'HOME'}/.hg.conf hgsql -N -e 'show tables;' $thisDb|") or die "can not hgsql show tables $thisDb"; while (my $table = <FH>) { chomp $table; $existingTableList{$table} = 1; ++$thisTableCount; # printf STDERR "# %s\n", $table; } close (FH); printf STDERR "# database %s table count: %s\n", $thisDb, $thisTableCount; foreach my $db (@rrDbList) { printf STDERR "# reading tables from %s\n", $db; - open (FH, "HGDB_CONF=$ENV{'HOME'}/.hg.mysqlrr.conf hgsql -e 'show tables;' $db | grep -i chain | grep Link | egrep -i -v 'self|patch' | sed -e 's/chain//; s/Link//;'|") or die "can not hgsql show tables $db"; + open (FH, "ssh qateam\@hgw1 'HGDB_CONF=`pwd`/.hg.local.conf /home/qateam/bin/x86_64/hgsql -e \"show tables;\" $db | grep -i chain | grep Link | egrep -i -v \"self|patch\" | sed -e \"s/chain//; s/Link//;\"'|") or die "can not hgsql show tables $db"; while (my $line = <FH>) { chomp $line; # printf STDERR "# %s\n", lcfirst($line); $chainNetHg{lcfirst($line)} = 1; } close (FH); } # foreach my $db (sort keys %chainNetHg) { # printf STDERR "# %s\n", $db; # } # exit 255; open (FH, "hgsql -N -hgenome-centdb -e 'select name from dbDb where active=1' hgcentral | sort|") or die "can not hgsql select name from hgcentral.dbDb"; while (my $line = <FH>) { @@ -108,52 +135,49 @@ my $subGroup2 = "species Species"; my $subGroup3 = "clade Clade"; my $speciesCount = 0; my $cladeCount = 0; foreach my $clade (@cladeNames) { printf STDERR "# %s\n", $clade; $subGroup3 .= sprintf(" c%02d=%s", $cladeCount, $clade); $cladeOrder{$clade} = sprintf("c%02d", $cladeCount); ++$cladeCount; my $listPtr = $dbList{$clade}; foreach my $db (@$listPtr) { my $commonName = `hgsql -N -e 'select organism from dbDb where name=\"$db\";' hgcentraltest`; chomp $commonName; $commonName =~ s/ /_/g; - $commonName = $db if (length($commonName) < 1); +# $commonName = $db if (length($commonName) < 1); + if (length($commonName) < 1) { + if (defined($nameList{$db})) { + $commonName = $nameList{$db}; + } else { + $commonName = $db; + } + } printf STDERR "# %s\t%s\ts%03d=%s\n", $clade, $db, $speciesCount, $commonName; $speciesOrder{$db} = sprintf("s%03d", $speciesCount); $subGroup2 .= sprintf(" s%03d=%s", $speciesCount, $commonName); $dbClade{$db} = $clade; ++$speciesCount; } } printf STDERR "# %s\n", $subGroup2; printf STDERR "# %s\n", $subGroup3; -my $trackType = ""; -$trackType = "Syn" if ($netType =~ m/syn/); -$trackType = "RBest" if ($netType =~ m/rbest/); -my $trackLabel = ""; -$trackLabel = "Syntenic" if ($netType =~ m/syn/); -$trackLabel = "RecipBest" if ($netType =~ m/rbest/); -my $shortLabel = ""; -$shortLabel = "sy" if ($netType =~ m/syn/); -$shortLabel = "rb" if ($netType =~ m/rbest/); - printf 'track %s%sChainNet compositeTrack on shortLabel %s %s Chain/Net longLabel %s %s Chain and Net Alignments subGroup1 view Views chain=Chains net=Nets ', $trackName, $trackType, ucfirst($trackName), $trackLabel, ucfirst($trackName), $trackLabel; printf "subGroup2 %s\nsubGroup3 %s\n", $subGroup2, $subGroup3; printf 'dragAndDrop subTracks visibility hide group compGeno noInherit on color 0,0,0 altColor 255,255,0