src/hg/utils/phyloTrees/chainNet.pl 1.3
1.3 2009/12/11 19:26:41 hiram
Eliminate much of the garbage from other chain tables and find appropriate tree in the phylo tree
Index: src/hg/utils/phyloTrees/chainNet.pl
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/utils/phyloTrees/chainNet.pl,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 4 -r1.2 -r1.3
--- src/hg/utils/phyloTrees/chainNet.pl 9 Dec 2009 18:28:01 -0000 1.2
+++ src/hg/utils/phyloTrees/chainNet.pl 11 Dec 2009 19:26:41 -0000 1.3
@@ -26,50 +26,76 @@
printf STDERR "$dissectTree\n";
exit 255;
}
+my $initialPrio = 200.3;
my $db = shift;
-my @orderedList;
my $dbCount = 0;
my %priorities;
-my $priority = 200.3;
+my $priority = $initialPrio;
+my $stripName = $db;
+$stripName =~ s/[0-9]+$//;
-open (FH, "$reroot $db $dissectTree 2> /dev/null|") or die "can not run rerootTree.pl";
+my $actualTreeName = "notFound";
+
+# given an arbitrary database name, find corresponding database name
+# that is actually in the phylo tree
+open (FH, "grep name $dissectTree | sort -u | sed -e \"s/.*name = '//; s/'.*//;\"|") or die "can not read $dissectTree";
+while (my $name = <FH>) {
+ chomp $name;
+ my $treeName = $name;
+ $name =~ s/[0-9]+$//;
+ if ($name eq $stripName) {
+ $actualTreeName = $treeName;
+ last;
+ }
+}
+
+# verify we found one
+if ($actualTreeName eq "notFound") {
+ printf "ERROR: specified db '$db' is not like one of the databases in the phylo tree\n";
+ printf `grep name $dissectTree | sort -u | sed -e "s/.*name = '//; s/'.*//;" | xargs echo`, "\n";
+ exit 255;
+}
+
+# reroot the tree to that database name
+open (FH, "$reroot $actualTreeName $dissectTree 2> /dev/null|") or die "can not run rerootTree.pl";
while (my $line = <FH>) {
chomp $line;
- $orderedList[$dbCount++] = $line;
$line =~ s/[0-9]+$//;
- if (!exists($priorities{$line})) {
- $priorities{$line} = $priority;
+ if (!exists($priorities{lcfirst($line)})) {
+ $priorities{lcfirst($line)} = $priority;
$priority += 10;
}
}
close (FH);
-for (my $i = 0; $i < $dbCount; ++$i) {
- my $stripDb = $orderedList[$i];
- $stripDb =~ s/[0-9]+$//;
-# printf "%02d: %03d %s\n", $i, $priorities{$stripDb}, $orderedList[$i];
-}
-
-my @chainTbls;
my $chainCount = 0;
my %orderChainNet;
-open (FH, "hgsql -e 'show tables;' $db | grep -v -i 'self' | grep 'chain.*Link'|") or
+# fetch all chain tables from the given database and for ones that are
+# in the phylo tree, output their priority
+open (FH, "hgsql -e 'show tables;' $db | grep 'chain.*Link' | egrep -i -v 'self|chainNet' | sed -e 's/^.*_chain/chain/' | sort -u|") or
die "can not run hgsql 'show tables' on $db";
while (my $tbl = <FH>) {
chomp $tbl;
$tbl =~ s/^chain//;
$tbl =~ s/Link$//;
- $chainTbls[$chainCount++] = $tbl;
- my $track = "$tbl";
+ my $track = $tbl;
my $stripDb = $tbl;
$stripDb =~ s/[0-9]+$//;
- $orderChainNet{$track} = $priorities{lcfirst($stripDb)};
+ if (defined($stripDb) && length($stripDb) > 0) {
+ if (exists($priorities{lcfirst($stripDb)})) {
+ my $priority = $priorities{lcfirst($stripDb)};
+ $orderChainNet{$track} = $priority;
+ } else {
+ printf STDERR "warning: not in phylo tree: $tbl\n";
+ }
+ }
}
close (FH);
+# print out the priorities in order by priority, lowest first
foreach my $track (sort { $orderChainNet{$a} cmp $orderChainNet{$b} }
keys %orderChainNet) {
my $priority = $orderChainNet{$track};
printf "track chainNet%s override\n", $track;