819c30d6d5b5e898158949bb88d89026bd30bdf6
hiram
Fri May 3 15:53:04 2024 -0700
initial version of a feature bits table
diff --git src/hg/utils/automation/featBitsSurvey.pl src/hg/utils/automation/featBitsSurvey.pl
new file mode 100755
index 0000000..6156ab1
--- /dev/null
+++ src/hg/utils/automation/featBitsSurvey.pl
@@ -0,0 +1,84 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+# Both a clade label and at least one assembly id are required, as the usage
+# text states; with fewer than two arguments there is nothing to survey.
+my $argc = scalar(@ARGV);
+if ($argc < 2) {
+  printf STDERR "usage: featBitsSurvey.pl clade asmId [... asmId ...] > result.html\n";
+  exit 255;
+}
+
+# Emit the fixed text that precedes the table.  The literal is single-quoted,
+# so no variables are interpolated into it.
+# NOTE(review): printf uses this text as its format string - confirm it holds
+# no '%' characters, or switch to print.
+printf '
+
+
+
+
+';
+
+# checkOne - extract the percentage recorded in one fb.*.txt file.
+#
+#   $fbTxt - path of the file to read
+#
+# Returns the fifth space-separated word of each line with the characters
+# '(', ')' and '%' removed (multiple lines are joined with "\n").  A single
+# blank " " is returned when the file is missing, empty or unreadable, and a
+# note is written to STDERR in those cases.
+# NOTE(review): presumably the file is one line of featureBits output such
+# as "N bases of M (P%) in intersection" - confirm against the producer.
+#
+# The file is read directly rather than through a shell pipeline so that a
+# path containing spaces or shell metacharacters can not break the command.
+sub checkOne($) {
+  my ($fbTxt) = @_;
+  my $stat = " ";
+  if ( -s $fbTxt ) {
+    if (open(my $fh, "<", $fbTxt)) {
+      my @cleaned;
+      while (my $line = <$fh>) {
+        chomp $line;
+        # limit -1 keeps trailing empty fields, matching cut -d' '
+        my @words = split(/ /, $line, -1);
+        my $field = $line;	# a line with no space at all is passed through
+        if (scalar(@words) > 1) {
+          $field = defined($words[4]) ? $words[4] : "";
+        }
+        $field =~ tr/()%//d;
+        push @cleaned, $field;
+      }
+      close($fh);
+      $stat = join("\n", @cleaned);
+    } else {
+      printf STDERR "can not read: '%s': %s\n", $fbTxt, $!;
+    }
+  } else {
+    printf STDERR "not found: '%s'\n", $fbTxt;
+  }
+  return $stat;
+}
+
+# Print the caption line and the column headings for the results; the
+# headings name the values written for each row further below.
+printf "
\n";
+printf "showing percent identity, how much of the target is matched by the query\n";
+printf "\n";
+printf "count | chains | syntenic | reciprocal best | lift over | target | query | group | \n";
+printf "
\n";
+
+# Walk every assembly id given on the command line and print one row for each
+# lastz.<query> directory found under that assembly's trackData directory.
+my $N = 0;		# running row number across all assemblies
+my $clade = shift;	# first argument, printed in the last column of each row
+while (my $asmId = shift) {
+  # the first two '_' separated fields of the asmId form the target name
+  my @a = split('_', $asmId);
+  if (scalar(@a) < 2 || length($asmId) < 13) {
+    # too short to supply the target name and the four path components
+    # taken from fixed offsets below
+    printf STDERR "# %s - not a recognized assembly identifier\n", $asmId;
+    next;
+  }
+  my $target = sprintf("%s_%s", $a[0], $a[1]);
+  # directory layout: first three characters, then three groups of three
+  # characters starting after the separator at offset 3
+  my $gcX = substr($asmId, 0, 3);
+  my $d0 = substr($asmId, 4, 3);
+  my $d1 = substr($asmId, 7, 3);
+  my $d2 = substr($asmId, 10, 3);
+  my $tDir = "/hive/data/genomes/asmHubs/allBuild/$gcX/$d0/$d1/$d2/$asmId";
+  my $buildDir = `realpath "${tDir}"`;
+  chomp $buildDir;
+  if ( ! -d "${buildDir}" ) {
+    printf STDERR "# %s - buildDir not found\n", $asmId;
+    next;
+  }
+  open ( my $td, "-|", "ls -d ${buildDir}/trackData/lastz.*") or die "can not ls -d ${buildDir}/trackData/lastz.*: $!";
+  while (my $query = <$td>) {
+    chomp $query;
+    # keep only the query name that follows the literal "lastz."
+    $query =~ s#.*/trackData/lastz\.##;
+    my $Query = ucfirst($query);
+    # every fb file for this query shares the same leading path
+    my $fbPrefix = "${buildDir}/trackData/lastz.${query}/fb.${target}";
+    my $fBits = checkOne("${fbPrefix}.chain${Query}Link.txt");
+    my $synBits = checkOne("${fbPrefix}.chainSyn${Query}Link.txt");
+    my $rbBits = checkOne("${fbPrefix}.chainRBest.${Query}.txt");
+    my $loBits = checkOne("${fbPrefix}.chainLiftOver${Query}Link.txt");
+#   printf STDERR "%s\t%s\t%s\t%s\t%s\t%s\n", $target, $query, $fBits, $synBits, $rbBits, $loBits;
+    printf "%4d | ", ++$N;
+    printf "%s | ", $fBits;
+    printf "%s | ", $synBits;
+    printf "%s | ", $rbBits;
+    printf "%s | ", $loBits;
+    printf "%s | %s | %s |
\n", $target, $query, $clade;
+  }
+  close ($td);
+}
+printf "
\n";
+
+printf '
+
+
+
+