#!/usr/bin/env perl
#
# featBitsSurvey.pl - survey lastz/chain feature-bits results for a set of
# assemblies and write them to stdout as one HTML table.
#
# Provenance: commit 819c30d6d5b5e898158949bb88d89026bd30bdf6
#   (hiram, Fri May 3 15:53:04 2024 -0700)
#   "initial version of a feature bits table"
#   src/hg/utils/automation/featBitsSurvey.pl
#
# For each asmId on the command line the script:
#   1. derives the build directory under the asmHubs allBuild tree from the
#      characters of the asmId itself,
#   2. finds every trackData/lastz.<query> entry in that build directory,
#   3. reads the fifth space-separated field of four fb.*.txt files
#      (chain, syntenic chain, reciprocal best, lift over) and
#   4. prints one table row per target/query pair.
#
# Diagnostics (missing files, missing build directories) go to STDERR so
# that STDOUT stays a clean HTML document.
#
# NOTE(review): the HTML tags in this file were reconstructed.  Only the
# caption text, the eight column names and the eight-field row format were
# recoverable; confirm tag names, attributes and the page header/footer
# against the intended page layout.

use strict;
use warnings;

use Cwd qw(realpath);

# Root of the per-assembly build directories.
my $allBuild = "/hive/data/genomes/asmHubs/allBuild";

# Cell content used when a feature-bits file is missing or unusable.  A
# plain blank would collapse to an empty cell in HTML.
my $emptyCell = "&nbsp;";

# A clade and at least one asmId are required.
my $argc = scalar(@ARGV);
if ($argc < 2) {
    printf STDERR "usage: featBitsSurvey.pl clade asmId [... asmId ...] > result.html\n";
    exit 255;
}

# htmlEscape - return a copy of the text that is safe inside an HTML cell.
#   $text : arbitrary string (command-line argument, directory name, ...)
sub htmlEscape {
    my ($text) = @_;
    my %entityFor = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
    );
    $text =~ s/([&<>"])/$entityFor{$1}/g;
    return $text;
}

# checkOne - extract the percentage from one feature-bits text file.
#   $fbTxt  : path to an fb.*.txt file
#   returns : the fifth space-separated field of the first line with the
#             characters '(', ')' and '%' removed, or the empty-cell
#             placeholder when the file is missing, empty, unreadable or
#             has no such field.  A "not found" note is written to STDERR
#             for missing/empty files.
sub checkOne {
    my ($fbTxt) = @_;

    if (! -s $fbTxt) {
        printf STDERR "not found: '%s'\n", $fbTxt;
        return $emptyCell;
    }

    my $fh;
    if (! open($fh, '<', $fbTxt)) {
        printf STDERR "can not read: '%s': %s\n", $fbTxt, $!;
        return $emptyCell;
    }
    my $line = <$fh>;
    close($fh);
    return $emptyCell unless defined $line;
    chomp $line;

    # The pattern / / splits on every single blank, so consecutive blanks
    # yield empty fields and the field numbering stays positional.
    my $percent = (split(/ /, $line))[4];
    return $emptyCell unless defined $percent;

    $percent =~ tr/()%//d;
    return length($percent) ? htmlEscape($percent) : $emptyCell;
}

# lastzQueries - list the <query> names of all lastz.<query> entries.
#   $trackData : path of a build directory's trackData directory
#   returns    : sorted list of query names; empty when the directory can
#                not be read or holds no lastz.* entries
sub lastzQueries {
    my ($trackData) = @_;

    my $dh;
    if (! opendir($dh, $trackData)) {
        printf STDERR "# can not read directory '%s': %s\n", $trackData, $!;
        return;
    }
    my @queries = sort map { /\Alastz\.(.+)\z/s ? $1 : () } readdir($dh);
    closedir($dh);
    return @queries;
}

# Page header.
# NOTE(review): original header markup was not recoverable; this is a
# minimal wrapper only.
print "<html>\n<head>\n</head>\n<body>\n";

# Table caption and column headings.
print "<table>\n";
print "<caption>showing percent identity, how much of the target is matched by the query</caption>\n";
print "<thead>\n";
print "<tr><th>count</th><th>chains</th><th>syntenic</th><th>reciprocal<br>best</th><th>lift<br>over</th><th>target</th><th>query</th><th>group</th></tr>\n";
print "</thead><tbody>\n";

my $N = 0;    # running row number shown in the first column
my $clade = shift @ARGV;
my $cladeCell = htmlEscape($clade);

for my $asmId (@ARGV) {
    # The target name is the first two '_' separated parts of the asmId and
    # the directory path is cut from fixed character positions, so the id
    # needs a second part and at least 13 characters.
    my ($prefix, $accession) = split(/_/, $asmId);
    if (!defined($accession) || length($asmId) < 13) {
        printf STDERR "# %s - not a usable asmId\n", $asmId;
        next;
    }
    my $target = sprintf("%s_%s", $prefix, $accession);

    my $gcX = substr($asmId, 0, 3);
    my $d0 = substr($asmId, 4, 3);
    my $d1 = substr($asmId, 7, 3);
    my $d2 = substr($asmId, 10, 3);
    my $tDir = "$allBuild/$gcX/$d0/$d1/$d2/$asmId";

    # Resolve symlinks so the reported paths are the real build location.
    my $buildDir = (-d $tDir) ? realpath($tDir) : undef;
    if (!defined($buildDir) || ! -d $buildDir) {
        printf STDERR "# %s - buildDir not found\n", $asmId;
        next;
    }

    for my $query (lastzQueries("$buildDir/trackData")) {
        my $Query = ucfirst($query);
        my $fbBase = "${buildDir}/trackData/lastz.${query}/fb.${target}";

        my $fBits = checkOne("${fbBase}.chain${Query}Link.txt");
        my $synBits = checkOne("${fbBase}.chainSyn${Query}Link.txt");
        my $rbBits = checkOne("${fbBase}.chainRBest.${Query}.txt");
        my $loBits = checkOne("${fbBase}.chainLiftOver${Query}Link.txt");
#       printf STDERR "%s\t%s\t%s\t%s\t%s\t%s\n", $target, $query, $fBits, $synBits, $rbBits, $loBits;

        printf "<tr><td>%4d</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>\n",
            ++$N, $fBits, $synBits, $rbBits, $loBits,
            htmlEscape($target), htmlEscape($query), $cladeCell;
    }
}

print "</tbody></table>\n";

# Page footer.
# NOTE(review): original footer markup was not recoverable.
print "</body>\n</html>\n";