#!/usr/bin/env perl
#
# featBitsSurvey.pl - collect the measurements recorded in the fb.*.txt
#   files under each assembly's trackData/lastz.* entries and write them
#   to stdout as one sortable HTML table, one row per target/query pair.
#
# usage: featBitsSurvey.pl clade asmId [... asmId ...] > result.html
#
# Problems (missing files, missing build directories, unusable asmIds) are
# reported on stderr so they never end up in the HTML on stdout.
#
# Origin: commit 819c30d6d5b5e898158949bb88d89026bd30bdf6, hiram,
#   Fri May 3 15:53:04 2024 -0700, "initial version of a feature bits table"

use strict;
use warnings;
use Cwd qw(abs_path);

my $argc = scalar(@ARGV);
# the clade name plus at least one asmId are required
if ($argc < 2) {
  print STDERR "usage: featBitsSurvey.pl clade asmId [... asmId ...] > result.html\n";
  exit 255;
}

# single quoted here-doc: $ROOT belongs to the server side include
#   directives and must reach the output literally
print <<'_EOF_';
<!DOCTYPE HTML>
<!--#set var="TITLE" value="Primates genomes assembly hubs" -->
<!--#set var="ROOT" value="../.." -->

<!--#include virtual="$ROOT/inc/gbPageStartHardcoded.html" -->
_EOF_

# checkOne - extract the measurement from one fb.*.txt file
#   fbTxt: path to the file to read
#   returns: the fifth space separated field of each line with the
#     characters ( ) % removed (lines joined by newline if the file has
#     more than one), or a single blank when the file is missing, empty
#     or unreadable so the table cell stays empty.
#   The file is read directly instead of via a shell 'cut | tr' pipeline,
#   so odd characters in the path can not be interpreted by a shell.
sub checkOne($) {
  my ($fbTxt) = @_;
  my $stat = " ";
  if ( ! -s $fbTxt ) {
    printf STDERR "not found: '%s'\n", $fbTxt;
    return $stat;
  }
  my $fh;
  if ( ! open($fh, '<', $fbTxt) ) {
    printf STDERR "can not read: '%s': %s\n", $fbTxt, $!;
    return $stat;
  }
  my @values;
  while (my $line = <$fh>) {
    chomp $line;
    # same selection rule as cut -d' ' -f5: a line without any space is
    #   passed through whole, a line with too few fields gives an empty value
    my $field = $line;
    if (index($line, ' ') >= 0) {
      my @words = split(/ /, $line, -1);
      $field = defined($words[4]) ? $words[4] : "";
    }
    $field =~ tr/()%//d;
    push @values, $field;
  }
  close($fh);
  return join("\n", @values);
}

print "<table class='sortable' border='1' style='background-color:powderblue;'>\n";
print "<caption>showing percent identity, how much of the target is matched by the query</caption>\n";
print "<thead style='position:sticky; top:0; background-color: white;'><tr>\n";
print "<th>count</th><th>chains</th><th>syntenic</th><th>reciprocal<br>best</th><th>lift<br>over</th><th>target</th><th>query</th><th>group</th>\n";
print "</tr></thead><tbody>\n";

my $N = 0;	# running row number for the 'count' column
my $clade = shift @ARGV;
# iterate over the list itself so a false valued argument can not end the
#   loop early
for my $asmId (@ARGV) {
  my @a = split('_', $asmId);
  # the build path below is cut out of fixed positions of the asmId,
  #   refuse names too short to supply them
  if (scalar(@a) < 2 || length($asmId) < 13) {
    printf STDERR "# %s - can not derive build path from asmId\n", $asmId;
    next;
  }
  my $target = sprintf("%s_%s", $a[0], $a[1]);
  my $gcX = substr($asmId, 0, 3);
  my $d0 = substr($asmId, 4, 3);
  my $d1 = substr($asmId, 7, 3);
  my $d2 = substr($asmId, 10, 3);
  my $tDir = "/hive/data/genomes/asmHubs/allBuild/$gcX/$d0/$d1/$d2/$asmId";
  # abs_path may return undef or an empty string for a path it can not resolve
  my $buildDir = abs_path($tDir);
  if ( ! defined($buildDir) || ! -d $buildDir ) {
    printf STDERR "# %s - buildDir not found\n", $asmId;
    next;
  }
  my $trackData = "${buildDir}/trackData";
  my $dh;
  if ( ! opendir($dh, $trackData) ) {
    printf STDERR "# %s - can not read %s: %s\n", $asmId, $trackData, $!;
    next;
  }
  # every entry named lastz.<query> contributes one row
  my @queries;
  for my $entry (readdir($dh)) {
    push @queries, $1 if ($entry =~ m/^lastz\.(.*)$/s);
  }
  closedir($dh);
  for my $query (sort @queries) {
    my $Query = ucfirst($query);	# file names carry the query capitalized
    my $lastzDir = "${trackData}/lastz.${query}";
    my $fBits = checkOne("${lastzDir}/fb.${target}.chain${Query}Link.txt");
    my $synBits = checkOne("${lastzDir}/fb.${target}.chainSyn${Query}Link.txt");
    my $rbBits = checkOne("${lastzDir}/fb.${target}.chainRBest.${Query}.txt");
    my $loBits = checkOne("${lastzDir}/fb.${target}.chainLiftOver${Query}Link.txt");
    printf "<tr><td style='text-align:right;'>%4d</td>", ++$N;
    for my $bits ($fBits, $synBits, $rbBits, $loBits) {
      printf "<td style='text-align:right;'>%s</td>", $bits;
    }
    printf "<td>%s</td><td>%s</td><td>%s</td></tr>\n", $target, $query, $clade;
  }
}
print "</tbody></table>\n";

print <<'_EOF_';
</div><!-- closing gbsPage from gbPageStartHardcoded.html -->
</div><!-- closing container-fluid from gbPageStartHardcoded.html -->
<!--#include virtual="$ROOT/inc/gbFooterHardcoded.html"-->
<script src="<!--#echo var="ROOT" -->/js/sorttable.js"></script>
</body></html>
_EOF_