#!/usr/bin/env perl
#
# trackData.pl - write the trackData.html page for one assembly hub group.
#
# Recovered from commit d3219932529dacd180646575f4892e8a32ba2601
#   hiram, Mon Feb 24 10:16:46 2020 -0800
#   "adding trackData page construction refs #23891"
#   src/hg/makeDb/doc/asmHubs/trackData.pl (new file, mode 100755)

use strict;
use warnings;
use File::stat;
use Scalar::Util qw(looks_like_number);

my $argc = scalar(@ARGV);
if ($argc != 2) {
  printf STDERR "usage: trackData.pl Name asmHubName > trackData.html\n";
  printf STDERR "e.g.: trackData.pl Mammals mammals > trackData.html\n";
  exit 255;
}
my $Name = shift;
my $asmHubName = shift;

my $home = $ENV{'HOME'};
my $toolsDir = "$home/kent/src/hg/makeDb/doc/asmHubs";

my $commonNameList = "$asmHubName.asmId.commonName.tsv";
my $commonNameOrder = "$asmHubName.commonName.asmId.orderList.tsv";
my @orderList;	# asmId of the assemblies in order from the *.list files
# the order to read the different .list files:
my %betterName;	# key is asmId, value is better common name than found in
		# assembly_report

my $assemblyTotal = 0;	# complete list of assemblies in this group
my $asmCount = 0;	# count of assemblies completed and in the table
my $overallNucleotides = 0;
my $overallSeqCount = 0;
my $overallGapSize = 0;
my $overallGapCount = 0;

##############################################################################
# from Perl Cookbook Recipe 2.17, print out large numbers with comma delimiters:
#   commify(1234567) returns "1,234,567"; digits after a decimal point are
#   left alone.  Works on the reversed string so groups of three are counted
#   from the right hand end.
##############################################################################
sub commify($) {
  my $text = reverse $_[0];
  $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
  return scalar reverse $text;
}

##############################################################################
# run an info command ("bigWigInfo" or "bigBedInfo") on a file and collect
#   the value of each requested "label:" line.
#   tool   - name of the command to run
#   file   - file name given to the command as its only argument
#   labels - label names to look for, without the trailing colon
# returns a hash: key is label, value is the last word on that line with
#   commas removed.  Labels not seen in the output are not in the hash.
#   The hash is empty when the command can not be started.
##############################################################################
sub infoFields {
  my ($tool, $file, @labels) = @_;
  my %valueOf;
  # list form pipe open: no shell is involved, so unusual characters in the
  # file name can not alter the command
  my $fh;
  if (! open($fh, '-|', $tool, $file)) {
    printf STDERR "# warning: can not run %s on %s: %s\n", $tool, $file, $!;
    return %valueOf;
  }
  my $wanted = join('|', map { quotemeta($_) } @labels);
  while (my $line = <$fh>) {
    next unless ($line =~ m/($wanted):/);
    my $label = $1;
    my @words = split(' ', $line);
    my $value = $words[-1];
    $value =~ s/,//g;
    $valueOf{$label} = $value;
  }
  close($fh);	# exit status of the command is not examined
  return %valueOf;
}

##############################################################################
# format bases covered as a percent of the genome size, e.g. "12.34 %"
#   returns "n/a" when either value is missing, not a number, or the
#   genome size is zero, instead of dividing by zero
##############################################################################
sub percentOfGenome {
  my ($bases, $genomeSize) = @_;
  return "n/a" unless (looks_like_number($bases));
  return "n/a" unless (looks_like_number($genomeSize) && $genomeSize != 0);
  return sprintf("%.2f %%", 100.0 * $bases / $genomeSize);
}

##############################################################################
# summarize one track data file for the table
#   file       - path to a bigWig (name ends in .bw) or bigBed file
#   genomeSize - size of the genome, denominator for the percent coverage
# returns a two element list: (itemCount, percentCover)
#   bigWig: itemCount is the "mean:" value from bigWigInfo with two decimals
#   bigBed: itemCount is the "itemCount:" value from bigBedInfo with commas
#   percentCover is "basesCovered:" as a percent of genomeSize
#   ("n/a", "n/a") when the file is missing or empty; an individual value
#   is "n/a" when the info command did not supply a usable number
##############################################################################
sub oneTrackData($$) {
  my ($file, $genomeSize) = @_;
#  printf STDERR "# %s\n", $file;
  if (! -s "${file}") {
    return("n/a", "n/a");
  }
  my $itemCount = "n/a";
  my $percentCover = "n/a";
  # the dot is escaped so that only a real .bw suffix selects bigWigInfo
  if ($file =~ m/\.bw$/) {
    my %info = infoFields("bigWigInfo", $file, "basesCovered", "mean");
    $percentCover = percentOfGenome($info{'basesCovered'}, $genomeSize);
    if (looks_like_number($info{'mean'})) {
      $itemCount = sprintf ("%.2f", $info{'mean'});
    }
#    printf STDERR "# bigWigInfo %s %s %s\n", $itemCount, $percentCover, $file;
  } else {
    my %info = infoFields("bigBedInfo", $file, "itemCount", "basesCovered");
    if (defined($info{'itemCount'})) {
      $itemCount = commify($info{'itemCount'});
    }
    $percentCover = percentOfGenome($info{'basesCovered'}, $genomeSize);
#    printf STDERR "# bigBedInfo %s %s %s\n", $itemCount, $percentCover, $file;
  }
  return ($itemCount, $percentCover);
}

##############################################################################
### start the HTML output
##############################################################################
sub startHtml() {

my $timeStamp = `date "+%F"`;
chomp $timeStamp;

my $subSetMessage = "subset of $asmHubName only";
if ($asmHubName eq "vertebrate") {
   $subSetMessage = "subset of other ${asmHubName}s only";
}

# the text of this here-document continues on the lines that follow
print <<"END"
+Assemblies from NCBI/Genbank/Refseq sources, $subSetMessage. +
+ +count | +common name link to genome browser |
+ gc5 base | +gap | +assembly | +rmsk | +TRF simpleRepeat |
+ windowMasker | +gapOverlap | +tandemDups | +cpg unmasked |
+ cpg island |
+ ncbiGene | +ncbiRefSeq | +xenoRefGene | +augustus | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
TOTALS: | total assembly count ${assemblyTotal} | +
\nOther assembly hubs available:
\n
Primates | \n" + if ($asmHubName ne "primates"); + printf "Mammals | \n" + if ($asmHubName ne "mammals"); + printf "Birds | \n" + if ($asmHubName ne "birds"); + printf "Fish | \n" + if ($asmHubName ne "fish"); + printf "other vertebrates | \n" + if ($asmHubName ne "vertebrate"); + + printf "
---|