f654134d5172e2687b3e6f98da7db78dd8379387 hiram Tue Jan 21 15:01:57 2020 -0800 initial set of files to build "other vertebrates" assembly hub refs #2482 diff --git src/hg/makeDb/doc/vertebratesAsmHub/mkAsmStats.pl src/hg/makeDb/doc/vertebratesAsmHub/mkAsmStats.pl new file mode 100755 index 0000000..1f255f3 --- /dev/null +++ src/hg/makeDb/doc/vertebratesAsmHub/mkAsmStats.pl @@ -0,0 +1,262 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use File::stat; + +my $home = $ENV{'HOME'}; +my $Name = "Vertebrate"; +my $asmHubName = "vertebrate"; +my $srcDocDir = "${asmHubName}sAsmHub"; +my $asmHubDocDir = "$home/kent/src/hg/makeDb/doc/$srcDocDir"; + +my $commonNameList = "$asmHubName.asmId.commonName.tsv"; +my $commonNameOrder = "$asmHubName.commonName.asmId.orderList.tsv"; +my @orderList; # asmId of the assemblies in order from the *.list files +# the order to read the different .list files: + +my $assemblyCount = 0; +my $overallNucleotides = 0; +my $overallSeqCount = 0; +my $overallGapSize = 0; +my $overallGapCount = 0; + +############################################################################## +# from Perl Cookbook Recipe 2.17, print out large numbers with comma delimiters: +############################################################################## +sub commify($) { + my $text = reverse $_[0]; + $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g; + return scalar reverse $text +} + +############################################################################## +### start the HTML output +############################################################################## +sub startHtml() { + +my $timeStamp = `date "+%F"`; +chomp $timeStamp; + +print <<"END" + + + + + + +
+Assemblies from NCBI/Genbank/Refseq sources, subset of other $asmHubName only. +
+ +count | +common name link to genome browser |
+ scientific name and data download |
+ NCBI assembly | +sequence count | genome size nucleotides |
+ gap count | unknown bases (gap size sum) | masking percent |
+
---|---|---|---|---|---|---|---|---|
TOTALS: | assembly count $assemblyCount | +$commaSeqCount | +$commaNuc | +$commaGapCount | +$commaGapSize | ++ |