22a9ec8b34dd60a14257bdef408f21c03db601b4 hiram Mon Feb 17 16:06:38 2020 -0800 now using the shorter assembly name for genome identifier and fix some common names refs #23891 diff --git src/hg/makeDb/doc/asmHubs/mkAsmStats.pl src/hg/makeDb/doc/asmHubs/mkAsmStats.pl index c8613fc..c6a8685 100755 --- src/hg/makeDb/doc/asmHubs/mkAsmStats.pl +++ src/hg/makeDb/doc/asmHubs/mkAsmStats.pl @@ -1,309 +1,310 @@
#!/usr/bin/env perl

use strict;
use warnings;
use File::stat;

# exactly two command line arguments are required: Name and asmName
my $argc = @ARGV;

if ($argc != 2) {
  printf STDERR "mkAsmStats Name asmName\n";
  printf STDERR "e.g.: mkAsmStats Mammals mammals\n";
  exit 255;
}

my $Name = shift @ARGV;
my $asmHubName = shift @ARGV;

my $home = $ENV{HOME};
my $toolsDir = "$home/kent/src/hg/makeDb/doc/asmHubs";

# file names derived from the asmName argument
my $commonNameList = "$asmHubName.asmId.commonName.tsv";
my $commonNameOrder = "$asmHubName.commonName.asmId.orderList.tsv";

my @orderList;	# asmId of the assemblies in order from the *.list files
# the order to read the different .list files:
my %betterName;	# key is asmId, value is better common name than found in
		# assembly_report

my $assemblyTotal = 0;	# complete list of assemblies in this group
my $asmCount = 0;	# count of assemblies completed and in the table

# running totals, all starting at zero
my $overallNucleotides = 0;
my $overallSeqCount = 0;
my $overallGapSize = 0;
my $overallGapCount = 0;

##############################################################################
# commify - return the given number as a string with a comma between
#   each group of three digits, e.g. 1234567 becomes 1,234,567
# Technique from Perl Cookbook Recipe 2.17: the string is reversed so the
#   groups of three can be matched from the low order digits upward, then
#   reversed back again.  The (?!\d*\.) lookahead leaves alone the digits
#   that would be to the right of a decimal point.
##############################################################################
sub commify($) {
  my ($number) = @_;
  my $reversed = reverse $number;
  $reversed =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
  my $withCommas = reverse $reversed;
  return $withCommas;
}

##############################################################################
### start the HTML output
##############################################################################
sub startHtml() {

my $timeStamp = `date "+%F"`;
chomp $timeStamp;

my $subSetMessage = "subset of $asmHubName only";
if ($asmHubName eq "vertebrate") {
$subSetMessage = "subset of other ${asmHubName}s only"; } print <<"END"
Assemblies from NCBI/Genbank/Refseq sources, $subSetMessage.
| count | common name link to genome browser |
scientific name and data download |
NCBI assembly | sequence count | genome size nucleotides |
gap count | unknown bases (gap size sum) | masking percent |
|---|---|---|---|---|---|---|---|---|
| TOTALS: | total assembly count ${assemblyTotal}${doneMsg} | $commaSeqCount | $commaNuc | $commaGapCount | $commaGapSize | |||
\nOther assembly hubs available:
\n
| Primates | \n" if ($asmHubName ne "primates"); printf "Mammals | \n" if ($asmHubName ne "mammals"); printf "Birds | \n" if ($asmHubName ne "birds"); printf "Fish | \n" if ($asmHubName ne "fish"); printf "other vertebrates | \n" if ($asmHubName ne "vertebrate"); printf "
|---|