19d33fe8c7b5b90b7faa514185c79eb6af0d8d8b hiram Tue Jan 21 13:22:47 2020 -0800 now using new style GCF path names and separate hubs for each assembly refs #24748 diff --git src/hg/makeDb/doc/primateAsmHub/mkAsmStats.pl src/hg/makeDb/doc/primateAsmHub/mkAsmStats.pl index cb3d393..d8414ae 100755 --- src/hg/makeDb/doc/primateAsmHub/mkAsmStats.pl +++ src/hg/makeDb/doc/primateAsmHub/mkAsmStats.pl @@ -1,25 +1,26 @@ #!/usr/bin/env perl use strict; use warnings; use File::stat; my $home = $ENV{'HOME'}; my $srcDocDir = "primateAsmHub"; my $asmHubDocDir = "$home/kent/src/hg/makeDb/doc/$srcDocDir"; my $asmHubName = "primates"; +my $Name = "Primate"; my $commonNameList = "primates.asmId.commonName.tsv"; my $commonNameOrder = "primates.commonName.asmId.orderList.tsv"; my @orderList; # asmId of the assemblies in order from the *.list files # the order to read the different .list files: my $assemblyCount = 0; my $overallNucleotides = 0; my $overallSeqCount = 0; my $overallGapSize = 0; my $overallGapCount = 0; ############################################################################## # from Perl Cookbook Recipe 2.17, print out large numbers with comma delimiters: ############################################################################## @@ -27,46 +28,46 @@ my $text = reverse $_[0]; $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g; return scalar reverse $text } ############################################################################## ### start the HTML output ############################################################################## sub startHtml() { my $timeStamp = `date "+%F"`; chomp $timeStamp; print <<"END" <!DOCTYPE HTML 4.01 Transitional> -<!--#set var="TITLE" value="Primate genomes assembly hubs" --> +<!--#set var="TITLE" value="$Name genomes assembly hubs" --> <!--#set var="ROOT" value="../.." --> <!--#include virtual="\$ROOT/inc/gbPageStartHardcoded.html" --> -<h1>Primate Genomes assembly hubs</h1> +<h1>$Name Genomes assembly hubs</h1> <p> -Assemblies from NCBI/Genbank/Refseq sources +Assemblies from NCBI/Genbank/Refseq sources, subset of $asmHubName only. </p> -<p> -<h3>See also: <a href='index.html' target=_blank>hub access</a></h3> -</p> +<h3>See also: <a href='index.html' target=_blank>hub access</a></h3><br> <h3>Data resource links</h3> -NOTE: <em>Click on the column headers to sort the table by that column</em> +NOTE: <em>Click on the column headers to sort the table by that column</em><br> +The <em>link to genome browser</em> will attach only that single assembly to +the genome browser. END } ############################################################################## ### start the table output ############################################################################## sub startTable() { print <<"END" <table class="sortable" border="1"> <thead><tr><th>count</th> <th>common name<br>link to genome browser</th> <th>scientific name<br>and data download</th> <th>NCBI assembly</th> <th>sequence<br>count</th><th>genome size<br>nucleotides</th> <th>gap<br>count</th><th>unknown bases<br>(gap size sum)</th><th>masking<br>percent</th> @@ -214,31 +215,31 @@ $sciName = $line; $commonName =~ s/.*\(//; $commonName =~ s/\)//; $sciName =~ s/.*:\s+//; $sciName =~ s/\s+\(.*//; } } elsif ($line =~ m/Taxid:/) { if ($taxId =~ m/notFound/) { ++$itemsFound; $taxId = $line; $taxId =~ s/.*:\s+//; } } } close (FH); - printf "<tr><th>%d</th><td align=center><a href='https://genome.ucsc.edu/cgi-bin/hgGateway?hubUrl=https://hgdownload.soe.ucsc.edu/hubs/%s/hub.txt&genome=%s&position=lastDbPos' target=_blank>%s</a></td>\n", ++$asmCount, $asmHubName, $asmId, $commonName; + printf "<tr><th>%d</th><td align=center><a href='https://genome.ucsc.edu/cgi-bin/hgGateway?hubUrl=https://hgdownload.soe.ucsc.edu/hubs/genomes/%s/%s.hub.txt&genome=%s&position=lastDbPos' target=_blank>%s</a></td>\n", ++$asmCount, $asmId, $asmId, $asmId, $commonName; printf " <td align=center><a href='https://hgdownload.soe.ucsc.edu/hubs/%s/genomes/%s/' target=_blank>%s</a></td>\n", $asmHubName, $asmId, $sciName; printf " <td align=left><a href='https://www.ncbi.nlm.nih.gov/assembly/%s_%s/' target=_blank>%s</a></td>\n", $gcPrefix, $asmAcc, $asmId; printf " <td align=right>%s</td>\n", commify($seqCount); printf " <td align=right>%s</td>\n", commify($totalSize); printf " <td align=right>%s</td>\n", commify($gapCount); printf " <td align=right>%s</td>\n", commify($gapSize); printf " <td align=right>%.2f</td>\n", $maskPerCent; printf "</tr>\n"; } } ############################################################################## ### main() ##############################################################################