9006bcfc0395f98e6cc2db983e5f56d188244c82 hiram Tue Mar 9 13:49:23 2021 -0800 updated script to use on hgdownload for constructing hubs/index.html gateway page refs #26658 diff --git src/hg/makeDb/doc/asmHubs/hubIndex.pl src/hg/makeDb/doc/asmHubs/hubIndex.pl index 45bd9cf..d360318 100755 --- src/hg/makeDb/doc/asmHubs/hubIndex.pl +++ src/hg/makeDb/doc/asmHubs/hubIndex.pl @@ -1,210 +1,184 @@ #!/usr/bin/env perl use strict; use warnings; use File::Basename; my $thisMachine = `uname -n`; chomp $thisMachine; if ($thisMachine ne "hgdownload") { printf STDERR "# NOTE: This script is only used on hgdownload\n"; exit 255; } ############################################################################# sub startHtml() { printf '<!DOCTYPE HTML 4.01 Transitional> -<!--#set var="TITLE" value="U.C. Santa Cruz Genomics Institute Assembly and Track hubs" --> +<!--#set var="TITLE" value="UCSC Genome Browser Assembly hubs" --> <!--#set var="ROOT" value=".." --> <!--#include virtual="$ROOT/inc/gbPageStartHardcoded.html" --> -<h1>U.C. Santa Cruz Genomics Institute Assembly and Track hubs</h1> +<h1>UCSC Genome Browser Assembly hubs</h1> ' } ############################################################################# sub endHtml() { printf ' </div><!-- closing gbsPage from gbPageStartHardcoded.html --> </div><!-- closing container-fluid from gbPageStartHardcoded.html --> <!--#include virtual="$ROOT/inc/gbFooterHardcoded.html"--> <script type="text/javascript" src="/js/sorttable.js"></script> </body></html> ' } ############################################################################# startHtml; my %expectedList = ( "VGP" => 1, "birds" => 1, "fish" => 1, "globalReference" => 1, "mammals" => 1, "primates" => 1, "vertebrate" => 1, + "invertebrate" => 1, + "fungi" => 1, "legacy" => 1, "plants" => 1, ); my %titles = ( "VGP" => "Vertebrate Genomes Project collection", "birds" => "NCBI bird genomes", "fish" => "NCBI fish genomes", "globalReference" => "Global Human Reference genomes, January 2020", "mammals" => "NCBI mammal genomes", "primates" => "NCBI primate genomes", "vertebrate" => "NCBI other vertebrate genomes", + "invertebrate" => "NCBI invertebrate genomes", + "fungi" => "NCBI fungi genomes", "legacy" => "NCBI genomes legacy/superseded by newer versions", "plants" => "NCBI plant genomes", "gtexAnalysis" => "Genotype-Tissue Expression (GTEx) Project analysis results track hub, V6 October 2015", "gtex" => "Genotype-Tissue Expression (GTEx) RNA-seq signal track hub, V6 October 2015", "mouseStrains" => "16 mouse strain assembly and track hub, May 2017", "neuroDiffCrispr" => "Structurally conserved primate cerebral cortex lincRNAs track hub, December 2018", ); -my %newDirectories; - my %otherTopLevels = ( "GCA" => 1, "GCF" => 1, "gtex" => 1, "gtexAnalysis" => 1, "mouseStrains" => 1, "neuroDiffCrispr" => 1, "UCSC_GI.assemblyHubList.txt" => 1, "index.html" => 1, ); my @orderOutHubs = ( "primates", "mammals", "birds", "fish", "vertebrate", - "legacy", + "invertebrate", + "fungi", "plants", "VGP", "globalReference", "mouseStrains", + "legacy", ); my @orderOutTracks = ( "gtexAnalysis", "gtex", "neuroDiffCrispr", ); my %indexPage = ( "primates" => "index.html", "mammals" => "index.html", "birds" => "index.html", "fish" => "index.html", "vertebrate" => "index.html", + "invertebrate" => "index.html", + "fungi" => "index.html", "legacy" => "index.html", "plants" => "index.html", "VGP" => "index.html", "mouseStrains" => "hubIndex.html", "globalReference" => "index.html", "gtexAnalysis" => "index.html", "gtex" => "index.html", "neuroDiffCrispr" => "index.html", ); # verify all known directories and files, alert for any new ones open (FH, "ls -d /mirrordata/hubs/*|") or die "can not ls -d /mirrordata/hubs/*"; while (my $dirPath = <FH>) { chomp $dirPath; my $fileDirName = basename($dirPath); if (! (defined($expectedList{$fileDirName}) || defined($otherTopLevels{$fileDirName})) ) { printf STDERR "# something new: %s\n", $fileDirName; - $newDirectories{$fileDirName} = 1; } } close (FH); ### Determine genome counts: my %genomeCounts; my $genomeCount = `grep -h ^genome /mirrordata/hubs/VGP/*enomes.txt | wc -l`; chomp $genomeCount; $genomeCounts{"VGP"} = $genomeCount; -my @checkList = ('primates', 'mammals', 'birds', 'fish', 'vertebrate', 'legacy', 'plants', 'globalReference'); +my @checkList = ('primates', 'mammals', 'birds', 'fish', 'vertebrate', 'legacy', 'plants', "invertebrate", "fungi", 'globalReference'); -foreach my $genome (@checkList) { - $genomeCount = `grep -h ^genome /mirrordata/hubs/$genome/genomes.txt | wc -l`; +foreach my $hubSet (@checkList) { + $genomeCount = `grep -h ^genome /mirrordata/hubs/$hubSet/genomes.txt | wc -l`; chomp $genomeCount; - $genomeCounts{$genome} = $genomeCount; + $genomeCounts{$hubSet} = $genomeCount; } my $hubCount = 0; -printf "<h2>Assembly hubs</h2>\n\n"; - printf "<table class='sortable' border='1'>\n"; printf "<thead>\n"; printf " <th>hub gateway</th>\n"; printf " <th>description</th>\n"; printf "</tr></thead><tbody>\n"; # construct table foreach my $orderUp (@orderOutHubs) { printf "<tr>\n"; ++$hubCount; if ($orderUp eq "fish") { printf " <td><a href='%s/%s' target=_blank>fishes</a></td>\n", $orderUp, $indexPage{$orderUp}; } else { printf " <td><a href='%s/%s' target=_blank>%s</a></td>\n", $orderUp, $indexPage{$orderUp}, $orderUp; } if (defined($genomeCounts{$orderUp})) { printf " <td>%s (%d assemblies)</td>\n", $titles{$orderUp}, $genomeCounts{$orderUp}; } else { printf " <td>%s</td>\n", $titles{$orderUp}; } printf "</tr>\n"; } printf "</tbody></table>\n"; my $totalAsmHubs = `grep -v "^#" /mirrordata/hubs/UCSC_GI.assemblyHubList.txt | wc -l`; chomp $totalAsmHubs; printf "<p>\n"; printf "Please note: text file <a href='UCSC_GI.assemblyHubList.txt' target=_blank>listing</a> of %d NCBI/VGP genome assembly hubs\n", $totalAsmHubs; printf "</p>\n"; -printf "\n<h2>Track hubs</h2>\n\n"; - -printf "<table class='sortable' border='1'>\n"; -printf "<thead>\n"; -printf " <th>hub gateway</th>\n"; -printf " <th>description</th>\n"; -printf "</tr></thead><tbody>\n"; - -# construct table -foreach my $orderUp (@orderOutTracks) { - printf "<tr>\n"; - ++$hubCount; - if ($orderUp eq "fish") { - printf " <td><a href='%s/%s' target=_blank>fishes</a></td>\n", $orderUp, $indexPage{$orderUp}; - } else { - printf " <td><a href='%s/%s' target=_blank>%s</a></td>\n", $orderUp, $indexPage{$orderUp}, $orderUp; - } - if (defined($genomeCounts{$orderUp})) { - printf " <td>%s (%d assemblies)</td>\n", $titles{$orderUp}, $genomeCounts{$orderUp}; - } else { - printf " <td>%s</td>\n", $titles{$orderUp}; - } - printf "</tr>\n"; -} - -printf "</tbody></table>\n"; - endHtml; - -