bd7ad41f40025e2d8d08ea6a001507b935a8ff5d hiram Mon Jul 21 17:39:50 2025 -0700 adding archaea as a new group refs #29545 diff --git src/hg/makeDb/doc/asmHubs/hubIndex.pl src/hg/makeDb/doc/asmHubs/hubIndex.pl index d7806abcb53..c64c5aa33c9 100755 --- src/hg/makeDb/doc/asmHubs/hubIndex.pl +++ src/hg/makeDb/doc/asmHubs/hubIndex.pl @@ -45,104 +45,108 @@ "VGP" => 1, "CCGP" => 1, "HPRC" => 1, "BRC" => 1, "birds" => 1, "fish" => 1, "globalReference" => 1, "mammals" => 1, "primates" => 1, "vertebrate" => 1, "invertebrate" => 1, "fungi" => 1, "legacy" => 1, "plants" => 1, "viral" => 1, + "archaea" => 1, "bacteria" => 1, ); my %titles = ( "VGP" => "Vertebrate Genomes Project collection", "CCGP" => "The California Conservation Genomics Project", "HPRC" => "Human Pangenome Reference Consortium", "BRC" => "BRC Analytics - Bioinformatics Research Center", "birds" => "NCBI bird genomes", "fish" => "NCBI fish genomes", "globalReference" => "Global Human Reference genomes, January 2020", "mammals" => "NCBI mammal genomes", "primates" => "NCBI primate genomes", "vertebrate" => "NCBI other vertebrate genomes", "invertebrate" => "NCBI invertebrate genomes", "fungi" => "NCBI fungi genomes", "legacy" => "NCBI genomes legacy/superseded by newer versions", "plants" => "NCBI plant genomes", "viral" => "NCBI virus genomes", + "archaea" => "NCBI archaea genomes", "bacteria" => "NCBI bacteria genomes", "gtexAnalysis" => "Genotype-Tissue Expression (GTEx) Project analysis results track hub, V6 October 2015", "gtex" => "Genotype-Tissue Expression (GTEx) RNA-seq signal track hub, V6 October 2015", "mouseStrains" => "16 mouse strain assembly and track hub, May 2017", "neuroDiffCrispr" => "Structurally conserved primate cerebral cortex lincRNAs track hub, December 2018", ); my %otherTopLevels = ( "GCA" => 1, "GCF" => 1, "gtex" => 1, "gtexAnalysis" => 1, "mouseStrains" => 1, "neuroDiffCrispr" => 1, "UCSC_GI.assemblyHubList.txt" => 1, "index.html" => 1, ); my @orderOutHubs = ( "primates", "mammals", "birds", "fish", "vertebrate", "invertebrate", "fungi", "plants", "viral", + "archaea", "bacteria", "VGP", "CCGP", "HPRC", "BRC", "globalReference", "mouseStrains", "legacy", ); my @orderOutTracks = ( "gtexAnalysis", "gtex", "neuroDiffCrispr", ); my %indexPage = ( "primates" => "index.html", "mammals" => "index.html", "birds" => "index.html", "fish" => "index.html", "vertebrate" => "index.html", "invertebrate" => "index.html", "fungi" => "index.html", "legacy" => "index.html", "plants" => "index.html", "viral" => "index.html", + "archaea" => "index.html", "bacteria" => "index.html", "VGP" => "index.html", "CCGP" => "index.html", "HPRC" => "index.html", "BRC" => "index.html", "mouseStrains" => "hubIndex.html", "globalReference" => "index.html", "gtexAnalysis" => "index.html", "gtex" => "index.html", "neuroDiffCrispr" => "index.html", ); # verify all known directories and files, alert for any new ones open (FH, "ls -d /mirrordata/hubs/*|") or die "can not ls -d /mirrordata/hubs/*"; while (my $dirPath = <FH>) { @@ -151,31 +155,31 @@ if (! (defined($expectedList{$fileDirName}) || defined($otherTopLevels{$fileDirName})) ) { printf STDERR "# something new: %s\n", $fileDirName; } } close (FH); ### Determine genome counts: my %genomeCounts; my $asmListJson = "/mirrordata/hubs/VGP/assemblyList.json"; my $genomeCount = `cat $asmListJson | python -mjson.tool | grep -c '"asmId":'`; chomp $genomeCount; $genomeCounts{"VGP"} = $genomeCount; -my @checkList = ('primates', 'mammals', 'birds', 'fish', 'vertebrate', 'legacy', 'plants', "invertebrate", "fungi", 'viral', 'bacteria', 'CCGP', 'HPRC', 'BRC', 'globalReference'); +my @checkList = ('primates', 'mammals', 'birds', 'fish', 'vertebrate', 'legacy', 'plants', "invertebrate", "fungi", 'viral', 'archaea', 'bacteria', 'CCGP', 'HPRC', 'BRC', 'globalReference'); foreach my $hubSet (@checkList) { $asmListJson = "/mirrordata/hubs/$hubSet/assemblyList.json"; if ( -s "${asmListJson}" ) { $genomeCount = `cat $asmListJson | python -mjson.tool | grep -c '"asmId":'`; chomp $genomeCount; $genomeCounts{$hubSet} = $genomeCount; } else { printf STDERR "# ERROR: can not find assemblyList.json:\n%s\n", $asmListJson; } } my $hubCount = 0; printf "<table border='1'>\n";