42d19c5cec90202c2b8f520de948905af057fe4e hiram Sun Nov 3 13:07:26 2024 -0800 switch from counting in genomes.txt and instead use the assemblyList.json and count up clades correctly refs #29545 diff --git src/hg/makeDb/doc/asmHubs/hubIndex.pl src/hg/makeDb/doc/asmHubs/hubIndex.pl index 8a8f108..84e6f5b 100755 --- src/hg/makeDb/doc/asmHubs/hubIndex.pl +++ src/hg/makeDb/doc/asmHubs/hubIndex.pl @@ -148,40 +148,46 @@ # verify all known directories and files, alert for any new ones open (FH, "ls -d /mirrordata/hubs/*|") or die "can not ls -d /mirrordata/hubs/*"; while (my $dirPath = <FH>) { chomp $dirPath; my $fileDirName = basename($dirPath); if (! (defined($expectedList{$fileDirName}) || defined($otherTopLevels{$fileDirName})) ) { printf STDERR "# something new: %s\n", $fileDirName; } } close (FH); ### Determine genome counts: my %genomeCounts; -my $genomeCount = `grep -h ^genome /mirrordata/hubs/VGP/*enomes.txt | wc -l`; +my $asmListJson = "/mirrordata/hubs/VGP/assemblyList.json"; +my $genomeCount = `cat $asmListJson | python -mjson.tool | grep -c '"asmId":'`; chomp $genomeCount; $genomeCounts{"VGP"} = $genomeCount; my @checkList = ('primates', 'mammals', 'birds', 'fish', 'vertebrate', 'legacy', 'plants', "invertebrate", "fungi", 'viral', 'bacteria', 'CCGP', 'HPRC', 'BRC', 'globalReference'); foreach my $hubSet (@checkList) { - $genomeCount = `grep -h ^genome /mirrordata/hubs/$hubSet/genomes.txt | wc -l`; + $asmListJson = "/mirrordata/hubs/$hubSet/assemblyList.json"; + if ( -s "${asmListJson}" ) { + $genomeCount = `cat $asmListJson | python -mjson.tool | grep -c '"asmId":'`; chomp $genomeCount; $genomeCounts{$hubSet} = $genomeCount; + } else { + printf STDERR "# ERROR: can not find assemblyList.json:\n%s\n", $asmListJson; + } } my $hubCount = 0; printf "\n"; printf "\n"; printf " \n"; printf " \n"; printf "\n"; # construct table foreach my $orderUp (@orderOutHubs) { printf "\n"; ++$hubCount; if ($orderUp eq "VGP") {
hub gateway description