42d19c5cec90202c2b8f520de948905af057fe4e
hiram
  Sun Nov 3 13:07:26 2024 -0800
switch from counting in genomes.txt and instead use the assemblyList.json and count up clades correctly refs #29545

diff --git src/hg/makeDb/doc/asmHubs/hubIndex.pl src/hg/makeDb/doc/asmHubs/hubIndex.pl
index 8a8f108..84e6f5b 100755
--- src/hg/makeDb/doc/asmHubs/hubIndex.pl
+++ src/hg/makeDb/doc/asmHubs/hubIndex.pl
@@ -148,40 +148,46 @@
 # verify all known directories and files, alert for any new ones
 open (FH, "ls -d /mirrordata/hubs/*|") or die "can not ls -d /mirrordata/hubs/*";
 while (my $dirPath = <FH>) {
   chomp $dirPath;
   my $fileDirName = basename($dirPath);
   if (! (defined($expectedList{$fileDirName}) || defined($otherTopLevels{$fileDirName})) ) {
     printf STDERR "# something new: %s\n", $fileDirName;
   }
 }
 
 close (FH);
 
 ### Determine genome counts:
 my %genomeCounts;
 
-my $genomeCount = `grep -h ^genome /mirrordata/hubs/VGP/*enomes.txt | wc -l`;
+my $asmListJson = "/mirrordata/hubs/VGP/assemblyList.json";
+my $genomeCount = `cat $asmListJson | python -mjson.tool | grep -c '"asmId":'`;
 chomp $genomeCount;
 $genomeCounts{"VGP"} = $genomeCount;
 
 my @checkList = ('primates', 'mammals', 'birds', 'fish', 'vertebrate', 'legacy', 'plants', "invertebrate", "fungi", 'viral', 'bacteria', 'CCGP', 'HPRC', 'BRC', 'globalReference');
 
 foreach my $hubSet (@checkList) {
-  $genomeCount = `grep -h ^genome /mirrordata/hubs/$hubSet/genomes.txt | wc -l`;
+  $asmListJson = "/mirrordata/hubs/$hubSet/assemblyList.json";
+  if ( -s "${asmListJson}" ) {
+    $genomeCount = `cat $asmListJson | python -mjson.tool | grep -c '"asmId":'`;
     chomp $genomeCount;
     $genomeCounts{$hubSet} = $genomeCount;
+  } else {
+    printf STDERR "# ERROR: can not find assemblyList.json:\n%s\n", $asmListJson;
+  }
 }
 
 my $hubCount = 0;
 
 printf "<table border='1'>\n";
 printf "<thead><tr>\n";
 printf "  <th>hub&nbsp;gateway</th>\n";
 printf "  <th>description</th>\n";
 printf "</tr></thead><tbody>\n";
 
 # construct table
 foreach my $orderUp (@orderOutHubs) {
   printf "<tr>\n";
   ++$hubCount;
   if ($orderUp eq "VGP") {