452b689cab327a20bb7ac506eaf89a3f69d23feb
hiram
  Tue Jan 17 08:23:41 2023 -0800
invertebrates has become invertebrate and master.run.list has changed columns no redmine

diff --git src/hg/gar/garTable.pl src/hg/gar/garTable.pl
index 247d28b..a803556 100755
--- src/hg/gar/garTable.pl
+++ src/hg/gar/garTable.pl
@@ -80,72 +80,71 @@
 
 ###############################################################################
 # output a table cell for an N50 measurement
 sub n50Cell($$$) {
   my ($size, $count, $fh) = @_;
   if ($size > 0) {
     printf "<td style='display:none; text-align:right;' sorttable_customkey='%d'>%s&nbsp;(%s)</td>", $size, gmk($size), commify($count);
     printf $fh "\t%d (%d)", $size, $count;	# output to clade.tableData.txt
   } else {
     printf "<td style='display:none;'>&nbsp;</td>";
     printf $fh "\tn/a (n/a)";	# output to clade.tableData.txt
   }
 }
 ###############################################################################
 
-my @clades = qw( primates mammals birds fish vertebrate invertebrates plants fungi );
+my @clades = qw( primates mammals birds fish vertebrate invertebrate plants fungi );
 
 # to help weed out some of the noise
 # key is clade, value is minimal size to count as a whole genome
 # these are actually pretty low to allow in some alternate haplotype
 # assemblies that don't seem to be the whole assembly.
 # The assemblies are also filtered by NCBI status 'full/partial' to only
 # allow in the 'full' genomes meaning representation of the whole genome
 my %minimalGenomeSize = (
   primates => 1000000000,
   mammals => 20000000,
   birds => 200000000,
   fish => 100000,
   vertebrate => 4000000,
-  invertebrates => 10000,
+  invertebrate => 10000,
   plants => 100000,
   fungi => 50000,
 );
 
 #########################################################################
 ## read in list of current GenArk assemblies
 
 my %genArkAsm;	# key is asmId, value is string with:
 my %genArkClade;	# key is asmId, value is clade
 my %genArkCladeProfile;	# key is clade, value is count
 my %genArkSciName;	# key is asmId, value is sciName
 my %genArkComName;	# key is asmId, value is sciName
 
 # accession<tab>assembly<tab>scientific name<tab>common name<tab>taxonId<tab>clade
 
 my $genArkCount = 0;
 printf STDERR "# reading UCSC_GI.assemblyHubList.txt\n";
 open (FH, "<UCSC_GI.assemblyHubList.txt") or die "can not read UCSC_GI.assemblyHubList.txt";
 while (my $line = <FH>) {
   next if ($line =~ m/^#/);
   chomp $line;
   my ($accession, $assembly, $sciName, $commonName, $taxonId, $gClade) = split('\t', $line);
   my $asmId = sprintf("%s_%s", $accession, $assembly);
   die "ERROR: duplicate accession from UCSC_GI.assemblyHubList.txt" if (defined($genArkAsm{$asmId}));
   $genArkAsm{$asmId} = sprintf("%s\t%s\t%s\t%s\t%s", $accession, $assembly, $sciName, $commonName, $taxonId);
   $gClade =~ s/\(L\)//;	# remove the 'legacy' tag
-  $gClade = "invertebrates" if ($gClade =~ m/invertebrate/);
   $genArkClade{$asmId} = $gClade;
   ++$genArkCladeProfile{$gClade};
   ++$genArkCount;
 }
 close (FH);
 
 # accession     assembly        scientific name common name     taxonId
 # GCA_000001905.1 Loxafr3.0       Loxodonta africana      African savanna elephant (2009 genbank) 9785
 
 printf STDERR "# have %s assemblies from GenArk UCSC_GI.assemblyHubList.txt\n", commify($genArkCount);
 printf STDERR "# genArk clade profile:\n";
 foreach my $gClade (sort keys %genArkCladeProfile) {
   printf STDERR "# %6d\t%s\n", $genArkCladeProfile{$gClade}, $gClade;
 }
 
@@ -575,31 +574,31 @@
 
 my $examinedAsmId = 0;
 my $shouldBeGenArk = 0;
 my $shouldBeUcsc = 0;
 my %usedGenArk;	# key is asmId, value is count seen in *.today files
 my $genArkTooManySpecies = 0;
 my $countingUsedGenArk = 0;
 my $prevUsedGenArk = 0;
 
 foreach my $clade (@clades) {
   my $cladeCount = 0;
   my $goodToGoCount = 0;
   my $refSeq = "${NA}/all.refseq.${clade}.today";
   my $genBank = "${NA}/all.genbank.${clade}.today";
   my $asmHubTsv = "~/kent/src/hg/makeDb/doc/${clade}AsmHub/${clade}.orderList.tsv";
-  if ($clade eq "invertebrates") {
+  if ($clade eq "invertebrate") {
     $asmHubTsv = "~/kent/src/hg/makeDb/doc/invertebrateAsmHub/invertebrate.orderList.tsv";
   }
   open (FH, "cut -f1 ${refSeq} ${genBank} ${asmHubTsv}|sort -u|") or die "can not read $refSeq or $genBank or $asmHubTsv";
   while (my $asmId = <FH>) {
      chomp $asmId;
      ++$examinedAsmId;
      $asmId =~ s/\.]/_/g;
      $asmId =~ s/\:/_/g;
      $asmId =~ s/\%/_/g;
      $asmId =~ s/\+/_/g;
      $asmId =~ s/\//_/g;
      $asmId =~ s/\#/_/g;
 #      $asmId =~ s/[.:%+/#]/_/g;
      $asmId =~ s/[()]//g;
      $asmId =~ s/__/_/g;
@@ -1091,31 +1090,31 @@
   ++$outputGenArkRR if (defined($rrGcaGcfList{$asmId}) || defined($genArkAsm{$asmId}));
   $outputGenArk{$asmId} = $clade if (defined($genArkAsm{$asmId}));
   $outputRR{$asmId} = $clade if (defined($rrGcaGcfList{$asmId}));
   if ($asmId =~ m/^GCF/) {
     $gcfGcaCounts{'GCF'} += 1;
   } elsif ($asmId =~ m/GCA/) {
     $gcfGcaCounts{'GCA'} += 1;
   }
   ### If equivalent to UCSC database browser, make reference to RR browser
   my $ucscDb = "";
   $ucscDb = "/" . $rrGcaGcfList{$asmId} if (defined($rrGcaGcfList{$asmId}));
   if (length($ucscDb)) {
      $browserUrl = sprintf("/cgi-bin/hgTracks?db=%s", $rrGcaGcfList{$asmId});
   }
 
-  printf PC "%d", $asmCountInTable;	# start a line output to clade.tableData.tsv
+  printf PC "%s", $browserUrl;	# start a line output to clade.tableData.tsv
 
   ## count number of different scientific names used in this clade table
   if (!defined($cladeSciNameCounts{$clade})) {
     my %h;
     $cladeSciNameCounts{$clade} = \%h;
   }
   my $csnPtr = $cladeSciNameCounts{$clade};
   $csnPtr->{$sciNames{$asmId}} += 1 if (defined($sciNames{$asmId}));
   my $rowClass = "";
   my $gcaGcfClass = "gca";
   $gcaGcfClass = "gcf" if ($asmId =~ m/^GCF/);
   $gcaGcfClass .= " hasIucn" if (length($iucnLink));
   if (defined($comName{$asmId})) {
     if (defined($rrGcaGcfList{$asmId})) {
       $rowClass = " class='ucscDb $gcaGcfClass $clade'"; # present in UCSC db