2862b2a8ea3a56805265df693bf94a4ed7d07cca
hiram
  Thu Jul 21 18:04:51 2022 -0700
adding the viral group to the index page footer table and special sets of columns for virus index pages refs #29545

diff --git src/hg/makeDb/doc/asmHubs/mkHubIndex.pl src/hg/makeDb/doc/asmHubs/mkHubIndex.pl
index 41f89c7..ed79318 100755
--- src/hg/makeDb/doc/asmHubs/mkHubIndex.pl
+++ src/hg/makeDb/doc/asmHubs/mkHubIndex.pl
@@ -20,30 +20,31 @@
   printf STDERR "The result prints to stdout the index.html page for this set of assemblies\n";
   exit 255;
 }
 
 my $home = $ENV{'HOME'};
 my $toolsDir = "$home/kent/src/hg/makeDb/doc/asmHubs";
 
 my $Name = shift;
 my $asmHubName = shift;
 my $defaultAssembly = shift;
 my $inputList = shift;
 my $orderList = $inputList;
 if ( ! -s "$orderList" ) {
-  $orderList = $toolsDir/$inputList;
+  $orderList = "$toolsDir/$inputList";	# quoted: a bare / here is numeric division, not a path join
 }
+my %cladeId;	# key is asmId, value is clade, useful for 'legacy' index page
 
 printf STDERR "# mkHubIndex %s %s %s %s\n", $Name, $asmHubName, $defaultAssembly, $orderList;
 my $vgpIndex = 0;
 $vgpIndex = 1 if ($Name =~ m/vgp/i);
 my %vgpClass;	# key is asmId, value is taxon 'class' as set by VGP project
 if ($vgpIndex) {
   my $vgpClass = "$home/kent/src/hg/makeDb/doc/vgpAsmHub/vgp.taxId.asmId.class.txt";
   open (FH, "<$vgpClass") or die "can not read $vgpClass";
   while (my $line = <FH>) {
     my ($taxId, $asmId, $class) = split('\t', $line);
     $vgpClass{$asmId} = $class;
   }
   close (FH);
 }
 
@@ -188,33 +189,41 @@
 The other links provide access to NCBI resources for these assemblies.
 END
 
 }	#	sub startHtml()
 
 ##############################################################################
 ### start the table output
 ##############################################################################
 sub startTable() {
 print '
 <table class="sortable" border="1">
 <thead><tr><th>count</th>
   <th>common&nbsp;name&nbsp;and<br>view&nbsp;in&nbsp;browser</th>
   <th>scientific name<br>and&nbsp;data&nbsp;download</th>
   <th>NCBI&nbsp;assembly</th>
-  <th>BioSample</th><th>BioProject</th>
-  <th>assembly&nbsp;date,<br>source&nbsp;link</th>
 ';
+# viral rows carry no BioSample cell (see tableContents), so drop that header too
+if ("viral" ne $asmHubName) {
+  printf "  <th>BioSample</th>\n";
+}
+printf "  <th>BioProject</th>\n";
+printf "<th>assembly&nbsp;date,<br>source&nbsp;link</th>\n";
+
+if ("legacy" eq $asmHubName) {
+  printf "<th>clade</th>\n";
+}
 
 if ($vgpIndex) {
   printf "<th>class<br>VGP&nbsp;link</th>\n";
 }
 print "</tr></thead><tbody>\n";
 }	#	sub startTable()
 
 ##############################################################################
 ### end the table output
 ##############################################################################
 sub endTable() {
 
 print <<"END";
 
 </tbody>
@@ -354,43 +363,49 @@
     my $browserName = $commonName;
     my $browserUrl = "https://genome.ucsc.edu/h/$accessionId";
     if ($asmId !~ m/^GC/) {
        $hubUrl = "https://hgdownload.soe.ucsc.edu/goldenPath/$asmId/bigZips";
        $browserUrl = "https://genome.ucsc.edu/cgi-bin/hgTracks?db=$asmId";
        $browserName = "$commonName ($asmId)";
     }
     printf "<tr><td align=right>%d</td>\n", ++$rowCount;
     printf "<td align=center><a href='%s' target=_blank>%s</a></td>\n", $browserUrl, $browserName;
     printf "    <td align=center><a href='%s/' target=_blank>%s</a></td>\n", $hubUrl, $sciName;
     if ($asmId !~ m/^GC/) {
       printf "    <td align=left><a href='https://www.ncbi.nlm.nih.gov/assembly/%s_%s/' target=_blank>%s_%s</a></td>\n", $gcPrefix, $asmAcc, $accessionId, $asmName;
     } else {
       printf "    <td align=left><a href='https://www.ncbi.nlm.nih.gov/assembly/%s/' target=_blank>%s</a></td>\n", $accessionId, $asmId;
     }
+    # viruses do not appear to have BioSample
+    if ($asmHubName ne "viral") {
       if ( $bioSample ne "notFound" ) {
         printf "    <td align=left><a href='https://www.ncbi.nlm.nih.gov/biosample/?term=%s' target=_blank>%s</a></td>\n", $bioSample, $bioSample;
       } else {
       printf "    <td align=left>n/a</td>\n";
       }
+    }
     # one broken assembly_report
     $bioProject= "PRJEB25768" if ($accessionId eq "GCA_900324465.2");
     if ($bioProject eq "notFound") {
       printf "    <td align=left>%s</td>\n", $bioProject;
     } else {
       printf "    <td align=left><a href='https://www.ncbi.nlm.nih.gov/bioproject/?term=%s' target=_blank>%s</a></td>\n", $bioProject, $bioProject;
     }
     printf "    <td align=center><a href='%s' target=_blank>%s</a></td>\n", $ncbiFtpLink, $asmDate;
+    if ("legacy" eq $asmHubName) {
+      printf "    <td align=center>%s</td>\n", $cladeId{$asmId};
+    }
     if ($vgpIndex) {
       my $sciNameUnderscore = $sciName;
       $sciNameUnderscore =~ s/ /_/g;
       $sciNameUnderscore = "Strigops_habroptilus" if ($sciName =~ m/Strigops habroptila/);
 
       if (! defined($vgpClass{$asmId})) {
          printf STDERR "# ERROR: no 'class' defined for VGP assembly %s\n", $asmId;
          exit 255;
       }
       printf "    <td align=center><a href='https://vgp.github.io/genomeark/%s/' target=_blank>%s</a></td>\n", $sciNameUnderscore, $vgpClass{$asmId}
     }
     printf "</tr>\n";
   }
 }	#	sub tableContents()
 
@@ -408,30 +423,41 @@
 if ( -s "${promotedList}" ) {
   open (FH, "<${promotedList}" ) or die "can not read ${promotedList}";
   while (my $line = <FH>) {
     next if ($line =~ m/^#/);
     chomp $line;
     my ($asmId, $commonName) = split('\t', $line);
     $promotedList{$asmId} = $commonName;
   }
   close (FH);
   foreach my $asmId ( sort { lc($promotedList{$a}) cmp lc($promotedList{$b}) } keys %promotedList) {
      push @promotedList, $asmId;
   }
   $promotedIndex = 0;
 }
 
+my $cladeList = dirname(${orderList}) . "/$asmHubName.clade.txt";	# optional file beside the order list
+if ( -s "${cladeList}" ) {
+  open (FH, "<$cladeList") or die "can not read ${cladeList}";
+  while (my $clade = <FH>) {
+    chomp $clade;
+    my ($asmId, $cladeName) = split('\t', $clade);	# first two tab-separated fields
+    $cladeId{$asmId} = $cladeName if (defined($cladeName) && $asmId !~ m/^#/);	# skip blank, short and comment lines
+  }
+  close (FH);
+}
+
 open (FH, "<${orderList}") or die "can not read ${orderList}";
 while (my $line = <FH>) {
   next if ($line =~ m/^#/);
   chomp $line;
   my ($asmId, $commonName) = split('\t', $line);
   if ( ($promotedIndex > -1) && ($promotedIndex < scalar(@promotedList))) {
      my $checkInsertAsmId = $promotedList[$promotedIndex];
      my $checkInsertName = $promotedList{$checkInsertAsmId};
      # insert before this commonName when alphabetic before
      if (lc($checkInsertName) lt lc($commonName)) {
        push @orderList, $checkInsertAsmId;
        $commonName{$checkInsertAsmId} = $checkInsertName;
        ++$assemblyTotal;
        printf STDERR "# inserting '%s' before '%s' at # %03d\n", $checkInsertName, $commonName, $assemblyTotal;
        ++$promotedIndex;	# only doing one at this time