47fa2308b393f3d1eea2310c58cdc46090356a62
hiram
  Sun Apr 2 22:35:22 2023 -0700
adding custom index page code for CCGP collection refs #30912

diff --git src/hg/makeDb/doc/asmHubs/mkHubIndex.pl src/hg/makeDb/doc/asmHubs/mkHubIndex.pl
index e1bcf30..4414bf6 100755
--- src/hg/makeDb/doc/asmHubs/mkHubIndex.pl
+++ src/hg/makeDb/doc/asmHubs/mkHubIndex.pl
@@ -24,40 +24,44 @@
 my $home = $ENV{'HOME'};
 my $toolsDir = "$home/kent/src/hg/makeDb/doc/asmHubs";
 
 my $Name = shift;
 my $asmHubName = shift;
 my $defaultAssembly = shift;
 my $inputList = shift;
 my $orderList = $inputList;
 if ( ! -s "$orderList" ) {
   $orderList = $toolsDir/$inputList;
 }
 my %cladeId;	# value is asmId, value is clade, useful for 'legacy' index page
 
 printf STDERR "# mkHubIndex %s %s %s %s\n", $Name, $asmHubName, $defaultAssembly, $orderList;
 my $hprcIndex = 0;
+my $ccgpIndex = 0;
 my $vgpIndex = 0;
 $hprcIndex = 1 if ($Name =~ m/hprc/i);
+$ccgpIndex = 1 if ($Name =~ m/ccgp/i);
 $vgpIndex = 1 if ($Name =~ m/vgp/i);
-my %vgpClass;	# key is asmId, value is taxon 'class' as set by VGP project
-if ($vgpIndex) {
-  my $vgpClass = "$home/kent/src/hg/makeDb/doc/vgpAsmHub/vgp.taxId.asmId.class.txt";
-  open (FH, "<$vgpClass") or die "can not read $vgpClass";
+my %extraClass;	# key is asmId, value is taxon 'class' as set by the VGP or CCGP project
+if ($vgpIndex || $ccgpIndex) {
+  my $whichIndex = "vgp";
+  $whichIndex = "ccgp" if ($ccgpIndex);
+  my $extraClass = "$home/kent/src/hg/makeDb/doc/${whichIndex}AsmHub/${whichIndex}.taxId.asmId.class.txt";
+  open (FH, "<$extraClass") or die "can not read $extraClass";
   while (my $line = <FH>) {
     my ($taxId, $asmId, $class) = split('\t', $line);
-    $vgpClass{$asmId} = $class;
+    $extraClass{$asmId} = $class;
   }
   close (FH);
 }
 
 my @orderList;	# asmId of the assemblies in order from the *.list files
 # the order to read the different .list files:
 my $assemblyTotal = 0;
 my %commonName;	# key is asmId, value is a common name, perhaps more appropriate
                 # than found in assembly_report file
 
 ##############################################################################
 # from Perl Cookbook Recipe 2.17, print out large numbers with comma delimiters:
 ##############################################################################
 sub commify($) {
     my $text = reverse $_[0];
@@ -96,31 +100,50 @@
 
 <!--#include virtual="\$ROOT/inc/gbPageStartHardcoded.html" -->
 
 <h1>VGP - Vertebrate Genomes Project assembly hub</h1>
 <p>
 <a href='https://vertebrategenomesproject.org/' target=_blank>
 <img src='VGPlogo.png' width=280 alt='VGP logo'></a></p>
 <p>
 This assembly hub contains assemblies released
 by the <a href='https://vertebrategenomesproject.org/' target=_blank>
 Vertebrate Genomes Project.</a> $vgpSubset
 </p>
 
 END
 } else {
-  if ($hprcIndex) {
+  if ($ccgpIndex) {
+    print <<"END";
+<!DOCTYPE HTML 4.01 Transitional>
+<!--#set var="TITLE" value="CCGP -  California Conservation Genomics Project " -->
+<!--#set var="ROOT" value="../.." -->
+
+<!--#include virtual="\$ROOT/inc/gbPageStartHardcoded.html" -->
+
+<h1>CCGP -  California Conservation Genomics Project assembly hub</h1>
+<p>
+<a href='https://www.ccgproject.org/' target=_blank>
+<img src='CCGP_logo.png' width=280 alt='CCGP logo'></a></p>
+<p>
+This assembly hub contains assemblies released
+by the <a href='https://www.ccgproject.org/' target=_blank>
+California Conservation Genomics Project.</a>
+</p>
+
+END
+  } elsif ($hprcIndex) {
     print <<"END";
 <!DOCTYPE HTML 4.01 Transitional>
 <!--#set var="TITLE" value="HPRC - Human Pangenome Reference Consortium" -->
 <!--#set var="ROOT" value="../.." -->
 
 <!--#include virtual="\$ROOT/inc/gbPageStartHardcoded.html" -->
 
 <h1>HPRC - Human Pangenome Reference Consortium assembly hub</h1>
 <p>
 <a href='https://humanpangenome.org/' target=_blank>
 <img src='HPRC_logo.png' width=280 alt='HPRC logo'></a></p>
 <p>
 This assembly hub contains assemblies released
 by the <a href='https://humanpangenome.org/' target=_blank>
 Human Pangenome Reference Consortium.</a>
@@ -242,31 +265,33 @@
   <th>common&nbsp;name&nbsp;and<br>view&nbsp;in&nbsp;browser</th>
   <th>scientific name<br>and&nbsp;data&nbsp;download</th>
   <th>NCBI&nbsp;assembly</th>
   <th>BioSample</th>
 ';
 if ("viral" ne $asmHubName) {
   printf "  <th>BioProject</th>\n";
 }
 
 printf "<th>assembly&nbsp;date,<br>source&nbsp;link</th>\n";
 
 if ("legacy" eq $asmHubName) {
   printf "<th>clade</th>\n";
 }
 
-if ($vgpIndex) {
+if ($ccgpIndex) {
+  printf "<th>class<br>CCGP&nbsp;link</th>\n";
+} elsif ($vgpIndex) {
   printf "<th>class<br>VGP&nbsp;link</th>\n";
 }
 print "</tr></thead><tbody>\n";
 }	#	sub startTable()
 
 ##############################################################################
 ### end the table output
 ##############################################################################
 sub endTable() {
 
 print <<"END";
 
 </tbody>
 </table>
 END
@@ -428,40 +453,52 @@
     $bioProject= "PRJEB25768" if ($accessionId eq "GCA_900324465.2");
     if ($bioProject eq "notFound") {
       printf "    <td align=left>%s</td>\n", $bioProject;
     } else {
       printf "    <td align=left><a href='https://www.ncbi.nlm.nih.gov/bioproject/?term=%s' target=_blank>%s</a></td>\n", $bioProject, $bioProject;
     }
     printf "    <td align=center><a href='%s' target=_blank>%s</a></td>\n", $ncbiFtpLink, $asmDate;
     if ("legacy" eq $asmHubName) {
       if (! defined($cladeId{$asmId})) {
          printf STDERR "# ERROR: missing clade definition for %s\n", $asmId;
          exit 255;
       } else {
          printf "    <td align=center>%s</td>\n", $cladeId{$asmId};
       }
     }
-    if ($vgpIndex) {
+    if ($ccgpIndex) {
+      my $sciNameUnderscore = $sciName;
+      $sciNameUnderscore =~ s/ /_/g;
+      $sciNameUnderscore = "Strigops_habroptilus" if ($sciName =~ m/Strigops habroptila/);
+
+      if (! defined($extraClass{$asmId})) {
+         printf STDERR "# ERROR: no 'class' defined for CCGP assembly %s\n", $asmId;
+         exit 255;
+      }
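+# TODO: it is not clear how to derive the CCGP species page name; the slug combines scientific and common names, so the link below may not resolve, e.g.: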
+# https://www.ccgproject.org/species/corynorhinus-townsendii-townsends-big-eared-bat
+      printf "    <td align=center><a href='https://www.ccgproject.org/species/%s/' target=_blank>%s</a></td>\n", $sciNameUnderscore, $extraClass{$asmId}
+    } elsif ($vgpIndex) {
       my $sciNameUnderscore = $sciName;
       $sciNameUnderscore =~ s/ /_/g;
       $sciNameUnderscore = "Strigops_habroptilus" if ($sciName =~ m/Strigops habroptila/);
 
-      if (! defined($vgpClass{$asmId})) {
-         printf STDERR "# ERROR: no 'class' defined for VGP assembly %s\n", $asmId;
+      if (! defined($extraClass{$asmId})) {
+         printf STDERR "# ERROR: no 'class' defined for VGP/CCGP assembly %s\n", $asmId;
          exit 255;
       }
-      printf "    <td align=center><a href='https://vgp.github.io/genomeark/%s/' target=_blank>%s</a></td>\n", $sciNameUnderscore, $vgpClass{$asmId}
+      printf "    <td align=center><a href='https://vgp.github.io/genomeark/%s/' target=_blank>%s</a></td>\n", $sciNameUnderscore, $extraClass{$asmId}
     }
     printf "</tr>\n";
   }
 }	#	sub tableContents()
 
 ##############################################################################
 ### main()
 ##############################################################################
 
 # if there is a 'promoted' list, it has been taken out of the 'orderList'
 # so will need to stuff it back in at the correct ordered location
 my %promotedList;	# key is asmId, value is common name
 my $promotedList = dirname(${orderList}) . "/promoted.list";
 my @promotedList;	# contents are asmIds, in order by lc(common name)
 my $promotedIndex = -1;	# to walk through @promotedList;