ede2fd190d25f754238a2f78b25249d400b596a3
hiram
  Thu Apr 14 22:10:55 2022 -0700
fixup the index creation to stuff in the "promoted" hub that is no longer in the tsv listings refs #29259

diff --git src/hg/makeDb/doc/asmHubs/mkHubIndex.pl src/hg/makeDb/doc/asmHubs/mkHubIndex.pl
index 2a3fbc6..41f89c7 100755
--- src/hg/makeDb/doc/asmHubs/mkHubIndex.pl
+++ src/hg/makeDb/doc/asmHubs/mkHubIndex.pl
@@ -1,22 +1,23 @@
 #!/usr/bin/env perl
 #
 # mkHubIndex.pl - construct index.html page for a set of assemblies in a hub
 #
 
 use strict;
 use warnings;
+use File::Basename;
 use FindBin qw($Bin);
 use lib "$Bin";
 use commonHtml;
 
 my $argc = scalar(@ARGV);
 if ($argc != 4) {
   printf STDERR "mkHubIndex.pl Name asmName defaultAsmId [two column name list] > index.html\n";
   printf STDERR "e.g.: mkHubIndex Primates primates GCF_000001405.39_GRCh38.p13 primates.commonName.asmId.orderList.tsv\n";
   printf STDERR "the name list is found in \$HOME/kent/src/hg/makeDb/doc/asmHubs/\n";
   printf STDERR "\nthe two columns are 1: asmId (accessionId_assemblyName)\n";
   printf STDERR "column 2: common name for species, columns separated by tab\n";
   printf STDERR "The result prints to stdout the index.html page for this set of assemblies\n";
   exit 255;
 }
 
@@ -36,31 +37,31 @@
 my $vgpIndex = 0;
 $vgpIndex = 1 if ($Name =~ m/vgp/i);
 my %vgpClass;	# key is asmId, value is taxon 'class' as set by VGP project
 if ($vgpIndex) {
   my $vgpClass = "$home/kent/src/hg/makeDb/doc/vgpAsmHub/vgp.taxId.asmId.class.txt";
   open (FH, "<$vgpClass") or die "can not read $vgpClass";
   while (my $line = <FH>) {
     my ($taxId, $asmId, $class) = split('\t', $line);
     $vgpClass{$asmId} = $class;
   }
   close (FH);
 }
 
 my @orderList;	# asmId of the assemblies in order from the *.list files
 # the order to read the different .list files:
-my $assemblyCount = 0;
+my $assemblyTotal = 0;
 my %commonName;	# key is asmId, value is a common name, perhaps more appropriate
                 # than found in assembly_report file
 
 ##############################################################################
 # from Perl Cookbook Recipe 2.17, print out large numbers with comma delimiters:
 ##############################################################################
 sub commify($) {
     my $text = reverse $_[0];
     $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
     return scalar reverse $text
 }
 
 ##############################################################################
 ### start the HTML output
 ##############################################################################
@@ -385,31 +386,74 @@
 
       if (! defined($vgpClass{$asmId})) {
          printf STDERR "# ERROR: no 'class' defined for VGP assembly %s\n", $asmId;
          exit 255;
       }
       printf "    <td align=center><a href='https://vgp.github.io/genomeark/%s/' target=_blank>%s</a></td>\n", $sciNameUnderscore, $vgpClass{$asmId}
     }
     printf "</tr>\n";
   }
 }	#	sub tableContents()
 
 ##############################################################################
 ### main()
 ##############################################################################
 
+# if there is a 'promoted' list, it has been taken out of the 'orderList'
+# so will need to stuff it back in at the correct ordered location
+my %promotedList;	# key is asmId, value is common name
+my $promotedList = dirname(${orderList}) . "/promoted.list";
+my @promotedList;	# contents are asmIds, in order by lc(common name)
+my $promotedIndex = -1;	# to walk through @promotedList;
+
+if ( -s "${promotedList}" ) {
+  open (FH, "<${promotedList}" ) or die "can not read ${promotedList}";
+  while (my $line = <FH>) {
+    next if ($line =~ m/^#/);
+    chomp $line;
+    my ($asmId, $commonName) = split('\t', $line);
+    $promotedList{$asmId} = $commonName;
+  }
+  close (FH);
+  foreach my $asmId ( sort { lc($promotedList{$a}) cmp lc($promotedList{$b}) } keys %promotedList) {
+     push @promotedList, $asmId;
+  }
+  $promotedIndex = 0;
+}
+
 open (FH, "<${orderList}") or die "can not read ${orderList}";
 while (my $line = <FH>) {
   next if ($line =~ m/^#/);
   chomp $line;
   my ($asmId, $commonName) = split('\t', $line);
+  # insert every pending promoted assembly whose common name sorts
+  # alphabetically before this commonName (there may be more than one)
+  while ( ($promotedIndex > -1) && ($promotedIndex < scalar(@promotedList))) {
+     my $checkInsertAsmId = $promotedList[$promotedIndex];
+     my $checkInsertName = $promotedList{$checkInsertAsmId};
+     last if (lc($checkInsertName) ge lc($commonName));
+     push @orderList, $checkInsertAsmId;
+     $commonName{$checkInsertAsmId} = $checkInsertName;
+     ++$assemblyTotal;
+     printf STDERR "# inserting '%s' before '%s' at # %03d\n", $checkInsertName, $commonName, $assemblyTotal;
+     ++$promotedIndex;
+  }
   push @orderList, $asmId;
   $commonName{$asmId} = $commonName;
-  ++$assemblyCount;
+  ++$assemblyTotal;
 }
 close (FH);
+# any promoted assemblies that sort after the last orderList entry
+# have not been inserted yet, append them so none are left out
+while ( ($promotedIndex > -1) && ($promotedIndex < scalar(@promotedList))) {
+  my $appendAsmId = $promotedList[$promotedIndex++];
+  push @orderList, $appendAsmId;
+  $commonName{$appendAsmId} = $promotedList{$appendAsmId};
+  ++$assemblyTotal;
+  printf STDERR "# appending '%s' at # %03d\n", $promotedList{$appendAsmId}, $assemblyTotal;
+}
 
 startHtml();
 startTable();
 tableContents();
 endTable();
 endHtml();