ede2fd190d25f754238a2f78b25249d400b596a3 hiram Thu Apr 14 22:10:55 2022 -0700 fixup the index creation to stuff in the "promoted" hub that is no longer in the tsv listings refs #29259 diff --git src/hg/makeDb/doc/asmHubs/mkHubIndex.pl src/hg/makeDb/doc/asmHubs/mkHubIndex.pl index 2a3fbc6..41f89c7 100755 --- src/hg/makeDb/doc/asmHubs/mkHubIndex.pl +++ src/hg/makeDb/doc/asmHubs/mkHubIndex.pl @@ -1,22 +1,23 @@ #!/usr/bin/env perl # # mkHubIndex.pl - construct index.html page for a set of assemblies in a hub # use strict; use warnings; +use File::Basename; use FindBin qw($Bin); use lib "$Bin"; use commonHtml; my $argc = scalar(@ARGV); if ($argc != 4) { printf STDERR "mkHubIndex.pl Name asmName defaultAsmId [two column name list] > index.html\n"; printf STDERR "e.g.: mkHubIndex Primates primates GCF_000001405.39_GRCh38.p13 primates.commonName.asmId.orderList.tsv\n"; printf STDERR "the name list is found in \$HOME/kent/src/hg/makeDb/doc/asmHubs/\n"; printf STDERR "\nthe two columns are 1: asmId (accessionId_assemblyName)\n"; printf STDERR "column 2: common name for species, columns separated by tab\n"; printf STDERR "The result prints to stdout the index.html page for this set of assemblies\n"; exit 255; } @@ -36,31 +37,31 @@ my $vgpIndex = 0; $vgpIndex = 1 if ($Name =~ m/vgp/i); my %vgpClass; # key is asmId, value is taxon 'class' as set by VGP project if ($vgpIndex) { my $vgpClass = "$home/kent/src/hg/makeDb/doc/vgpAsmHub/vgp.taxId.asmId.class.txt"; open (FH, "<$vgpClass") or die "can not read $vgpClass"; while (my $line = <FH>) { my ($taxId, $asmId, $class) = split('\t', $line); $vgpClass{$asmId} = $class; } close (FH); } my @orderList; # asmId of the assemblies in order from the *.list files # the order to read the different .list files: -my $assemblyCount = 0; +my $assemblyTotal = 0; my %commonName; # key is asmId, value is a common name, perhaps more appropriate # than found in assembly_report file ############################################################################## # from Perl Cookbook Recipe 2.17, print out large numbers with comma delimiters: ############################################################################## sub commify($) { my $text = reverse $_[0]; $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g; return scalar reverse $text } ############################################################################## ### start the HTML output ############################################################################## @@ -385,31 +386,67 @@ if (! defined($vgpClass{$asmId})) { printf STDERR "# ERROR: no 'class' defined for VGP assembly %s\n", $asmId; exit 255; } printf " <td align=center><a href='https://vgp.github.io/genomeark/%s/' target=_blank>%s</a></td>\n", $sciNameUnderscore, $vgpClass{$asmId} } printf "</tr>\n"; } } # sub tableContents() ############################################################################## ### main() ############################################################################## +# if there is a 'promoted' list, it has been taken out of the 'orderList' +# so will need to stuff it back in at the correct ordered location +my %promotedList; # key is asmId, value is common name +my $promotedList = dirname(${orderList}) . "/promoted.list"; +my @promotedList; # contents are asmIds, in order by lc(common name) +my $promotedIndex = -1; # to walk through @promotedList; + +if ( -s "${promotedList}" ) { + open (FH, "<${promotedList}" ) or die "can not read ${promotedList}"; + while (my $line = <FH>) { + next if ($line =~ m/^#/); + chomp $line; + my ($asmId, $commonName) = split('\t', $line); + $promotedList{$asmId} = $commonName; + } + close (FH); + foreach my $asmId ( sort { lc($promotedList{$a}) cmp lc($promotedList{$b}) } keys %promotedList) { + push @promotedList, $asmId; + } + $promotedIndex = 0; +} + open (FH, "<${orderList}") or die "can not read ${orderList}"; while (my $line = <FH>) { next if ($line =~ m/^#/); chomp $line; my ($asmId, $commonName) = split('\t', $line); + if ( ($promotedIndex > -1) && ($promotedIndex < scalar(@promotedList))) { + my $checkInsertAsmId = $promotedList[$promotedIndex]; + my $checkInsertName = $promotedList{$checkInsertAsmId}; + # insert before this commonName when alphabetic before + if (lc($checkInsertName) lt lc($commonName)) { + push @orderList, $checkInsertAsmId; + $commonName{$checkInsertAsmId} = $checkInsertName; + ++$assemblyTotal; + printf STDERR "# inserting '%s' before '%s' at # %03d\n", $checkInsertName, $commonName, $assemblyTotal; + ++$promotedIndex; # only doing one at this time + # TBD: will need to improve this for more inserts + } + } push @orderList, $asmId; $commonName{$asmId} = $commonName; - ++$assemblyCount; + ++$assemblyTotal; } close (FH); +# TBD: and would need to check if all promoted assemblies have been included startHtml(); startTable(); tableContents(); endTable(); endHtml();