47fa2308b393f3d1eea2310c58cdc46090356a62 hiram Sun Apr 2 22:35:22 2023 -0700 adding custom index page code for CCGP collection refs #30912 diff --git src/hg/makeDb/doc/asmHubs/mkHubIndex.pl src/hg/makeDb/doc/asmHubs/mkHubIndex.pl index e1bcf30..4414bf6 100755 --- src/hg/makeDb/doc/asmHubs/mkHubIndex.pl +++ src/hg/makeDb/doc/asmHubs/mkHubIndex.pl @@ -24,40 +24,44 @@ my $home = $ENV{'HOME'}; my $toolsDir = "$home/kent/src/hg/makeDb/doc/asmHubs"; my $Name = shift; my $asmHubName = shift; my $defaultAssembly = shift; my $inputList = shift; my $orderList = $inputList; if ( ! -s "$orderList" ) { $orderList = $toolsDir/$inputList; } my %cladeId; # value is asmId, value is clade, useful for 'legacy' index page printf STDERR "# mkHubIndex %s %s %s %s\n", $Name, $asmHubName, $defaultAssembly, $orderList; my $hprcIndex = 0; +my $ccgpIndex = 0; my $vgpIndex = 0; $hprcIndex = 1 if ($Name =~ m/hprc/i); +$ccgpIndex = 1 if ($Name =~ m/ccgp/i); $vgpIndex = 1 if ($Name =~ m/vgp/i); -my %vgpClass; # key is asmId, value is taxon 'class' as set by VGP project -if ($vgpIndex) { - my $vgpClass = "$home/kent/src/hg/makeDb/doc/vgpAsmHub/vgp.taxId.asmId.class.txt"; - open (FH, "<$vgpClass") or die "can not read $vgpClass"; +my %extraClass; # key is asmId, value is taxon 'class' as set by VGP project +if ($vgpIndex || $ccgpIndex) { + my $whichIndex = "vgp"; + $whichIndex = "ccgp" if ($ccgpIndex); + my $extraClass = "$home/kent/src/hg/makeDb/doc/${whichIndex}AsmHub/${whichIndex}.taxId.asmId.class.txt"; + open (FH, "<$extraClass") or die "can not read $extraClass"; while (my $line = <FH>) { my ($taxId, $asmId, $class) = split('\t', $line); - $vgpClass{$asmId} = $class; + $extraClass{$asmId} = $class; } close (FH); } my @orderList; # asmId of the assemblies in order from the *.list files # the order to read the different .list files: my $assemblyTotal = 0; my %commonName; # key is asmId, value is a common name, perhaps more appropriate # than found in assembly_report file 
############################################################################## # from Perl Cookbook Recipe 2.17, print out large numbers with comma delimiters: ############################################################################## sub commify($) { my $text = reverse $_[0]; @@ -96,31 +100,50 @@

VGP - Vertebrate Genomes Project assembly hub

VGP logo

This assembly hub contains assemblies released by the Vertebrate Genomes Project. $vgpSubset

END } else { - if ($hprcIndex) { + if ($ccgpIndex) { + print <<"END"; + + + + + + +

CCGP - California Conservation Genomics Project assembly hub

+

+ +CCGP logo

+

+This assembly hub contains assemblies released +by the +California Conservation Genomics Project. +

+ +END + } elsif ($hprcIndex) { print <<"END";

HPRC - Human Pangenome Reference Consortium assembly hub

HPRC logo

This assembly hub contains assemblies released by the Human Pangenome Reference Consortium. @@ -242,31 +265,33 @@ common name and
view in browser scientific name
and data download NCBI assembly BioSample '; if ("viral" ne $asmHubName) { printf " BioProject\n"; } printf "assembly date,
source link\n"; if ("legacy" eq $asmHubName) { printf "clade\n"; } -if ($vgpIndex) { +if ($ccgpIndex) { + printf "class
CCGP link\n"; +} elsif ($vgpIndex) { printf "class
VGP link\n"; } print "\n"; } # sub startTable() ############################################################################## ### end the table output ############################################################################## sub endTable() { print <<"END"; END @@ -428,40 +453,52 @@ $bioProject= "PRJEB25768" if ($accessionId eq "GCA_900324465.2"); if ($bioProject eq "notFound") { printf " %s\n", $bioProject; } else { printf " %s\n", $bioProject, $bioProject; } printf " %s\n", $ncbiFtpLink, $asmDate; if ("legacy" eq $asmHubName) { if (! defined($cladeId{$asmId})) { printf STDERR "# ERROR: missing clade definition for %s\n", $asmId; exit 255; } else { printf " %s\n", $cladeId{$asmId}; } } - if ($vgpIndex) { + if ($ccgpIndex) { + my $sciNameUnderscore = $sciName; + $sciNameUnderscore =~ s/ /_/g; + $sciNameUnderscore = "Strigops_habroptilus" if ($sciName =~ m/Strigops habroptila/); + + if (! defined($extraClass{$asmId})) { + printf STDERR "# ERROR: no 'class' defined for CCGP assembly %s\n", $asmId; + exit 255; + } +# it isn't clear how we can get these names +# https://www.ccgproject.org/species/corynorhinus-townsendii-townsends-big-eared-bat + printf " %s\n", $sciNameUnderscore, $extraClass{$asmId} + } elsif ($vgpIndex) { my $sciNameUnderscore = $sciName; $sciNameUnderscore =~ s/ /_/g; $sciNameUnderscore = "Strigops_habroptilus" if ($sciName =~ m/Strigops habroptila/); - if (! defined($vgpClass{$asmId})) { - printf STDERR "# ERROR: no 'class' defined for VGP assembly %s\n", $asmId; + if (! 
defined($extraClass{$asmId})) { + printf STDERR "# ERROR: no 'class' defined for VGP/CCGP assembly %s\n", $asmId; exit 255; } - printf " %s\n", $sciNameUnderscore, $vgpClass{$asmId} + printf " %s\n", $sciNameUnderscore, $extraClass{$asmId} } printf "\n"; } } # sub tableContents() ############################################################################## ### main() ############################################################################## # if there is a 'promoted' list, it has been taken out of the 'orderList' # so will need to stuff it back in at the correct ordered location my %promotedList; # key is asmId, value is common name my $promotedList = dirname(${orderList}) . "/promoted.list"; my @promotedList; # contents are asmIds, in order by lc(common name) my $promotedIndex = -1; # to walk through @promotedList;