47fa2308b393f3d1eea2310c58cdc46090356a62 hiram Sun Apr 2 22:35:22 2023 -0700 adding custom index page code for CCGP collection refs #30912 diff --git src/hg/makeDb/doc/asmHubs/mkHubIndex.pl src/hg/makeDb/doc/asmHubs/mkHubIndex.pl index e1bcf30..4414bf6 100755 --- src/hg/makeDb/doc/asmHubs/mkHubIndex.pl +++ src/hg/makeDb/doc/asmHubs/mkHubIndex.pl @@ -24,40 +24,44 @@ my $home = $ENV{'HOME'}; my $toolsDir = "$home/kent/src/hg/makeDb/doc/asmHubs"; my $Name = shift; my $asmHubName = shift; my $defaultAssembly = shift; my $inputList = shift; my $orderList = $inputList; if ( ! -s "$orderList" ) { $orderList = $toolsDir/$inputList; } my %cladeId; # value is asmId, value is clade, useful for 'legacy' index page printf STDERR "# mkHubIndex %s %s %s %s\n", $Name, $asmHubName, $defaultAssembly, $orderList; my $hprcIndex = 0; +my $ccgpIndex = 0; my $vgpIndex = 0; $hprcIndex = 1 if ($Name =~ m/hprc/i); +$ccgpIndex = 1 if ($Name =~ m/ccgp/i); $vgpIndex = 1 if ($Name =~ m/vgp/i); -my %vgpClass; # key is asmId, value is taxon 'class' as set by VGP project -if ($vgpIndex) { - my $vgpClass = "$home/kent/src/hg/makeDb/doc/vgpAsmHub/vgp.taxId.asmId.class.txt"; - open (FH, "<$vgpClass") or die "can not read $vgpClass"; +my %extraClass; # key is asmId, value is taxon 'class' as set by VGP project +if ($vgpIndex || $ccgpIndex) { + my $whichIndex = "vgp"; + $whichIndex = "ccgp" if ($ccgpIndex); + my $extraClass = "$home/kent/src/hg/makeDb/doc/${whichIndex}AsmHub/${whichIndex}.taxId.asmId.class.txt"; + open (FH, "<$extraClass") or die "can not read $extraClass"; while (my $line = <FH>) { my ($taxId, $asmId, $class) = split('\t', $line); - $vgpClass{$asmId} = $class; + $extraClass{$asmId} = $class; } close (FH); } my @orderList; # asmId of the assemblies in order from the *.list files # the order to read the different .list files: my $assemblyTotal = 0; my %commonName; # key is asmId, value is a common name, perhaps more appropriate # than found in assembly_report file ############################################################################## # from Perl Cookbook Recipe 2.17, print out large numbers with comma delimiters: ############################################################################## sub commify($) { my $text = reverse $_[0]; @@ -96,31 +100,50 @@ <!--#include virtual="\$ROOT/inc/gbPageStartHardcoded.html" --> <h1>VGP - Vertebrate Genomes Project assembly hub</h1> <p> <a href='https://vertebrategenomesproject.org/' target=_blank> <img src='VGPlogo.png' width=280 alt='VGP logo'></a></p> <p> This assembly hub contains assemblies released by the <a href='https://vertebrategenomesproject.org/' target=_blank> Vertebrate Genomes Project.</a> $vgpSubset </p> END } else { - if ($hprcIndex) { + if ($ccgpIndex) { + print <<"END"; +<!DOCTYPE HTML 4.01 Transitional> +<!--#set var="TITLE" value="CCGP - California Conservation Genomics Project " --> +<!--#set var="ROOT" value="../.." --> + +<!--#include virtual="\$ROOT/inc/gbPageStartHardcoded.html" --> + +<h1>CCGP - California Conservation Genomics Project assembly hub</h1> +<p> +<a href='https://www.ccgproject.org/' target=_blank> +<img src='CCGP_logo.png' width=280 alt='CCGP logo'></a></p> +<p> +This assembly hub contains assemblies released +by the <a href='https://www.ccgproject.org/' target=_blank> +California Conservation Genomics Project.</a> +</p> + +END + } elsif ($hprcIndex) { print <<"END"; <!DOCTYPE HTML 4.01 Transitional> <!--#set var="TITLE" value="HPRC - Human Pangenome Reference Consortium" --> <!--#set var="ROOT" value="../.." --> <!--#include virtual="\$ROOT/inc/gbPageStartHardcoded.html" --> <h1>HPRC - Human Pangenome Reference Consortium assembly hub</h1> <p> <a href='https://humanpangenome.org/' target=_blank> <img src='HPRC_logo.png' width=280 alt='HPRC logo'></a></p> <p> This assembly hub contains assemblies released by the <a href='https://humanpangenome.org/' target=_blank> Human Pangenome Reference Consortium.</a> @@ -242,31 +265,33 @@ <th>common name and<br>view in browser</th> <th>scientific name<br>and data download</th> <th>NCBI assembly</th> <th>BioSample</th> '; if ("viral" ne $asmHubName) { printf " <th>BioProject</th>\n"; } printf "<th>assembly date,<br>source link</th>\n"; if ("legacy" eq $asmHubName) { printf "<th>clade</th>\n"; } -if ($vgpIndex) { +if ($ccgpIndex) { + printf "<th>class<br>CCGP link</th>\n"; +} elsif ($vgpIndex) { printf "<th>class<br>VGP link</th>\n"; } print "</tr></thead><tbody>\n"; } # sub startTable() ############################################################################## ### end the table output ############################################################################## sub endTable() { print <<"END"; </tbody> </table> END @@ -428,40 +453,52 @@ $bioProject= "PRJEB25768" if ($accessionId eq "GCA_900324465.2"); if ($bioProject eq "notFound") { printf " <td align=left>%s</td>\n", $bioProject; } else { printf " <td align=left><a href='https://www.ncbi.nlm.nih.gov/bioproject/?term=%s' target=_blank>%s</a></td>\n", $bioProject, $bioProject; } printf " <td align=center><a href='%s' target=_blank>%s</a></td>\n", $ncbiFtpLink, $asmDate; if ("legacy" eq $asmHubName) { if (! defined($cladeId{$asmId})) { printf STDERR "# ERROR: missing clade definition for %s\n", $asmId; exit 255; } else { printf " <td align=center>%s</td>\n", $cladeId{$asmId}; } } - if ($vgpIndex) { + if ($ccgpIndex) { + my $sciNameUnderscore = $sciName; + $sciNameUnderscore =~ s/ /_/g; + $sciNameUnderscore = "Strigops_habroptilus" if ($sciName =~ m/Strigops habroptila/); + + if (! defined($extraClass{$asmId})) { + printf STDERR "# ERROR: no 'class' defined for CCGP assembly %s\n", $asmId; + exit 255; + } +# it isn't clear how we can get these names +# https://www.ccgproject.org/species/corynorhinus-townsendii-townsends-big-eared-bat + printf " <td align=center><a href='https://www.ccgproject.org/species/%s/' target=_blank>%s</a></td>\n", $sciNameUnderscore, $extraClass{$asmId} + } elsif ($vgpIndex) { my $sciNameUnderscore = $sciName; $sciNameUnderscore =~ s/ /_/g; $sciNameUnderscore = "Strigops_habroptilus" if ($sciName =~ m/Strigops habroptila/); - if (! defined($vgpClass{$asmId})) { - printf STDERR "# ERROR: no 'class' defined for VGP assembly %s\n", $asmId; + if (! defined($extraClass{$asmId})) { + printf STDERR "# ERROR: no 'class' defined for VGP/CCGP assembly %s\n", $asmId; exit 255; } - printf " <td align=center><a href='https://vgp.github.io/genomeark/%s/' target=_blank>%s</a></td>\n", $sciNameUnderscore, $vgpClass{$asmId} + printf " <td align=center><a href='https://vgp.github.io/genomeark/%s/' target=_blank>%s</a></td>\n", $sciNameUnderscore, $extraClass{$asmId} } printf "</tr>\n"; } } # sub tableContents() ############################################################################## ### main() ############################################################################## # if there is a 'promoted' list, it has been taken out of the 'orderList' # so will need to stuff it back in at the correct ordered location my %promotedList; # key is asmId, value is common name my $promotedList = dirname(${orderList}) . "/promoted.list"; my @promotedList; # contents are asmIds, in order by lc(common name) my $promotedIndex = -1; # to walk through @promotedList;