2862b2a8ea3a56805265df693bf94a4ed7d07cca hiram Thu Jul 21 18:04:51 2022 -0700 adding the viral group to the index page footer table and special sets of columns for virus index pages refs #29545 diff --git src/hg/makeDb/doc/asmHubs/mkHubIndex.pl src/hg/makeDb/doc/asmHubs/mkHubIndex.pl index 41f89c7..ed79318 100755 --- src/hg/makeDb/doc/asmHubs/mkHubIndex.pl +++ src/hg/makeDb/doc/asmHubs/mkHubIndex.pl @@ -20,30 +20,31 @@ printf STDERR "The result prints to stdout the index.html page for this set of assemblies\n"; exit 255; } my $home = $ENV{'HOME'}; my $toolsDir = "$home/kent/src/hg/makeDb/doc/asmHubs"; my $Name = shift; my $asmHubName = shift; my $defaultAssembly = shift; my $inputList = shift; my $orderList = $inputList; if ( ! -s "$orderList" ) { $orderList = $toolsDir/$inputList; } +my %cladeId; # key is asmId, value is clade, useful for 'legacy' index page printf STDERR "# mkHubIndex %s %s %s %s\n", $Name, $asmHubName, $defaultAssembly, $orderList; my $vgpIndex = 0; $vgpIndex = 1 if ($Name =~ m/vgp/i); my %vgpClass; # key is asmId, value is taxon 'class' as set by VGP project if ($vgpIndex) { my $vgpClass = "$home/kent/src/hg/makeDb/doc/vgpAsmHub/vgp.taxId.asmId.class.txt"; open (FH, "<$vgpClass") or die "can not read $vgpClass"; while (my $line = <FH>) { my ($taxId, $asmId, $class) = split('\t', $line); $vgpClass{$asmId} = $class; } close (FH); } @@ -188,33 +189,41 @@ The other links provide access to NCBI resources for these assemblies. 
END } # sub startHtml() ############################################################################## ### start the table output ############################################################################## sub startTable() { print ' <table class="sortable" border="1"> <thead><tr><th>count</th> <th>common name and<br>view in browser</th> <th>scientific name<br>and data download</th> <th>NCBI assembly</th> - <th>BioSample</th><th>BioProject</th> - <th>assembly date,<br>source link</th> + <th>BioSample</th> '; +if ("viral" ne $asmHubName) { + printf " <th>BioProject</th>\n"; +} + +printf "<th>assembly date,<br>source link</th>\n"; + +if ("legacy" eq $asmHubName) { + printf "<th>clade</th>\n"; +} if ($vgpIndex) { printf "<th>class<br>VGP link</th>\n"; } print "</tr></thead><tbody>\n"; } # sub startTable() ############################################################################## ### end the table output ############################################################################## sub endTable() { print <<"END"; </tbody> @@ -354,43 +363,49 @@ my $browserName = $commonName; my $browserUrl = "https://genome.ucsc.edu/h/$accessionId"; if ($asmId !~ m/^GC/) { $hubUrl = "https://hgdownload.soe.ucsc.edu/goldenPath/$asmId/bigZips"; $browserUrl = "https://genome.ucsc.edu/cgi-bin/hgTracks?db=$asmId"; $browserName = "$commonName ($asmId)"; } printf "<tr><td align=right>%d</td>\n", ++$rowCount; printf "<td align=center><a href='%s' target=_blank>%s</a></td>\n", $browserUrl, $browserName; printf " <td align=center><a href='%s/' target=_blank>%s</a></td>\n", $hubUrl, $sciName; if ($asmId !~ m/^GC/) { printf " <td align=left><a href='https://www.ncbi.nlm.nih.gov/assembly/%s_%s/' target=_blank>%s_%s</a></td>\n", $gcPrefix, $asmAcc, $accessionId, $asmName; } else { printf " <td align=left><a href='https://www.ncbi.nlm.nih.gov/assembly/%s/' target=_blank>%s</a></td>\n", $accessionId, $asmId; } + # viruses do not appear to have BioSample + if ($asmHubName ne "viral") { if ( 
$bioSample ne "notFound" ) { printf " <td align=left><a href='https://www.ncbi.nlm.nih.gov/biosample/?term=%s' target=_blank>%s</a></td>\n", $bioSample, $bioSample; } else { printf " <td align=left>n/a</td>\n"; } + } # one broken assembly_report $bioProject= "PRJEB25768" if ($accessionId eq "GCA_900324465.2"); if ($bioProject eq "notFound") { printf " <td align=left>%s</td>\n", $bioProject; } else { printf " <td align=left><a href='https://www.ncbi.nlm.nih.gov/bioproject/?term=%s' target=_blank>%s</a></td>\n", $bioProject, $bioProject; } printf " <td align=center><a href='%s' target=_blank>%s</a></td>\n", $ncbiFtpLink, $asmDate; + if ("legacy" eq $asmHubName) { + printf " <td align=center>%s</td>\n", $cladeId{$asmId}; + } if ($vgpIndex) { my $sciNameUnderscore = $sciName; $sciNameUnderscore =~ s/ /_/g; $sciNameUnderscore = "Strigops_habroptilus" if ($sciName =~ m/Strigops habroptila/); if (! defined($vgpClass{$asmId})) { printf STDERR "# ERROR: no 'class' defined for VGP assembly %s\n", $asmId; exit 255; } printf " <td align=center><a href='https://vgp.github.io/genomeark/%s/' target=_blank>%s</a></td>\n", $sciNameUnderscore, $vgpClass{$asmId} } printf "</tr>\n"; } } # sub tableContents() @@ -408,30 +423,41 @@ if ( -s "${promotedList}" ) { open (FH, "<${promotedList}" ) or die "can not read ${promotedList}"; while (my $line = <FH>) { next if ($line =~ m/^#/); chomp $line; my ($asmId, $commonName) = split('\t', $line); $promotedList{$asmId} = $commonName; } close (FH); foreach my $asmId ( sort { lc($promotedList{$a}) cmp lc($promotedList{$b}) } keys %promotedList) { push @promotedList, $asmId; } $promotedIndex = 0; } +my $cladeList = dirname(${orderList}) . 
"/$asmHubName.clade.txt"; +if ( -s "${cladeList}" ) { + open (FH, "<$cladeList") or die "can not read ${cladeList}"; + while (my $clade = <FH>) { + chomp $clade; + my @a = split('\t', $clade); + $cladeId{$a[0]} = $a[1]; + } + close (FH); +} + open (FH, "<${orderList}") or die "can not read ${orderList}"; while (my $line = <FH>) { next if ($line =~ m/^#/); chomp $line; my ($asmId, $commonName) = split('\t', $line); if ( ($promotedIndex > -1) && ($promotedIndex < scalar(@promotedList))) { my $checkInsertAsmId = $promotedList[$promotedIndex]; my $checkInsertName = $promotedList{$checkInsertAsmId}; # insert before this commonName when alphabetic before if (lc($checkInsertName) lt lc($commonName)) { push @orderList, $checkInsertAsmId; $commonName{$checkInsertAsmId} = $checkInsertName; ++$assemblyTotal; printf STDERR "# inserting '%s' before '%s' at # %03d\n", $checkInsertName, $commonName, $assemblyTotal; ++$promotedIndex; # only doing one at this time