dff254260c6b5cb3ab0f2257775fce47bbbfa3ea hiram Wed Mar 25 11:56:34 2020 -0700 now staging on hgdownload.soe.ucsc.edu refs #24748 diff --git src/hg/makeDb/doc/asmHubs/mkAsmStats.pl src/hg/makeDb/doc/asmHubs/mkAsmStats.pl index c6a8685..1b713fa 100755 --- src/hg/makeDb/doc/asmHubs/mkAsmStats.pl +++ src/hg/makeDb/doc/asmHubs/mkAsmStats.pl @@ -42,41 +42,41 @@ ############################################################################## ### start the HTML output ############################################################################## sub startHtml() { my $timeStamp = `date "+%F"`; chomp $timeStamp; my $subSetMessage = "subset of $asmHubName only"; if ($asmHubName eq "vertebrate") { $subSetMessage = "subset of other ${asmHubName}s only"; } print <<"END" <!DOCTYPE HTML 4.01 Transitional> -<!--#set var="TITLE" value="$Name genomes assembly hubs" --> +<!--#set var="TITLE" value="$Name genomes assembly hubs, assembly statistics" --> <!--#set var="ROOT" value="../.." --> <!--#include virtual="\$ROOT/inc/gbPageStartHardcoded.html" --> -<h1>$Name Genomes assembly hubs</h1> +<h1>$Name Genomes assembly hubs, assembly statistics</h1> <p> Assemblies from NCBI/Genbank/Refseq sources, $subSetMessage. </p> -<h3>See also: <a href='index.html'>hub access</a></h3><br> +<h3>See also: <a href='index.html'>hub access</a>, <a href='trackData.html'>track statistics</a></h3><br> <h3>Data resource links</h3> NOTE: <em>Click on the column headers to sort the table by that column</em><br> The <em>link to genome browser</em> will attach only that single assembly to the genome browser. END } ############################################################################## ### start the table output ############################################################################## sub startTable() { print <<"END" <table class="sortable" border="1"> <thead><tr><th>count</th> @@ -121,42 +121,53 @@ } else { print <<"END" </tbody> </table> END } } # sub endTable() ############################################################################## ### end the HTML output ############################################################################## sub endHtml() { if ($asmHubName ne "viral") { - printf "<p>\nOther assembly hubs available:<br>\n<table border='1'><thead>\n<tr>"; - - printf "<th><a href='../primates/asmStatsPrimates.html'>Primates</a></th>\n" - if ($asmHubName ne "primates"); - printf "<th><a href='../mammals/asmStatsMammals.html'>Mammals</a></th>\n" - if ($asmHubName ne "mammals"); - printf "<th><a href='../birds/asmStatsBirds.html'>Birds</a></th>\n" - if ($asmHubName ne "birds"); - printf "<th><a href='../fish/asmStatsFish.html'>Fish</a></th>\n" - if ($asmHubName ne "fish"); - printf "<th><a href='../vertebrate/asmStatsVertebrate.html'>other vertebrates</a></th>\n" - if ($asmHubName ne "vertebrate"); + printf "<p>\n<table border='1'><thead>\n<tr>"; + printf "<th>Assembly hubs index pages: </th>\n"; + printf "<th><a href='../primates/index.html'>Primates</a></th>\n"; + printf "<th><a href='../mammals/index.html'>Mammals</a></th>\n"; + printf "<th><a href='../birds/index.html'>Birds</a></th>\n"; + printf "<th><a href='../fish/index.html'>Fish</a></th>\n"; + printf "<th><a href='../vertebrate/index.html'>other vertebrates</a></th>\n"; + + printf "</tr><tr>\n"; + printf "<th>Hubs assembly statistics: </th>\n"; + printf "<th><a href='../primates/asmStatsPrimates.html'>Primates</a></th>\n"; + printf "<th><a href='../mammals/asmStatsMammals.html'>Mammals</a></th>\n"; + printf "<th><a href='../birds/asmStatsBirds.html'>Birds</a></th>\n"; + printf "<th><a href='../fish/asmStatsFish.html'>Fish</a></th>\n"; + printf "<th><a href='../vertebrate/asmStatsVertebrate.html'>other vertebrates</a></th>\n"; + + printf "</tr><tr>\n"; + printf "<th>Hubs track statistics: </th>\n"; + printf "<th><a href='../primates/trackData.html'>Primates</a></th>\n"; + printf "<th><a href='../mammals/trackData.html'>Mammals</a></th>\n"; + printf "<th><a href='../birds/trackData.html'>Birds</a></th>\n"; + printf "<th><a href='../fish/trackData.html'>Fish</a></th>\n"; + printf "<th><a href='../vertebrate/trackData.html'>other vertebrates</a></th>\n"; printf "</tr></thead>\n</table>\n</p>\n"; } print <<"END" </div><!-- closing gbsPage from gbPageStartHardcoded.html --> </div><!-- closing container-fluid from gbPageStartHardcoded.html --> <!--#include virtual="\$ROOT/inc/gbFooterHardcoded.html"--> <script type="text/javascript" src="/js/sorttable.js"></script> </body></html> END } sub asmCounts($) { my ($chromSizes) = @_; @@ -194,34 +205,40 @@ ############################################################################## ### tableContents() ############################################################################## sub tableContents() { foreach my $asmId (reverse(@orderList)) { my ($gcPrefix, $asmAcc, $asmName) = split('_', $asmId, 3); my $accessionId = sprintf("%s_%s", $gcPrefix, $asmAcc); my $accessionDir = substr($asmId, 0 ,3); $accessionDir .= "/" . substr($asmId, 4 ,3); $accessionDir .= "/" . substr($asmId, 7 ,3); $accessionDir .= "/" . substr($asmId, 10 ,3); my $buildDir = "/hive/data/genomes/asmHubs/refseqBuild/$accessionDir/$asmId"; my $asmReport="$buildDir/download/${asmId}_assembly_report.txt"; - next if (! -s "$asmReport"); + if (! -s "$asmReport") { + printf STDERR "# no assembly report:\n# %s\n", $asmReport; + next; + } my $chromSizes = "${buildDir}/${asmId}.chrom.sizes"; my $twoBit = "${buildDir}/trackData/addMask/${asmId}.masked.2bit"; - next if (! -s "$twoBit"); + if (! -s "$twoBit") { + printf STDERR "# no 2bit file:\n# %s\n", $twoBit; + next; + } my $faSizeTxt = "${buildDir}/${asmId}.faSize.txt"; if ( ! -s "$faSizeTxt" ) { printf STDERR "twoBitToFa $twoBit stdout | faSize stdin > $faSizeTxt\n"; print `twoBitToFa $twoBit stdout | faSize stdin > $faSizeTxt`; } my ($gapSize, $maskPerCent) = maskStats($faSizeTxt); $overallGapSize += $gapSize; my ($seqCount, $totalSize) = asmCounts($chromSizes); $overallSeqCount += $seqCount; # my $totalSize=`ave -col=2 $chromSizes | grep "^total" | awk '{printf "%d", \$NF}'`; $overallNucleotides += $totalSize; my $gapCount = gapStats($buildDir, $asmId); $overallGapCount += $gapCount; my $sciName = "notFound"; my $commonName = "notFound"; @@ -264,31 +281,32 @@ $commonName = $betterName{$asmId} if (exists($betterName{$asmId})); $sciName =~ s/.*:\s+//; $sciName =~ s/\s+\(.*//; } } elsif ($line =~ m/Taxid:/) { if ($taxId =~ m/notFound/) { ++$itemsFound; $taxId = $line; $taxId =~ s/.*:\s+//; } } } close (FH); my $hubUrl = "https://hgdownload.soe.ucsc.edu/hubs/$accessionDir/$accessionId"; printf "<tr><td align=right>%d</td>\n", ++$asmCount; - printf "<td align=center><a href='https://genome.ucsc.edu/cgi-bin/hgGateway?hubUrl=%s/hub.txt&genome=%s&position=lastDbPos' target=_blank>%s</a></td>\n", $hubUrl, $accessionId, $commonName; +# printf "<td align=center><a href='https://genome.ucsc.edu/cgi-bin/hgGateway?hubUrl=%s/hub.txt&genome=%s&position=lastDbPos' target=_blank>%s</a></td>\n", $hubUrl, $accessionId, $commonName; + printf "<td align=center><a href='https://genome.ucsc.edu/h/%s' target=_blank>%s</a></td>\n", $accessionId, $commonName; printf " <td align=center><a href='%s/' target=_blank>%s</a></td>\n", $hubUrl, $sciName; printf " <td align=left><a href='https://www.ncbi.nlm.nih.gov/assembly/%s/' target=_blank>%s</a></td>\n", $accessionId, $asmId; printf " <td align=right>%s</td>\n", commify($seqCount); printf " <td align=right>%s</td>\n", commify($totalSize); printf " <td align=right>%s</td>\n", commify($gapCount); printf " <td align=right>%s</td>\n", commify($gapSize); printf " <td align=right>%.2f</td>\n", $maskPerCent; printf "</tr>\n"; } } ############################################################################## ### main() ##############################################################################