67d50873eca0ae6f595367077c9735b7d348e5c2 hiram Fri May 1 12:31:59 2020 -0700 adding functions to build VGP index pages refs #23734 diff --git src/hg/makeDb/doc/asmHubs/mkAsmStats.pl src/hg/makeDb/doc/asmHubs/mkAsmStats.pl index 5d1f5b0..c6154f9 100755 --- src/hg/makeDb/doc/asmHubs/mkAsmStats.pl +++ src/hg/makeDb/doc/asmHubs/mkAsmStats.pl @@ -40,42 +40,66 @@ } ############################################################################## ### start the HTML output ############################################################################## sub startHtml() { my $timeStamp = `date "+%F"`; chomp $timeStamp; my $subSetMessage = "subset of $asmHubName only"; if ($asmHubName eq "vertebrate") { $subSetMessage = "subset of other ${asmHubName}s only"; } +if ($Name =~ m/vgp/i) { + print <<"END" +<!DOCTYPE HTML 4.01 Transitional> +<!--#set var="TITLE" value="VGP - Vertebrate Genomes Project assembly hubs, assembly statistics" --> +<!--#set var="ROOT" value="../.." --> + +<!--#include virtual="\$ROOT/inc/gbPageStartHardcoded.html" --> + +<h1>VGP - Vertebrate Genomes Project assembly hubs, assembly statistics</h1> +<p> +<a href='https://vertebrategenomesproject.org/' target=_blank> +<img src='VGPlogo.png' width=280 alt='VGP logo'></a></p> +<p> +This assembly hub contains assemblies released +by the <a href='https://vertebrategenomesproject.org/' target=_blank> +Vertebrate Genomes Project.</a> +</p> + +END +} else { print <<"END" <!DOCTYPE HTML 4.01 Transitional> <!--#set var="TITLE" value="$Name genomes assembly hubs, assembly statistics" --> <!--#set var="ROOT" value="../.." --> <!--#include virtual="\$ROOT/inc/gbPageStartHardcoded.html" --> <h1>$Name Genomes assembly hubs, assembly statistics</h1> <p> Assemblies from NCBI/Genbank/Refseq sources, $subSetMessage. </p> +END +} + + print <<"END" <h3>See also: <a href='index.html'>hub access</a>, <a href='trackData.html'>track statistics</a></h3><br> <h3>Data resource links</h3> NOTE: <em>Click on the column headers to sort the table by that column</em><br> The <em>link to genome browser</em> will attach only that single assembly to the genome browser. END } ############################################################################## ### start the table output ############################################################################## sub startTable() { print <<"END" <table class="sortable" border="1"> @@ -205,41 +229,46 @@ } ############################################################################## ### tableContents() ############################################################################## sub tableContents() { foreach my $asmId (reverse(@orderList)) { my ($gcPrefix, $asmAcc, $asmName) = split('_', $asmId, 3); my $accessionId = sprintf("%s_%s", $gcPrefix, $asmAcc); my $accessionDir = substr($asmId, 0 ,3); $accessionDir .= "/" . substr($asmId, 4 ,3); $accessionDir .= "/" . substr($asmId, 7 ,3); $accessionDir .= "/" . substr($asmId, 10 ,3); my $buildDir = "/hive/data/genomes/asmHubs/refseqBuild/$accessionDir/$asmId"; + if ($gcPrefix eq "GCA") { + $buildDir = "/hive/data/genomes/asmHubs/genbankBuild/$accessionDir/$asmId"; + } my $asmReport="$buildDir/download/${asmId}_assembly_report.txt"; if (! -s "$asmReport") { printf STDERR "# no assembly report:\n# %s\n", $asmReport; next; } my $chromSizes = "${buildDir}/${asmId}.chrom.sizes"; my $twoBit = "${buildDir}/trackData/addMask/${asmId}.masked.2bit"; if (! -s "$twoBit") { printf STDERR "# no 2bit file:\n# %s\n", $twoBit; next; } + my $trackDb="$buildDir/${asmId}.trackDb.txt"; + next if (! -s "$trackDb"); # assembly build not complete my $faSizeTxt = "${buildDir}/${asmId}.faSize.txt"; if ( ! -s "$faSizeTxt" ) { printf STDERR "twoBitToFa $twoBit stdout | faSize stdin > $faSizeTxt\n"; print `twoBitToFa $twoBit stdout | faSize stdin > $faSizeTxt`; } my ($gapSize, $maskPerCent) = maskStats($faSizeTxt); $overallGapSize += $gapSize; my ($seqCount, $totalSize) = asmCounts($chromSizes); $overallSeqCount += $seqCount; # my $totalSize=`ave -col=2 $chromSizes | grep "^total" | awk '{printf "%d", \$NF}'`; $overallNucleotides += $totalSize; my $gapCount = gapStats($buildDir, $asmId); $overallGapCount += $gapCount; my $sciName = "notFound"; my $commonName = "notFound";