67d50873eca0ae6f595367077c9735b7d348e5c2 hiram Fri May 1 12:31:59 2020 -0700 adding functions to build VGP index pages refs #23734 diff --git src/hg/makeDb/doc/asmHubs/mkHubIndex.pl src/hg/makeDb/doc/asmHubs/mkHubIndex.pl index 9402773..724f50d 100755 --- src/hg/makeDb/doc/asmHubs/mkHubIndex.pl +++ src/hg/makeDb/doc/asmHubs/mkHubIndex.pl @@ -35,42 +35,66 @@ ############################################################################## ### start the HTML output ############################################################################## sub startHtml() { my $timeStamp = `date "+%F"`; chomp $timeStamp; # <html xmlns="http://www.w3.org/1999/xhtml"> my $subSetMessage = "subset of $asmHubName only"; if ($asmHubName eq "vertebrate") { $subSetMessage = "subset of other ${asmHubName}s only"; } +if ($Name =~ m/vgp/i) { + print <<"END" +<!DOCTYPE HTML 4.01 Transitional> +<!--#set var="TITLE" value="VGP - Vertebrate Genomes Project assembly hub" --> +<!--#set var="ROOT" value="../.." --> + +<!--#include virtual="\$ROOT/inc/gbPageStartHardcoded.html" --> + +<h1>VGP - Vertebrate Genomes Project assembly hub</h1> +<p> +<a href='https://vertebrategenomesproject.org/' target=_blank> +<img src='VGPlogo.png' width=280 alt='VGP logo'></a></p> +<p> +This assembly hub contains assemblies released +by the <a href='https://vertebrategenomesproject.org/' target=_blank> +Vertebrate Genomes Project.</a> +</p> + +END +} else { print <<"END" <!DOCTYPE HTML 4.01 Transitional> <!--#set var="TITLE" value="$Name genomes assembly hubs" --> <!--#set var="ROOT" value="../.." --> <!--#include virtual="\$ROOT/inc/gbPageStartHardcoded.html" --> <h1>$Name Genomes assembly hubs</h1> <p> Assemblies from NCBI/Genbank/Refseq sources, $subSetMessage. </p> +END +} + +print <<"END" <h3>How to view the hub</h3> <p> Options: <ol> <li>The links to the genome browser in the table below will attach that one specific assembly to the genome browser. This is most likely what you want.</li> <li>Instead, you can attach the entire set of assemblies as one group to the genome browser with the following links depending upon which of our mirror site browsers you prefer to use: <ul> <li><a href="https://genome.ucsc.edu/cgi-bin/hgGateway?hubUrl=https://hgdownload.soe.ucsc.edu/hubs/$asmHubName/hub.txt&genome=GCF_000001405.39" target="_blank">genome.ucsc.edu</a></li> <li><a href="https://genome-euro.ucsc.edu/cgi-bin/hgGateway?hubUrl=https://hgdownload.soe.ucsc.edu/hubs/$asmHubName/hub.txt&genome=GCF_000001405.39" target="_blank">genome-euro.ucsc.edu</a></li> @@ -186,33 +210,36 @@ ############################################################################## ### tableContents() ############################################################################## sub tableContents() { my $rowCount = 0; foreach my $asmId (reverse(@orderList)) { my ($gcPrefix, $asmAcc, $asmName) = split('_', $asmId, 3); my $accessionId = sprintf("%s_%s", $gcPrefix, $asmAcc); my $accessionDir = substr($asmId, 0 ,3); $accessionDir .= "/" . substr($asmId, 4 ,3); $accessionDir .= "/" . substr($asmId, 7 ,3); $accessionDir .= "/" . substr($asmId, 10 ,3); my $ncbiFtpLink = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/$accessionDir/$asmId"; my $buildDir = "/hive/data/genomes/asmHubs/refseqBuild/$accessionDir/$asmId"; + if ($gcPrefix eq "GCA") { + $buildDir = "/hive/data/genomes/asmHubs/genbankBuild/$accessionDir/$asmId"; + } my $asmReport="$buildDir/download/${asmId}_assembly_report.txt"; my $trackDb="$buildDir/${asmId}.trackDb.txt"; - next if (! -s "$trackDb"); + next if (! -s "$trackDb"); # assembly build not complete my $chromSizes="${buildDir}/${asmId}.chrom.sizes"; my $sciName = "notFound"; my $commonName = "notFound"; my $bioSample = "notFound"; my $bioProject = "notFound"; my $taxId = "notFound"; my $asmDate = "notFound"; my $itemsFound = 0; open (FH, "<$asmReport") or die "can not read $asmReport"; while (my $line = <FH>) { last if ($itemsFound > 5); chomp $line; $line =~ s/ //g;; $line =~ s/\s+$//g;; if ($line =~ m/Date:/) {