19d33fe8c7b5b90b7faa514185c79eb6af0d8d8b hiram Tue Jan 21 13:22:47 2020 -0800 now using new style GCF path names and separate hubs for each assembly refs #24748 diff --git src/hg/makeDb/doc/primateAsmHub/mkHubIndex.pl src/hg/makeDb/doc/primateAsmHub/mkHubIndex.pl index b330087..f1528d3 100755 --- src/hg/makeDb/doc/primateAsmHub/mkHubIndex.pl +++ src/hg/makeDb/doc/primateAsmHub/mkHubIndex.pl @@ -1,110 +1,114 @@ #!/usr/bin/env perl use strict; use warnings; my $home = $ENV{'HOME'}; my $srcDocDir = "primateAsmHub"; my $asmHubDocDir = "$home/kent/src/hg/makeDb/doc/$srcDocDir"; -my $Name = "Primates"; +my $Name = "Primate"; my $asmHubName = "primates"; my $defaultAssembly = "GCF_000001405.39_GRCh38.p13"; my $srcDir = "$home/kent/src/hg/makeDb/doc/$srcDocDir"; -my $commonNameList = "primates.asmId.commonName.tsv"; my $commonNameOrder = "primates.commonName.asmId.orderList.tsv"; my @orderList; # asmId of the assemblies in order from the *.list files # the order to read the different .list files: -my @classList = qw( human ); -my %class; # key is asmId, value is from class list my $assemblyCount = 0; ############################################################################## # from Perl Cookbook Recipe 2.17, print out large numbers with comma delimiters: ############################################################################## sub commify($) { my $text = reverse $_[0]; $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g; return scalar reverse $text } ############################################################################## ### start the HTML output ############################################################################## sub startHtml() { my $timeStamp = `date "+%F"`; chomp $timeStamp; # print <<"END" - + -

Primate Genomes assembly hubs

+

$Name Genomes assembly hubs

-Assemblies from NCBI/Genbank/Refseq sources +Assemblies from NCBI/Genbank/Refseq sources, subset of $asmHubName only.

How to view the hub

-You can load this hub from our +Individual assemblies are attached to the genome browser via the +link to genome browser in the table below. To attach all +of these assemblies in one set for this hub, select the ${Name}s assembly +hub from our Public Hubs page or by clicking these assembly links to any of our official websites:

-To manually attach this hub to other genome browsers: +To manually attach all the assemblies in this hub to other genome browsers:

  1. From the blue navigation bar, go to My Data -> Track Hubs
  2. Then select the My Hubs tab and enter this URL into the textbox:
    https://hgdownload.soe.ucsc.edu/hubs/$asmHubName/hub.txt
  3. Once you have added the URL to the entry form, press the Add Hub button to add the hub.

After adding the hub, you will be redirected to the gateway page. The -genome assemblies can be selected from the Reference Genome Improvement Hub Assembly dropdown menu. -

-

-

See also: assembly statistics

+genome assemblies can be selected from the +${Name}s Hub Assembly dropdown menu. +Instead of adding all the assemblies in one collected group, use the individual +link to genome browser in the table below.

+

See also: assembly statistics


Data resource links

-NOTE: Click on the column headers to sort the table by that column +NOTE: Click on the column headers to sort the table by that column
+The link to genome browser will attach only that single assembly to +the genome browser. END } # sub startHtml() ############################################################################## ### start the table output ############################################################################## sub startTable() { print <<"END" @@ -131,35 +135,37 @@ print <<"END" END } # sub endHtml() ############################################################################## ### tableContents() ############################################################################## sub tableContents() { my $rowCount = 0; foreach my $asmId (@orderList) { - my $buildDir = "/hive/data/genomes/asmHubs/refseqBuild/" . substr($asmId, 0 ,3); - $buildDir .= "/" . substr($asmId, 4 ,3); - $buildDir .= "/" . substr($asmId, 7 ,3); - $buildDir .= "/" . substr($asmId, 10 ,3); - $buildDir .= "/" . $asmId; + my $accessionDir = substr($asmId, 0 ,3); + $accessionDir .= "/" . substr($asmId, 4 ,3); + $accessionDir .= "/" . substr($asmId, 7 ,3); + $accessionDir .= "/" . substr($asmId, 10 ,3); + $accessionDir .= "/" . 
$asmId; + my $ncbiFtpLink = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/$accessionDir/"; + my $buildDir = "/hive/data/genomes/asmHubs/refseqBuild/$accessionDir"; my $asmReport="$buildDir/download/${asmId}_assembly_report.txt"; my ($gcPrefix, $asmAcc, $asmName) = split('_', $asmId, 3); my $chromSizes="${buildDir}/${asmId}.chrom.sizes"; my $sciName = "notFound"; my $commonName = "notFound"; my $bioSample = "notFound"; my $bioProject = "notFound"; my $taxId = "notFound"; my $asmDate = "notFound"; my $itemsFound = 0; open (FH, "<$asmReport") or die "can not read $asmReport"; while (my $line = ) { last if ($itemsFound > 5); chomp $line; $line =~ s/ //g;; @@ -190,41 +196,42 @@ $sciName = $line; $commonName =~ s/.*\(//; $commonName =~ s/\)//; $sciName =~ s/.*:\s+//; $sciName =~ s/\s+\(.*//; } } elsif ($line =~ m/Taxid:/) { if ($taxId =~ m/notFound/) { ++$itemsFound; $taxId = $line; $taxId =~ s/.*:\s+//; } } } close (FH); + my $hubUrl = "https://hgdownload.soe.ucsc.edu/hubs/$accessionDir"; printf "\n", ++$rowCount; - printf "\n", $asmHubName, $asmId, $commonName; - printf " \n", $asmHubName, $asmId, $sciName; + printf "\n", $hubUrl, $asmId, $asmId, $commonName; + printf " \n", $hubUrl, $sciName; printf " \n", $gcPrefix, $asmAcc, $asmId; if ( $bioSample ne "notFound" ) { printf " \n", $bioSample, $bioSample; } else { printf " \n"; } printf " \n", $bioProject, $bioProject; - printf " \n", $asmDate; + printf " \n", $ncbiFtpLink, $asmDate; printf "\n"; } } # sub tableContents() ############################################################################## ### main() ############################################################################## open (FH, "<$srcDir/${commonNameOrder}") or die "can not read ${commonNameOrder}"; while (my $line = ) { chomp $line; my ($commonName, $asmId) = split('\t', $line); push @orderList, $asmId; ++$assemblyCount; }
count common name
link to genome browser
scientific name
and data download
NCBI assembly bioSample bioProject assembly date,
source link
%d%s%s%s%s%s%sn/a%s%s%s