4257b7f552eb7d6bf044fc2edf27b462dd890c79 hiram Fri Feb 14 14:47:29 2020 -0800 now with new path names for download access refs #23891 diff --git src/hg/utils/automation/asmHubGatewayPage.pl src/hg/utils/automation/asmHubGatewayPage.pl index 02d4502..e3a658e 100755 --- src/hg/utils/automation/asmHubGatewayPage.pl +++ src/hg/utils/automation/asmHubGatewayPage.pl @@ -151,32 +151,35 @@ # transform this path name into a chrom.sizes reference my $thisDir = `pwd`; chomp $thisDir; printf STDERR "# thisDir $thisDir\n"; my $ftpName = dirname($thisDir); my $asmId = basename($ftpName);; my $accessionDir = substr($asmId, 0 ,3); $accessionDir .= "/" . substr($asmId, 4 ,3); $accessionDir .= "/" . substr($asmId, 7 ,3); $accessionDir .= "/" . substr($asmId, 10 ,3); $accessionDir .= "/" . $asmId; my ($gcXPrefix, $accession, $rest) = split('_', $asmId, 3); +my $accessionId = sprintf("%s_%s", $gcXPrefix, $accession); my $newStyleUrl = sprintf("%s/%s/%s/%s/%s", $gcXPrefix, substr($accession,0,3), substr($accession,3,3), substr($accession,6,3), $asmId); +my $localDataUrl = sprintf("%s/%s/%s/%s/%s", $gcXPrefix, substr($accession,0,3), + substr($accession,3,3), substr($accession,6,3), $accessionId); $ftpName =~ s#/hive/data/outside/ncbi/##; $ftpName =~ s#/hive/data/inside/ncbi/##; $ftpName =~ s#/hive/data/genomes/asmHubs/##; printf STDERR "# ftpName $ftpName\n"; # my $urlDirectory = `basename $ftpName`; # chomp $urlDirectory; my $speciesSubgroup = $ftpName; my $asmType = "genbank"; $asmType = "refseq" if ( $speciesSubgroup =~ m#refseq/#); $speciesSubgroup =~ s#genomes/$asmType/##;; $speciesSubgroup =~ s#/.*##;; my %taxIdCommonName; # key is taxId, value is common name # from NCBI taxonomy database dump open (FH, "<$ENV{'HOME'}/kent/src/hg/utils/automation/genbank/taxId.comName.tab") or die "can not read taxId.comName.tab"; @@ -315,45 +318,45 @@ <b>Assembly FTP location:</b> <a href=\"ftp://ftp.ncbi.nlm.nih.gov/genomes/all/%s\" target=\"_blank\">%s</a><br> \n", $commonName, $orgName, $taxId, $taxId, $submitter, $asmDate, $descrAsmType, $asmLevel, $bioSample, $bioSample, $asmAccession, $asmAccession, $newStyleUrl, $newStyleUrl; chromSizes($chromSizes); printf "</p>\n<hr> <p> <b>Download files for this assembly hub:</b><br> To use the data from this assembly for a local hub instance at your institution, download these data as indicated by these instructions.<br> <br> To download this assembly data, use this <em>rsync</em> command: <pre> rsync -a -P \\ - rsync://$sourceServer/hubs/$newStyleUrl/ \\ - ./$asmId/ + rsync://$sourceServer/hubs/$localDataUrl/ \\ + ./$accessionId/ - which creates the local directory: ./$asmId/ + which creates the local directory: ./$accessionId/ </pre> or this <em>wget</em> command: <pre> - wget --timestamping -m -nH -x --cut-dirs=4 -e robots=off -np -k \\ - --reject \"index.html*\" -P \"$asmId\" \\ - https://$sourceServer/hubs/$newStyleUrl/ + wget --timestamping -m -nH -x --cut-dirs=6 -e robots=off -np -k \\ + --reject \"index.html*\" -P \"$accessionId\" \\ + https://$sourceServer/hubs/$localDataUrl/ - which creates a local directory: ./$asmId/ + which creates a local directory: ./$accessionId/ </pre> <p> -There is an included <em>$asmId.hub.txt</em> file in that download +There is an included <em>hub.txt</em> file in that download data directory to use for your local track hub instance.<br> Using the genome browser menus: <em><strong>My Data</strong> -> <strong>Track Hubs</strong></em><br> select the <em><strong>My Hubs</strong></em> tab to enter a URL to this hub.txt file to attach this assembly hub to a genome browser. </p> <p> The <em>html/$asmId.description.html</em> page is information for your users to describe this assembly. This WEB page with these instructions is an instance of html/$asmId.description.html file. </p> <p> See also: <a href='/goldenPath/help/hgTrackHubHelp.html' target=_blank>track hub help</a> documentation.<br> </p>\n"; printf "<hr> @@ -361,31 +364,31 @@ To operate a blat server on this assembly, in the directory where you have the <em>$asmId.2bit</em> file: <pre> gfServer -log=$asmId.gfServer.trans.log -ipLog -canStop start \\ yourserver.domain.edu 76543 -trans -mask $asmId.2bit & gfServer -log=$asmId.gfServer.log -ipLog -canStop start \\ yourserver.domain.edu 76542 -stepSize=5 $asmId.2bit & </pre> Adjust the port numbers <em>76543</em> <em>76542</em> and the <em>yourserver.domain.edu</em> for your local circumstances.<br> Typically, port numbers in the range <em>49152</em> to <em>65535</em> are available for private use as in this case. See also: <a href='https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml' target=_blank>IANA.org</a> port registry. </p> <p> -Enter the following specifications in your <em>$asmId.genomes.txt</em> file: +Enter the following specifications in your <em>genomes.txt</em> file: <pre> transBlat yourserver.domain.edu 76543 blat yourserver.domain.edu 76542 </pre> See also: <a href=\"https://genome.ucsc.edu/goldenPath/help/hubQuickStartAssembly.html#blat\" target=_blank>Blat for an Assembly Hub</a> </p>\n"; printf "<hr> <p> <b>Search the assembly:</b> <ul> <li> <b>By position or search term: </b> Use the "position or search term" box to find areas of the genome associated with many different attributes, such