a4b95829492ef38feb9369fecd8a0451d35c4103 hiram Tue Jan 21 13:21:23 2020 -0800 now using the new style GCF path names refs #24748 diff --git src/hg/utils/automation/asmHubGatewayPage.pl src/hg/utils/automation/asmHubGatewayPage.pl index b1e9087..02d4502 100755 --- src/hg/utils/automation/asmHubGatewayPage.pl +++ src/hg/utils/automation/asmHubGatewayPage.pl @@ -144,30 +144,36 @@ if ( -s $jpgImage ) { $imageSize = `identify $jpgImage | awk '{print \$3}'`; chomp $imageSize; ($imageWidth, $imageHeight) = split('x', $imageSize); $imageName = basename($jpgImage); } } # transform this path name into a chrom.sizes reference my $thisDir = `pwd`; chomp $thisDir; printf STDERR "# thisDir $thisDir\n"; my $ftpName = dirname($thisDir); my $asmId = basename($ftpName);; +my $accessionDir = substr($asmId, 0 ,3); +$accessionDir .= "/" . substr($asmId, 4 ,3); +$accessionDir .= "/" . substr($asmId, 7 ,3); +$accessionDir .= "/" . substr($asmId, 10 ,3); +$accessionDir .= "/" . $asmId; + my ($gcXPrefix, $accession, $rest) = split('_', $asmId, 3); my $newStyleUrl = sprintf("%s/%s/%s/%s/%s", $gcXPrefix, substr($accession,0,3), substr($accession,3,3), substr($accession,6,3), $asmId); $ftpName =~ s#/hive/data/outside/ncbi/##; $ftpName =~ s#/hive/data/inside/ncbi/##; $ftpName =~ s#/hive/data/genomes/asmHubs/##; printf STDERR "# ftpName $ftpName\n"; # my $urlDirectory = `basename $ftpName`; # chomp $urlDirectory; my $speciesSubgroup = $ftpName; my $asmType = "genbank"; $asmType = "refseq" if ( $speciesSubgroup =~ m#refseq/#); $speciesSubgroup =~ s#genomes/$asmType/##;; $speciesSubgroup =~ s#/.*##;; @@ -268,123 +274,125 @@ printf STDERR "%s\t", $asmName; printf STDERR "%s\t", $orgName; printf STDERR "%s\t", $bioSample; printf STDERR "%s\t", $descrAsmType; printf STDERR "%s\t", $asmLevel; printf STDERR "%s\t", $asmDate; printf STDERR "%s\n", $asmAccession; # printf "<script type='text/javascript'>var asmId='%s';</script>\n", $asmId; if (length($imageName)) { printf "<!-- Display image in righthand corner --> <table align=right border=0 width=%d height=%d> <tr><td align=RIGHT><a href=\"https://www.ncbi.nlm.nih.gov/assembly/%s\" target=_blank> - <img src=\"https://%s/hubs/%s/genomes/%s/html/%s\" width=%d height=%d alt=\"%s\"></a> + <img src=\"https://%s/hubs/%s/html/%s\" width=%d height=%d alt=\"%s\"></a> </td></tr> <tr><td align=right> <font size=-1> <em>%s</em><BR> </font> <font size=-2> (Photo courtesy of <a href=\"%s\" target=_blank>%s</a>) </font> </td></tr> </table> -\n", $imageWidth+$imageWidthBorder, $imageHeight, $asmAccession, $sourceServer, $asmHubName, $asmId, $imageName, $imageWidth, $imageHeight, $commonName, $orgName, $photoCreditURL, $photoCreditName; +\n", $imageWidth+$imageWidthBorder, $imageHeight, $asmAccession, $sourceServer, $accessionDir, $imageName, $imageWidth, $imageHeight, $commonName, $orgName, $photoCreditURL, $photoCreditName; } my $sciNameUnderscore = $orgName; $sciNameUnderscore =~ s/ /_/g; $sciNameUnderscore = "Strigops_habroptilus" if ($orgName =~ m/Strigops habroptila/); printf "<p> <b>Common name:</b> %s<br> <b>Taxonomic name: %s, taxonomy ID:</b> <a href='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=%s' target='_blank'> %s</a><br> <b>Sequencing/Assembly provider ID:</b> %s<br> <b>Assembly date:</b> %s<br> <b>Assembly type:</b> %s<br> <b>Assembly level:</b> %s<br> <b>Biosample:</b> <a href=\"https://www.ncbi.nlm.nih.gov/biosample/?term=%s\" target=\"_blank\">%s</a><br> <b>Assembly accession ID:</b> <a href=\"https://www.ncbi.nlm.nih.gov/assembly/%s\" target=\"_blank\">%s</a><br> <b>Assembly FTP location:</b> <a href=\"ftp://ftp.ncbi.nlm.nih.gov/genomes/all/%s\" target=\"_blank\">%s</a><br> \n", $commonName, $orgName, $taxId, $taxId, $submitter, $asmDate, $descrAsmType, $asmLevel, $bioSample, $bioSample, $asmAccession, $asmAccession, $newStyleUrl, $newStyleUrl; chromSizes($chromSizes); printf "</p>\n<hr> <p> <b>Download files for this assembly hub:</b><br> To use the data from this assembly for a local hub instance at your institution, download these data as indicated by these instructions.<br> -See also: <a href='/goldenPath/help/hgTrackHubHelp.html' target=_blank>track hub help</a> documentation.<br> <br> To download this assembly data, use this <em>rsync</em> command: <pre> rsync -a -P \\ - rsync://$sourceServer/hubs/$asmHubName/genomes/$asmId/ \\ + rsync://$sourceServer/hubs/$newStyleUrl/ \\ ./$asmId/ which creates the local directory: ./$asmId/ </pre> or this <em>wget</em> command: <pre> wget --timestamping -m -nH -x --cut-dirs=4 -e robots=off -np -k \\ --reject \"index.html*\" -P \"$asmId\" \\ - https://$sourceServer/hubs/$asmHubName/genomes/$asmId/ + https://$sourceServer/hubs/$newStyleUrl/ which creates a local directory: ./$asmId/ </pre> -<br> -There is an included $asmId.genomes.txt file in that download -data to use for your local track hub instance.<br> -You will need to add a hub.txt file to point to this genomes.txt file.<br> -Something like: -<pre> -hub myLocalHub -shortLabel myLocalHub -longLabel genome assembly $asmId -genomesFile $asmId.genomes.txt -email yourEmail\@yourdomain.edu -descriptionUrl html/$asmId.description.html -</pre> +<p> +There is an included <em>$asmId.hub.txt</em> file in that download +data directory to use for your local track hub instance.<br> +Using the genome browser menus: <em><strong>My Data</strong> -> <strong>Track Hubs</strong></em><br> +select the <em><strong>My Hubs</strong></em> tab to enter a URL +to this hub.txt file to attach this assembly hub to a genome browser. +</p> +<p> The <em>html/$asmId.description.html</em> page is information for your users to describe this assembly. This WEB page with these instructions is an instance of html/$asmId.description.html file. +</p> +<p> +See also: <a href='/goldenPath/help/hgTrackHubHelp.html' target=_blank>track hub help</a> documentation.<br> </p>\n"; printf "<hr> <p> To operate a blat server on this assembly, in the directory where you have -the $asmId.2bit file: +the <em>$asmId.2bit</em> file: <pre> -gfServer -log=%s.gfServer.trans.log -ipLog -canStop start \\ - yourserver.domain.edu 76543 -trans -mask %s.2bit & -gfServer -log=%s.gfServer.log -ipLog -canStop start \\ - yourserver.domain.edu 76542 -stepSize=5 %s.2bit & +gfServer -log=$asmId.gfServer.trans.log -ipLog -canStop start \\ + yourserver.domain.edu 76543 -trans -mask $asmId.2bit & +gfServer -log=$asmId.gfServer.log -ipLog -canStop start \\ + yourserver.domain.edu 76542 -stepSize=5 $asmId.2bit & </pre> Adjust the port numbers <em>76543</em> <em>76542</em> and the <em>yourserver.domain.edu</em> for your local circumstances.<br> -Enter the following specifications in your genomes.txt file: +Typically, port numbers in the range <em>49152</em> to <em>65535</em> +are available for private use as in this case. +See also: <a href='https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml' target=_blank>IANA.org</a> port registry. +</p> +<p> +Enter the following specifications in your <em>$asmId.genomes.txt</em> file: <pre> transBlat yourserver.domain.edu 76543 blat yourserver.domain.edu 76542 </pre> See also: <a href=\"https://genome.ucsc.edu/goldenPath/help/hubQuickStartAssembly.html#blat\" target=_blank>Blat for an Assembly Hub</a> -</p>\n", $asmId, $asmId, $asmId, $asmId; +</p>\n"; printf "<hr> <p> <b>Search the assembly:</b> <ul> <li> <b>By position or search term: </b> Use the "position or search term" box to find areas of the genome associated with many different attributes, such as a specific chromosomal coordinate range; mRNA, EST, or STS marker names; or keywords from the GenBank description of an mRNA. <a href=\"http://genome.ucsc.edu/goldenPath/help/query.html\">More information</a>, including sample queries.</li> <li> <b>By gene name: </b> Type a gene name into the "search term" box, choose your gene from the drop-down list, then press "submit" to go directly to the assembly location associated with that gene.