a4b95829492ef38feb9369fecd8a0451d35c4103
hiram
  Tue Jan 21 13:21:23 2020 -0800
now using the new style GCF path names refs #24748

diff --git src/hg/utils/automation/asmHubGatewayPage.pl src/hg/utils/automation/asmHubGatewayPage.pl
index b1e9087..02d4502 100755
--- src/hg/utils/automation/asmHubGatewayPage.pl
+++ src/hg/utils/automation/asmHubGatewayPage.pl
@@ -144,30 +144,36 @@
   if ( -s $jpgImage ) {
     $imageSize = `identify $jpgImage | awk '{print \$3}'`;
     chomp $imageSize;
     ($imageWidth, $imageHeight) = split('x', $imageSize);
     $imageName = basename($jpgImage);
   }
 }
 
 # transform this path name into a chrom.sizes reference
 
 my $thisDir = `pwd`;
 chomp $thisDir;
 printf STDERR "# thisDir $thisDir\n";
 my $ftpName = dirname($thisDir);
 my $asmId = basename($ftpName);;
+my $accessionDir = substr($asmId, 0 ,3);
+$accessionDir .= "/" . substr($asmId, 4 ,3);
+$accessionDir .= "/" . substr($asmId, 7 ,3);
+$accessionDir .= "/" . substr($asmId, 10 ,3);
+$accessionDir .= "/" . $asmId;
+
 my ($gcXPrefix, $accession, $rest) = split('_', $asmId, 3);
 my $newStyleUrl = sprintf("%s/%s/%s/%s/%s", $gcXPrefix, substr($accession,0,3),
    substr($accession,3,3), substr($accession,6,3), $asmId);
 $ftpName =~ s#/hive/data/outside/ncbi/##;
 $ftpName =~ s#/hive/data/inside/ncbi/##;
 $ftpName =~ s#/hive/data/genomes/asmHubs/##;
 printf STDERR "# ftpName $ftpName\n";
 # my $urlDirectory = `basename $ftpName`;
 # chomp $urlDirectory;
 my $speciesSubgroup = $ftpName;
 my $asmType = "genbank";
 $asmType = "refseq" if ( $speciesSubgroup =~ m#refseq/#);
 $speciesSubgroup =~ s#genomes/$asmType/##;;
 $speciesSubgroup =~ s#/.*##;;
 
@@ -268,123 +274,125 @@
 printf STDERR "%s\t", $asmName;
 printf STDERR "%s\t", $orgName;
 printf STDERR "%s\t", $bioSample;
 printf STDERR "%s\t", $descrAsmType;
 printf STDERR "%s\t", $asmLevel;
 printf STDERR "%s\t", $asmDate;
 printf STDERR "%s\n", $asmAccession;
 
 # printf "<script type='text/javascript'>var asmId='%s';</script>\n", $asmId;
 
 if (length($imageName)) {
 printf "<!-- Display image in righthand corner -->
 <table align=right border=0 width=%d height=%d>
   <tr><td align=RIGHT><a href=\"https://www.ncbi.nlm.nih.gov/assembly/%s\"
     target=_blank>
-    <img src=\"https://%s/hubs/%s/genomes/%s/html/%s\" width=%d height=%d alt=\"%s\"></a>
+    <img src=\"https://%s/hubs/%s/html/%s\" width=%d height=%d alt=\"%s\"></a>
   </td></tr>
   <tr><td align=right>
     <font size=-1> <em>%s</em><BR>
     </font>
     <font size=-2> (Photo courtesy of
       <a href=\"%s\" target=_blank>%s</a>)
     </font>
   </td></tr>
 </table>
-\n", $imageWidth+$imageWidthBorder, $imageHeight, $asmAccession, $sourceServer, $asmHubName, $asmId, $imageName, $imageWidth, $imageHeight, $commonName, $orgName, $photoCreditURL, $photoCreditName;
+\n", $imageWidth+$imageWidthBorder, $imageHeight, $asmAccession, $sourceServer, $accessionDir, $imageName, $imageWidth, $imageHeight, $commonName, $orgName, $photoCreditURL, $photoCreditName;
 }
 
 my $sciNameUnderscore = $orgName;
 $sciNameUnderscore =~ s/ /_/g;
 $sciNameUnderscore = "Strigops_habroptilus" if ($orgName =~ m/Strigops habroptila/);
 
 printf "<p>
 <b>Common name:</b>&nbsp;%s<br>
 <b>Taxonomic name: %s, taxonomy ID:</b> <a href='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=%s' target='_blank'> %s</a><br>
 <b>Sequencing/Assembly provider ID:</b> %s<br>
 <b>Assembly date:</b> %s<br>
 <b>Assembly type:</b> %s<br>
 <b>Assembly level:</b> %s<br>
 <b>Biosample:</b> <a href=\"https://www.ncbi.nlm.nih.gov/biosample/?term=%s\" target=\"_blank\">%s</a><br>
 <b>Assembly accession ID:</b> <a href=\"https://www.ncbi.nlm.nih.gov/assembly/%s\" target=\"_blank\">%s</a><br>
 <b>Assembly FTP location:</b> <a href=\"ftp://ftp.ncbi.nlm.nih.gov/genomes/all/%s\" target=\"_blank\">%s</a><br>
 \n", $commonName, $orgName, $taxId, $taxId, $submitter, $asmDate, $descrAsmType,
   $asmLevel, $bioSample, $bioSample, $asmAccession, $asmAccession, $newStyleUrl, $newStyleUrl;
 
 chromSizes($chromSizes);
 
 printf "</p>\n<hr>
 <p>
 <b>Download files for this assembly hub:</b><br>
 To use the data from this assembly for a local hub instance at your
 institution, download these data as indicated by these instructions.<br>
-See also: <a href='/goldenPath/help/hgTrackHubHelp.html' target=_blank>track hub help</a> documentation.<br>
 <br>
 To download this assembly data, use this <em>rsync</em> command:
 <pre>
   rsync -a -P \\
-    rsync://$sourceServer/hubs/$asmHubName/genomes/$asmId/ \\
+    rsync://$sourceServer/hubs/$newStyleUrl/ \\
       ./$asmId/
 
   which creates the local directory: ./$asmId/
 </pre>
 or this <em>wget</em> command:
 <pre>
   wget --timestamping -m -nH -x --cut-dirs=4 -e robots=off -np -k \\
     --reject \"index.html*\" -P \"$asmId\" \\
-       https://$sourceServer/hubs/$asmHubName/genomes/$asmId/
+       https://$sourceServer/hubs/$newStyleUrl/
 
   which creates a local directory: ./$asmId/
 </pre>
-<br>
-There is an included $asmId.genomes.txt file in that download
-data to use for your local track hub instance.<br>
-You will need to add a hub.txt file to point to this genomes.txt file.<br>
-Something like:
-<pre>
-hub myLocalHub
-shortLabel myLocalHub
-longLabel genome assembly $asmId
-genomesFile $asmId.genomes.txt
-email yourEmail\@yourdomain.edu
-descriptionUrl html/$asmId.description.html
-</pre>
+<p>
+There is an included <em>$asmId.hub.txt</em> file in that download
+data directory to use for your local track hub instance.<br>
+Using the genome browser menus: <em><strong>My Data</strong> -&gt; <strong>Track Hubs</strong></em><br>
+select the <em><strong>My Hubs</strong></em> tab to enter a URL
+to this hub.txt file to attach this assembly hub to a genome browser.
+</p>
+<p>
 The <em>html/$asmId.description.html</em> page is information for your users to
 describe this assembly.  This WEB page with these instructions
 is an instance of html/$asmId.description.html file.
+</p>
+<p>
+See also: <a href='/goldenPath/help/hgTrackHubHelp.html' target=_blank>track hub help</a> documentation.<br>
 </p>\n";
 
 printf "<hr>
 <p>
 To operate a blat server on this assembly, in the directory where you have
-the $asmId.2bit file:
+the <em>$asmId.2bit</em> file:
 <pre>
-gfServer -log=%s.gfServer.trans.log -ipLog -canStop start \\
-    yourserver.domain.edu 76543 -trans -mask %s.2bit &
-gfServer -log=%s.gfServer.log -ipLog -canStop start \\
-    yourserver.domain.edu 76542 -stepSize=5 %s.2bit &
+gfServer -log=$asmId.gfServer.trans.log -ipLog -canStop start \\
+    yourserver.domain.edu 76543 -trans -mask $asmId.2bit &
+gfServer -log=$asmId.gfServer.log -ipLog -canStop start \\
+    yourserver.domain.edu 76542 -stepSize=5 $asmId.2bit &
 </pre>
 Adjust the port numbers <em>76543</em> <em>76542</em> and the
 <em>yourserver.domain.edu</em> for your local circumstances.<br>
-Enter the following specifications in your genomes.txt file:
+Typically, port numbers in the range <em>49152</em> to <em>65535</em>
+are available for private use as in this case.
+See also: <a href='https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml' target=_blank>IANA.org</a> port registry.
+</p>
+<p>
+Enter the following specifications in your <em>$asmId.genomes.txt</em> file:
 <pre>
 transBlat yourserver.domain.edu 76543
 blat yourserver.domain.edu 76542
 </pre>
 See also: <a href=\"https://genome.ucsc.edu/goldenPath/help/hubQuickStartAssembly.html#blat\"
 target=_blank>Blat for an Assembly Hub</a>
-</p>\n", $asmId, $asmId, $asmId, $asmId;
+</p>\n";
 
 printf "<hr>
 <p>
 <b>Search the assembly:</b>
 <ul>
 <li>
 <b>By position or search term: </b> Use the &quot;position or search term&quot;
 box to find areas of the genome associated with many different attributes, such
 as a specific chromosomal coordinate range; mRNA, EST, or STS marker names; or
 keywords from the GenBank description of an mRNA.
 <a href=\"http://genome.ucsc.edu/goldenPath/help/query.html\">More information</a>, including sample queries.</li>
 <li>
 <b>By gene name: </b> Type a gene name into the &quot;search term&quot; box,
 choose your gene from the drop-down list, then press &quot;submit&quot; to go
 directly to the assembly location associated with that gene.