186ce9edc8f61d7ad4c8b2e86437a81e29d77ccd
hiram
  Wed Feb 26 14:41:28 2020 -0800
correctly locate buildDir with accessionId vs asmId refs #23891

diff --git src/hg/utils/automation/asmHubGatewayPage.pl src/hg/utils/automation/asmHubGatewayPage.pl
index e3a658e..5024ee8 100755
--- src/hg/utils/automation/asmHubGatewayPage.pl
+++ src/hg/utils/automation/asmHubGatewayPage.pl
@@ -1,34 +1,38 @@
 #!/usr/bin/env perl
 
 use strict;
 use warnings;
 use FindBin qw($Bin);
 use lib "$Bin";
 use AsmHub;
 use File::Basename;
 
+### XXX ### temporary hgdownload-test.gi
+### my $sourceServer = "hgdownload-test.gi.ucsc.edu";
+
 my $sourceServer = "hgdownload.soe.ucsc.edu";
 
 my @months = qw( 0 Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec );
 
 sub usage() {
   printf STDERR "usage: asmHubGatewayPage.pl <asmHubName> <pathTo>/*assembly_report.txt <pathTo>/asmId.chrom.sizes <pathTo>/image.jpg <pathTo>/photoCredits.txt\n";
   printf STDERR "output is to stdout, redirect to file: > description.html\n";
   printf STDERR "photoCredits.txt is a two line tag<tab>string file:\n";
   printf STDERR "tags: photoCreditURL and photoCreditName\n";
   printf STDERR "use string 'noPhoto' for image and credits when no photo\n";
+  printf STDERR "stderr output is routed to a 'asmId.names.tab' file for use elsewhere\n";
   exit 255;
 }
 
 sub chromSizes($) {
   my ($sizeFile) = @_;
   if ( -s $sizeFile ) {
     printf STDERR "# reading chrom.sizes file:\n#\t'%s\'\n", $sizeFile;
     my $ix = 0;
     my $contigCount = 0;
 
     my %sizes;	# key is contigName, value is size
 
     if ($sizeFile eq "stdin") {
 	while (my $line = <>) {
 	    next if ($line =~ m/^\s*#/);
@@ -141,49 +145,48 @@
   }
   close (FH);
 
   if ( -s $jpgImage ) {
     $imageSize = `identify $jpgImage | awk '{print \$3}'`;
     chomp $imageSize;
     ($imageWidth, $imageHeight) = split('x', $imageSize);
     $imageName = basename($jpgImage);
   }
 }
 
 # transform this path name into a chrom.sizes reference
 
 my $thisDir = `pwd`;
 chomp $thisDir;
-printf STDERR "# thisDir $thisDir\n";
 my $ftpName = dirname($thisDir);
 my $asmId = basename($ftpName);;
+my ($gcXPrefix, $accession, $rest) = split('_', $asmId, 3);
+my $accessionId = sprintf("%s_%s", $gcXPrefix, $accession);
+
 my $accessionDir = substr($asmId, 0 ,3);
 $accessionDir .= "/" . substr($asmId, 4 ,3);
 $accessionDir .= "/" . substr($asmId, 7 ,3);
 $accessionDir .= "/" . substr($asmId, 10 ,3);
-$accessionDir .= "/" . $asmId;
+$accessionDir .= "/" . $accessionId;
 
-my ($gcXPrefix, $accession, $rest) = split('_', $asmId, 3);
-my $accessionId = sprintf("%s_%s", $gcXPrefix, $accession);
 my $newStyleUrl = sprintf("%s/%s/%s/%s/%s", $gcXPrefix, substr($accession,0,3),
    substr($accession,3,3), substr($accession,6,3), $asmId);
 my $localDataUrl = sprintf("%s/%s/%s/%s/%s", $gcXPrefix, substr($accession,0,3),
    substr($accession,3,3), substr($accession,6,3), $accessionId);
 $ftpName =~ s#/hive/data/outside/ncbi/##;
 $ftpName =~ s#/hive/data/inside/ncbi/##;
 $ftpName =~ s#/hive/data/genomes/asmHubs/##;
-printf STDERR "# ftpName $ftpName\n";
 # my $urlDirectory = `basename $ftpName`;
 # chomp $urlDirectory;
 my $speciesSubgroup = $ftpName;
 my $asmType = "genbank";
 $asmType = "refseq" if ( $speciesSubgroup =~ m#refseq/#);
 $speciesSubgroup =~ s#genomes/$asmType/##;;
 $speciesSubgroup =~ s#/.*##;;
 
 my %taxIdCommonName;  # key is taxId, value is common name
                       # from NCBI taxonomy database dump
 open (FH, "<$ENV{'HOME'}/kent/src/hg/utils/automation/genbank/taxId.comName.tab") or die "can not read taxId.comName.tab";
 while (my $line = <FH>) {
   chomp $line;
   my ($taxId, $comName) = split('\t', $line);
   $taxIdCommonName{$taxId} = $comName;