186ce9edc8f61d7ad4c8b2e86437a81e29d77ccd hiram Wed Feb 26 14:41:28 2020 -0800 correctly locate buildDIr with accessionId vs asmId refs #23891 diff --git src/hg/utils/automation/asmHubGatewayPage.pl src/hg/utils/automation/asmHubGatewayPage.pl index e3a658e..5024ee8 100755 --- src/hg/utils/automation/asmHubGatewayPage.pl +++ src/hg/utils/automation/asmHubGatewayPage.pl @@ -1,34 +1,38 @@ #!/usr/bin/env perl use strict; use warnings; use FindBin qw($Bin); use lib "$Bin"; use AsmHub; use File::Basename; +### XXX ### temporary hgdownload-test.gi +### my $sourceServer = "hgdownload-test.gi.ucsc.edu"; + my $sourceServer = "hgdownload.soe.ucsc.edu"; my @months = qw( 0 Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec ); sub usage() { printf STDERR "usage: asmHubGatewayPage.pl <asmHubName> <pathTo>/*assembly_report.txt <pathTo>/asmId.chrom.sizes <pathTo>/image.jpg <pathTo>/photoCredits.txt\n"; printf STDERR "output is to stdout, redirect to file: > description.html\n"; printf STDERR "photoCredits.txt is a two line tag<tab>string file:\n"; printf STDERR "tags: photoCreditURL and photoCreditName\n"; printf STDERR "use string 'noPhoto' for image and credits when no photo\n"; + printf STDERR "stderr output is routed to a 'asmId.names.tab' file for use elsewhere\n"; exit 255; } sub chromSizes($) { my ($sizeFile) = @_; if ( -s $sizeFile ) { printf STDERR "# reading chrom.sizes file:\n#\t'%s\'\n", $sizeFile; my $ix = 0; my $contigCount = 0; my %sizes; # key is contigName, value is size if ($sizeFile eq "stdin") { while (my $line = <>) { next if ($line =~ m/^\s*#/); @@ -141,49 +145,48 @@ } close (FH); if ( -s $jpgImage ) { $imageSize = `identify $jpgImage | awk '{print \$3}'`; chomp $imageSize; ($imageWidth, $imageHeight) = split('x', $imageSize); $imageName = basename($jpgImage); } } # transform this path name into a chrom.sizes reference my $thisDir = `pwd`; chomp $thisDir; -printf STDERR "# thisDir $thisDir\n"; my $ftpName = dirname($thisDir); my $asmId = basename($ftpName);; +my ($gcXPrefix, $accession, $rest) = split('_', $asmId, 3); +my $accessionId = sprintf("%s_%s", $gcXPrefix, $accession); + my $accessionDir = substr($asmId, 0 ,3); $accessionDir .= "/" . substr($asmId, 4 ,3); $accessionDir .= "/" . substr($asmId, 7 ,3); $accessionDir .= "/" . substr($asmId, 10 ,3); -$accessionDir .= "/" . $asmId; +$accessionDir .= "/" . $accessionId; -my ($gcXPrefix, $accession, $rest) = split('_', $asmId, 3); -my $accessionId = sprintf("%s_%s", $gcXPrefix, $accession); my $newStyleUrl = sprintf("%s/%s/%s/%s/%s", $gcXPrefix, substr($accession,0,3), substr($accession,3,3), substr($accession,6,3), $asmId); my $localDataUrl = sprintf("%s/%s/%s/%s/%s", $gcXPrefix, substr($accession,0,3), substr($accession,3,3), substr($accession,6,3), $accessionId); $ftpName =~ s#/hive/data/outside/ncbi/##; $ftpName =~ s#/hive/data/inside/ncbi/##; $ftpName =~ s#/hive/data/genomes/asmHubs/##; -printf STDERR "# ftpName $ftpName\n"; # my $urlDirectory = `basename $ftpName`; # chomp $urlDirectory; my $speciesSubgroup = $ftpName; my $asmType = "genbank"; $asmType = "refseq" if ( $speciesSubgroup =~ m#refseq/#); $speciesSubgroup =~ s#genomes/$asmType/##;; $speciesSubgroup =~ s#/.*##;; my %taxIdCommonName; # key is taxId, value is common name # from NCBI taxonomy database dump open (FH, "<$ENV{'HOME'}/kent/src/hg/utils/automation/genbank/taxId.comName.tab") or die "can not read taxId.comName.tab"; while (my $line = <FH>) { chomp $line; my ($taxId, $comName) = split('\t', $line); $taxIdCommonName{$taxId} = $comName;