2248d4012416d220927d54ea0c47295560792625 hiram Mon Sep 12 22:06:58 2022 -0700 now working through gatewayPage although it needs to have URLs for downloads adjusted refs #29811 diff --git src/hg/utils/automation/asmHubGatewayPage.pl src/hg/utils/automation/asmHubGatewayPage.pl index 2ba5366..97c0994 100755 --- src/hg/utils/automation/asmHubGatewayPage.pl +++ src/hg/utils/automation/asmHubGatewayPage.pl @@ -16,31 +16,30 @@ my @months = qw( 0 Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec ); sub usage() { printf STDERR "usage: asmHubGatewayPage.pl /*assembly_report.txt /asmId.chrom.sizes /image.jpg /photoCredits.txt\n"; printf STDERR "output is to stdout, redirect to file: > description.html\n"; printf STDERR "photoCredits.txt is a two line tagstring file:\n"; printf STDERR "tags: photoCreditURL and photoCreditName\n"; printf STDERR "use string 'noPhoto' for image and credits when no photo\n"; printf STDERR "stderr output is routed to a 'asmId.names.tab' file for use elsewhere\n"; exit 255; } sub chromSizes($) { my ($sizeFile) = @_; if ( -s $sizeFile ) { - printf STDERR "# reading chrom.sizes file:\n#\t'%s\'\n", $sizeFile; my $ix = 0; my $contigCount = 0; my %sizes; # key is contigName, value is size if ($sizeFile eq "stdin") { while (my $line = <>) { next if ($line =~ m/^\s*#/); ++$contigCount; chomp ($line); my ($name, $size, $rest) = split('\s+', $line, 3); my $key = sprintf("%s_X_%d", $name, $ix++); $sizes{$key} = $size; } } else { @@ -122,82 +121,79 @@ } } my $buildDir = dirname($chromSizes); my $genesDir = "$buildDir/genes"; my $photoCreditURL = ""; my $photoCreditName = ""; my $imageSize = ""; my $imageName = ""; my $imageWidth = 0; my $imageHeight = 0; my $imageWidthBorder = 15; if ($jpgImage ne "noPhoto") { - printf STDERR "# reading $photoCredits\n"; open (FH, "<$photoCredits") or die "can not read $photoCredits"; while (my $line = <FH>) { chomp $line; next if ($line =~ m/^#/); next if (length($line) < 2); my ($tag, $value) = split('\t', $line); if ($tag =~ 
m/photoCreditURL/) { $photoCreditURL = $value; } elsif ($tag =~ m/photoCreditName/) { $photoCreditName = $value; } } close (FH); if ( -s $jpgImage ) { $imageSize = `identify $jpgImage | awk '{print \$3}'`; chomp $imageSize; ($imageWidth, $imageHeight) = split('x', $imageSize); $imageName = basename($jpgImage); } } # transform this path name into a chrom.sizes reference my $thisDir = `pwd`; chomp $thisDir; my $ftpName = dirname($thisDir); -my $asmId = basename($ftpName);; +my $asmId = basename($asmReport); +$asmId =~ s/_assembly_report.txt//; my ($gcXPrefix, $accession, $rest) = split('_', $asmId, 3); my $accessionId = sprintf("%s_%s", $gcXPrefix, $accession); my $accessionDir = substr($asmId, 0 ,3); $accessionDir .= "/" . substr($asmId, 4 ,3); $accessionDir .= "/" . substr($asmId, 7 ,3); $accessionDir .= "/" . substr($asmId, 10 ,3); $accessionDir .= "/" . $accessionId; my $newStyleUrl = sprintf("%s/%s/%s/%s/%s", $gcXPrefix, substr($accession,0,3), substr($accession,3,3), substr($accession,6,3), $asmId); my $localDataUrl = sprintf("%s/%s/%s/%s/%s", $gcXPrefix, substr($accession,0,3), substr($accession,3,3), substr($accession,6,3), $accessionId); $ftpName =~ s#/hive/data/outside/ncbi/##; $ftpName =~ s#/hive/data/inside/ncbi/##; $ftpName =~ s#/hive/data/genomes/asmHubs/##; # my $urlDirectory = `basename $ftpName`; # chomp $urlDirectory; -my $speciesSubgroup = $ftpName; my $asmType = "genbank"; $asmType = "refseq" if ( $gcXPrefix =~ m#GCF#); -$speciesSubgroup =~ s#genomes/$asmType/##;; -$speciesSubgroup =~ s#/.*##;; my %taxIdCommonName; # key is taxId, value is common name # from NCBI taxonomy database dump open (FH, "<$ENV{'HOME'}/kent/src/hg/utils/automation/genbank/taxId.comName.tab") or die "can not read taxId.comName.tab"; while (my $line = <FH>) { chomp $line; my ($taxId, $comName) = split('\t', $line); $taxIdCommonName{$taxId} = $comName; } close (FH); my $submitter = "(n/a)"; my $asmName = "(n/a)"; my $orgName = "(n/a)";