#!/usr/bin/env perl
#
# Print one row of assembly-hub index data for a single assembly.
#
# Invoked with two arguments: a hub link and a path whose final component
# is the assembly id.  Reads the assembly's names, build-stats, faCount
# and (optionally) gene-stats files and prints the collected values to
# stdout; the usage text shows the output being appended to hubIndex.html.
#
# Provenance (commit header carried with this copy of the file):
#   27a61987f2692b2fad37a9faada47b927f568e1e hiram Fri May 24 10:11:44 2024 -0700
#   correct ftp to https for references to ftp.ncbi.nih.gov   no redmine
#   src/hg/utils/automation/asmHubHtml.pl (index 5a27bc8..7a36cf7)
#
# NOTE(review): the printf format strings below are reproduced exactly as
# found in this copy.  Several carry more arguments than conversions, so
# the markup they originally contained appears to have been lost --
# restore them from the upstream file before relying on the output.

use strict;
use warnings;
use FindBin qw($Bin);
use lib "$Bin";
use AsmHub;
use File::Basename;

my $argc = scalar(@ARGV);

if ($argc != 2) {
  printf STDERR "usage: hubHtml.pl <hubLink> <pathTo>/<asmId> >> hubIndex.html\n";
  printf STDERR "will look for files <pathTo>/<asmId>/html/<asmId>.names.tab and\n";
  printf STDERR "<pathTo>/<asmId>/html/<asmId>.build.stats.txt\n";
  exit 255;
}

# Return, in file order, the integer value of the second
# whitespace-separated column of every line of $file (0 when a line has
# no second column or it is not numeric).  In-process replacement for
# the former `cat | awk '{printf "%d\n", $2}' | xargs echo` pipeline.
# An unreadable file yields an empty list plus a warning rather than a
# fatal error, since the gene statistics are optional.
sub secondColumnInts {
  my ($file) = @_;
  my @values;
  my $fh;
  if (! open ($fh, '<', $file)) {
    warn "can not read $file: $!\n";
    return @values;
  }
  while (my $line = <$fh>) {
    chomp $line;
    # split ' ' skips leading whitespace, the same way awk splits fields
    my @fields = split(' ', $line);
    my $field = defined($fields[1]) ? $fields[1] : 0;
    no warnings 'numeric';	# non-numeric text becomes 0, as in awk
    push @values, sprintf("%d", $field);
  }
  close ($fh);
  return @values;
}

# Return the last line of $file beginning with 'total' (chomped), or
# undef when the file can not be opened or has no such line.  In-process
# replacement for `grep '^total' $file 2> /dev/null | tail -1`; like that
# pipeline, a missing file is deliberately not an error.
sub lastTotalLine {
  my ($file) = @_;
  my $found;
  my $fh;
  if (open ($fh, '<', $file)) {
    while (my $line = <$fh>) {
      $found = $line if ($line =~ m/^total/);
    }
    close ($fh);
    chomp $found if (defined($found));
  }
  return $found;
}

my $hubLink = shift;
my $baseName = shift;
my $asmId = basename($baseName);
my $ftpName = dirname($baseName);
$ftpName =~ s#/hive/data/inside/ncbi/##;
my $namesFile = "$baseName/html/$asmId.names.tab";
my $statsFile = "$baseName/html/$asmId.build.stats.txt";
my $faCountFile = "$baseName/$asmId.faCount.signature.txt";
my $geneStatsFile = "$baseName.ncbiGene.ncbi.stats.txt";

my $contigCount = 0;
my $genomeSize = 0;
my $n50 = 0;
my $totalNucleotides = 0;
my $adenine = 0;
my $cytosine = 0;
my $guanine = 0;
my $thymine = 0;
my $gapsN = 0;
my $CpG = 0;
my $gcContent = 0;
my $NperCent = 0;

# single line file, three numbers: contigCount genomeSize N50
# (if there is more than one line, the last one wins)
open (my $statsFh, '<', $statsFile) or die "can not read $statsFile: $!";
while (my $line = <$statsFh>) {
  chomp $line;
  ($contigCount, $genomeSize, $n50) = split(/\s+/, $line);
}
close ($statsFh);

# optional gene statistics: the first two lines of the file supply, in
# their second column, the gene count and the number of bases covered
my $geneCount = 0;
my $genePercentCoverage = 0;
my $geneBasesCovered = 0;
if ( -s $geneStatsFile ) {
  my ($count, $basesCovered) = secondColumnInts($geneStatsFile);
  # keep the 0 defaults when the file has fewer than two lines
  $geneCount = $count if (defined($count));
  $geneBasesCovered = $basesCovered if (defined($basesCovered));
  $genePercentCoverage = 0;
  if ($genomeSize > 0) {
    $genePercentCoverage = sprintf("%.3f",
	100.0 * $geneBasesCovered/$genomeSize);
  }
}

# single line file, the last line, the 'total' line output of faCount:
# #seq    len     A       C       G       T       N       cpg
# total   16569   5124    5181    2169    4094    1       435
my $totalLine = lastTotalLine($faCountFile);
if (defined($totalLine)) {
  (undef, $totalNucleotides, $adenine, $cytosine, $guanine, $thymine,
      $gapsN, $CpG) = split(/\s+/, $totalLine);
  if ($totalNucleotides > 0) {
    $gcContent = 100.0*($cytosine+$guanine)/$totalNucleotides;
    $NperCent = 100.0*$gapsN/$totalNucleotides;
  }
}

my $hubText = "hub.txt";

# lines beginning with '#' are comments; each remaining line is one
# tab-separated set of names data
open (my $namesFh, '<', $namesFile) or die "can not read $namesFile: $!";
while (my $line = <$namesFh>) {
  next if ($line =~ m/^#/);
  chomp $line;
  my ($taxId, $commonName, $submitter, $asmName, $sciName, $bioSample,
      $asmType, $asmLevel, $asmDate, $asmAccession) = split(/\t/, $line);
  # NOTE(review): as written the two space substitutions below replace a
  # space with a space; the replacement text was presumably a
  # non-breaking-space entity -- confirm against upstream.
  $asmDate =~ s/ / /g;
  $sciName =~ s/ *\(.*//;	# drop any trailing parenthesized text
  $sciName =~ s/ / /g;
  printf " %d", $taxId, $taxId;
  printf "%s", $asmDate;
  printf "%s", $hubLink, $hubText, $asmAccession, $asmName, $commonName;
  printf "%s", $sciName;
  if ($bioSample ne "(n/a)") {
    printf " %s", $bioSample, $bioSample;
  } else {
    printf "(n/a)";
  }
  printf "%s", AsmHub::commify($contigCount);
  printf "%s", AsmHub::commify($genomeSize);
  printf "%s", AsmHub::commify($n50);
  printf "%% %.2f", $gcContent;
  printf "%s\n%% %.2f", AsmHub::commify($gapsN), $NperCent;
  printf "%s\n%s\n%% %.2f", AsmHub::commify($geneCount),
      AsmHub::commify($geneBasesCovered), $genePercentCoverage;
  printf "%s", $asmAccession, $asmAccession;
  printf "%s", $ftpName, $asmName;
  printf "%s", $asmType;
  printf "%s", $asmLevel;
  printf "%s\n", $submitter;
}
close ($namesFh);