#!/usr/bin/env perl
#
# asmHubNcbiGene.pl - print to stdout the HTML description page for the
# NCBI Gene track of an assembly hub.
#
# This is the state of src/hg/utils/automation/asmHubNcbiGene.pl after
# commit 92509db0231e9b1ae83aa0c85182883dd8eabc9b
# (hiram, Wed Sep 14 13:59:35 2022 -0700, refs #29811), which added the
# ncbiAsmId argument so that file names are built from asmId while the
# NCBI FTP path and the description text are built from ncbiAsmId.
#
# Inputs read:
#   <trackData>/ncbiGene/<asmId>.ncbiGene.bb         must exist, non-empty
#   <trackData>/ncbiGene/<asmId>.ncbiGene.stats.txt  item count, bases covered
#   <trackData>/../<asmId>.chrom.sizes               column 2 is summed
#   asmId.names.tab                                  tab-separated metadata
#
# Any failure prints a message to STDERR and exits with status 255.

use strict;
use warnings;
use FindBin qw($Bin);
use lib "$Bin";
use AsmHub;
use File::Basename;

my $argc = scalar(@ARGV);

if ($argc != 4) {
  print STDERR "usage: asmHubNcbiGene.pl asmId ncbiAsmId asmId.names.tab .../trackData/\n";
  print STDERR "where asmId is the assembly identifier,\n";
  print STDERR "and .../trackData/ is the path to the /trackData/ directory.\n";
  print STDERR "asmId may be equal to ncbiAsmId if it is a GenArk build\n";
  print STDERR "or asmId might be a default dbName if it is a UCSC style\n";
  print STDERR "browser build.\n";
  exit 255;
}

# Print "ERROR: <message>" to STDERR and exit 255, the same status the
# usage check uses.  print is used rather than printf so that a '%' in an
# interpolated path or identifier is never taken as a format directive.
sub fatal {
  my ($message) = @_;
  print STDERR "ERROR: $message\n";
  exit 255;
}

# from Perl Cookbook Recipe 2.17, print out large numbers with comma
# delimiters:
sub commify {
  my ($number) = @_;
  my $text = reverse $number;
  $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
  return scalar reverse $text;
}

# Return the sum of the second whitespace-separated column of the given
# chrom.sizes file.  Lines whose second column is not an unsigned integer
# are skipped.
sub sumChromSizes {
  my ($path) = @_;
  open(my $fh, '<', $path) or fatal("can not read $path: $!");
  my $total = 0;
  while (my $line = <$fh>) {
    my (undef, $size) = split(' ', $line);
    next unless (defined($size) && $size =~ m/^\d+$/);
    $total += $size;
  }
  close($fh);
  return $total;
}

# Return the integer found in the second whitespace-separated column of
# each of the first two non-blank lines of the stats file; the caller
# uses them as (item count, bases covered), in that order.
# NOTE(review): the column meaning is taken from how the values are used
# in this script - confirm against whatever writes the stats file.
sub readGeneStats {
  my ($path) = @_;
  open(my $fh, '<', $path) or fatal("can not read $path: $!");
  my @counts;
  while (my $line = <$fh>) {
    my @fields = split(' ', $line);
    next unless (scalar(@fields) > 0);
    # only the leading digits count; anything else is taken as 0
    my $value = 0;
    if (defined($fields[1]) && $fields[1] =~ m/^(\d+)/) {
      $value = $1;
    }
    push @counts, $value;
    last if (scalar(@counts) == 2);
  }
  close($fh);
  fatal("expected two lines of statistics in $path") if (scalar(@counts) < 2);
  return @counts;
}

# Return the tab-separated fields of the first line of the names file
# that is neither blank nor a '#' comment, or an empty list when there is
# no such line.
sub readNamesRecord {
  my ($path) = @_;
  open(my $fh, '<', $path) or fatal("can not read $path: $!");
  my @fields;
  while (my $line = <$fh>) {
    next if ($line =~ m/^#/);
    chomp $line;
    next if ($line eq "");
    @fields = split(/\t/, $line, -1);	# -1 keeps trailing empty columns
    last;
  }
  close($fh);
  return @fields;
}

my $asmId = shift;
my $ncbiAsmId = shift;
my $namesFile = shift;
my $trackDataDir = shift;
my $ncbiGeneBbi = "$trackDataDir/ncbiGene/$asmId.ncbiGene.bb";

if ( ! -s $ncbiGeneBbi ) {
  fatal("can not find $asmId.ncbiGene.bb file");
}

# The FTP directory is derived from the accession at the start of
# ncbiAsmId: <prefix>_<digits...> becomes prefix/ddd/ddd/ddd/ncbiAsmId.
# Reject an identifier that can not supply the three 3-character groups
# instead of emitting a broken link.
my @partNames = split('_', $ncbiAsmId);
if (!defined($partNames[1]) || length($partNames[1]) < 9) {
  fatal("ncbiAsmId '$ncbiAsmId' does not begin with an accession of the form XXX_nnnnnnnnn");
}
my $ftpDirPath = sprintf("%s/%s/%s/%s/%s", $partNames[0],
   substr($partNames[1],0,3), substr($partNames[1],3,3),
   substr($partNames[1],6,3), $ncbiAsmId);

my $chromSizes = "$trackDataDir/../${asmId}.chrom.sizes";
my $totalBases = sumChromSizes($chromSizes);
# guard the division below
fatal("total genome size is zero in $chromSizes") if ($totalBases == 0);

my ($itemCount, $basesCovered)
   = readGeneStats("$trackDataDir/ncbiGene/${asmId}.ncbiGene.stats.txt");

# compute the percentage before the counts are turned into comma
# delimited strings
my $percentCoverage = sprintf("%.3f", 100.0 * $basesCovered / $totalBases);
$itemCount = commify($itemCount);
$basesCovered = commify($basesCovered);
$totalBases = commify($totalBases);

# column 5 of the names file is used as the organism name and column 9
# as the assembly date; a missing column is printed as an empty string
my @nameFields = readNamesRecord($namesFile);
my $organism = defined($nameFields[4]) ? $nameFields[4] : "";
my $assemblyDate = defined($nameFields[8]) ? $nameFields[8] : "";

print <<"_EOF_";
<h2>Description</h2>
<p>
The NCBI Gene track for the $assemblyDate <em>${organism}</em>/$ncbiAsmId
genome assembly is constructed from the gff file <b>${ncbiAsmId}_genomic.gff.gz</b>
delivered with the NCBI RefSeq genome assemblies at the FTP location:<br>
<a href='ftp://ftp.ncbi.nlm.nih.gov/genomes/all/$ftpDirPath/' target='_blank'>ftp://ftp.ncbi.nlm.nih.gov/genomes/all/$ftpDirPath/</a>
</p>

<h2>Track statistics summary</h2>
<p>
<b>Total genome size: </b>$totalBases<br>
<b>Gene count: </b>$itemCount<br>
<b>Bases in genes: </b>$basesCovered<br>
<b>Percent genome coverage: </b>% $percentCoverage<br>
</p>
_EOF_