#!/usr/bin/env perl
#
# asmHubEnsGene.pl - print, on stdout, the description page text for the
# Ensembl genes track of an assembly hub.
#
# Provenance: src/hg/utils/automation/asmHubEnsGene.pl as of commit
#   cf078156da3eb0042503c279d0c0bddb9786c355  hiram  Thu Mar 5 12:01:59 2020 -0800
#   "now running up Ensembl genes on assembly hubs refs #24963"
#
# Arguments (all four are required):
#   asmId            - the assembly identifier
#   asmId.names.tab  - naming file for this assembly (tab separated,
#                      lines starting with '#' are comments)
#   bbi/asmId        - path prefix to the .ensGene.bb file
#   ensVersion       - Ensembl version, from trackData/ensGene/version.txt
#
# Typical invocation:
#   $scriptDir/asmHubEnsGene.pl $asmId $buildDir/html/$asmId.names.tab \
#       $buildDir/bbi/$asmId "${ensVersion}" > $buildDir/html/$asmId.ensGene.html
#
# Exit status is 255 on a usage error or when the ensGene bbi file is
# missing or empty.

use strict;
use warnings;
use FindBin qw($Bin);
use lib "$Bin";
use AsmHub;
use File::Basename;

my $argc = scalar(@ARGV);

if ($argc != 4) {
  printf STDERR "usage: asmHubEnsGene.pl asmId asmId.names.tab bbi/asmId ensVersion\n";
  printf STDERR "where asmId is the assembly identifier,\n";
  printf STDERR "and asmId.names.tab is naming file for this assembly,\n";
  printf STDERR "and bbi/asmId is the path prefix to .ensGene.bb.\n";
  printf STDERR "the ensVersion is from trackData/ensGene/version.txt\n";
  exit 255;
}

# from Perl Cookbook Recipe 2.17, print out large numbers with comma
# delimiters:
#   commify("1234567") returns "1,234,567"
sub commify {
  my ($number) = @_;
  my $text = reverse $number;
  # working on the reversed string: add a comma after each group of three
  # digits that is followed by another digit, leaving decimals alone
  $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
  return scalar reverse $text;
}

# Return the tab separated fields of the first non-comment line of the
# given names file.  An unreadable file is reported on STDERR and yields
# an empty list, so that the page is still produced (with blank values).
sub namesFields {
  my ($file) = @_;
  my @fields;
  if (open(my $fh, '<', $file)) {
    while (my $line = <$fh>) {
      next if ($line =~ m/^#/);
      chomp $line;
      @fields = split(/\t/, $line, -1);
      last;
    }
    close($fh);
  } else {
    printf STDERR "WARNING: can not read names file '%s': %s\n", $file, $!;
  }
  return @fields;
}

# Return the leading integer found in the given file, or "" when the file
# can not be read or does not begin with a number.  Leading white space is
# tolerated.
sub leadingCount {
  my ($file) = @_;
  my $count = "";
  if (open(my $fh, '<', $file)) {
    my $contents = do { local $/; <$fh> };
    close($fh);
    if (defined($contents) && $contents =~ m/\A\s*(\d+)/) {
      $count = $1;
    }
  } else {
    printf STDERR "WARNING: can not read '%s': %s\n", $file, $!;
  }
  return $count;
}

# Run bigBedInfo on the given bigBed file and return a one line summary:
#   "Gene count: N; Bases covered: M"
# built from its 'itemCount:' and 'basesCovered:' output lines.  The command
# is run without a shell (list form pipe open) so the path needs no quoting.
# Returns "" when bigBedInfo can not be run or prints neither line.
sub bigBedSummary {
  my ($bbiFile) = @_;
  my @words;
  if (open(my $fh, '-|', 'bigBedInfo', $bbiFile)) {
    while (my $line = <$fh>) {
      next if ($line !~ m/itemCount:|basesCovered:/);
      # split(' ', ...) drops leading white space and the newline
      push @words, split(' ', $line);
    }
    close($fh);
  } else {
    printf STDERR "WARNING: can not run bigBedInfo on '%s': %s\n", $bbiFile, $!;
  }
  my $summary = join(' ', @words);
  $summary =~ s/itemCount/Gene count/;
  $summary =~ s/ basesCovered/; Bases covered/;
  return $summary;
}

my $asmId = shift;
my $namesFile = shift;
my $bbiPrefix = shift;
my $ensVersion = shift;
my $ensGeneBbi = "$bbiPrefix.ensGene.bb";

# the ensGene work directory is a sibling of the bbi/ directory:
#   .../bbi/asmId  ->  .../trackData/ensGene
my $runDir = $bbiPrefix;
$runDir =~ s#/bbi/.*#/trackData/ensGene#;

# optional file: its first white space delimited token is reported below
# as the number of bases in exons
my $fbResults = "${runDir}/fb.$asmId.ensGene.txt";
my $fbBases = "";
if ( -s $fbResults ) {
  $fbBases = leadingCount($fbResults);
}

if ( ! -s $ensGeneBbi ) {
  printf STDERR "ERROR: can not find ensGene bbi file:\n\t'%s'\n", $ensGeneBbi;
  exit 255;
}

# emphasis markers placed around the organism name, currently empty
my $em = "";
my $noEm = "";

# columns 5, 9 and 10 (1-based) of the names file; absent columns become ""
my ($organism, $assemblyDate, $ncbiAssemblyId) =
  map { defined($_) ? $_ : "" } (namesFields($namesFile))[4, 8, 9];
# NOTE: $ncbiAssemblyId is not referenced by the page text below

my $geneCount = bigBedSummary($ensGeneBbi);
if (length($fbBases)) {
  $geneCount .= sprintf(" (%s bases in exons only)", commify($fbBases));
}

print <<"_EOF_";

Description

This track shows the Ensembl gene, $ensVersion, annotations on the $assemblyDate $em${organism}$noEm/$asmId genome assembly.

These gene predictions were generated by Ensembl.

$geneCount

Methods

For a description of the methods used in Ensembl gene predictions, please refer to Hubbard et al. (2002), also listed in the References section below.

Credits

We would like to thank Ensembl for providing this annotation. For more information, please see: Ensembl's genome annotation page.

References

Hubbard T, Barker D, Birney E, Cameron G, Chen Y, Clark L, Cox T, Cuff J, Curwen V, Down T et al. The Ensembl genome database project. Nucleic Acids Res. 2002 Jan 1;30(1):38-41. PMID: 11752248; PMC: PMC99161

_EOF_