#!/usr/bin/env perl
#
# asmHubEnsGene.pl - write the HTML description page for the Ensembl genes
# track of an assembly hub to STDOUT.
#
# Provenance: reconstructed from the diff of commit
#   cf078156da3eb0042503c279d0c0bddb9786c355  hiram  Thu Mar 5 12:01:59 2020 -0800
#   "now running up Ensembl genes on assembly hubs refs #24963"
#   src/hg/utils/automation/asmHubEnsGene.pl  (index 4a4b82a..d8f08cf,
#   hunk @@ -1,85 +1,105 @@).  That commit added commify(), the lookup of
#   the featureBits result file and the "bases in exons only" annotation.
#
# Typical invocation:
#   $scriptDir/asmHubEnsGene.pl $asmId $buildDir/html/$asmId.names.tab \
#       $buildDir/bbi/$asmId "${ensVersion}" > $buildDir/html/$asmId.ensGene.html

use strict;
use warnings;
use FindBin qw($Bin);
use lib "$Bin";
use AsmHub;
use File::Basename;

my $argc = scalar(@ARGV);

if ($argc != 4) {
  printf STDERR "usage: asmHubEnsGene.pl asmId asmId.names.tab bbi/asmId ensVersion\n";
  printf STDERR "where asmId is the assembly identifier,\n";
  printf STDERR "and asmId.names.tab is naming file for this assembly,\n";
  printf STDERR "and bbi/asmId is the path prefix to .ensGene.bb.\n";
  printf STDERR "the ensVersion is from trackData/ensGene/version.txt\n";
  exit 255;
}

# from Perl Cookbook Recipe 2.17, print out large numbers with comma
# delimiters:
#   commify("1234567")  returns "1,234,567"
# Works on the reversed string so that groups of three digits are counted
# from the right; digits after a decimal point are left alone.
sub commify($) {
  my ($number) = @_;
  my $text = reverse $number;
  $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
  return scalar reverse $text;
}

# Read the tab-separated names file once and return its rows (one array
# reference of fields per line), skipping lines that begin with '#'.
# An unreadable file produces a warning and an empty result, so the page is
# still generated, just with empty name fields.
sub readNamesRows {
  my ($file) = @_;
  my @rows;
  if (open(my $fh, '<', $file)) {
    while (my $line = <$fh>) {
      chomp $line;
      next if ($line =~ m/^#/);
      push @rows, [ split(/\t/, $line, -1) ];
    }
    close($fh);
  } else {
    printf STDERR "WARNING: can not read names file '%s': %s\n", $file, $!;
  }
  return @rows;
}

# Return the 1-based column $col from every row, joined by newlines.
# Rows that are too short contribute an empty string.
sub namesColumn {
  my ($rows, $col) = @_;
  return join("\n",
    map { defined($_->[$col - 1]) ? $_->[$col - 1] : "" } @{$rows});
}

# Return the first whitespace-delimited token of the featureBits result
# file when that token is a plain integer, otherwise the empty string.
# split(' ', ...) is used so leading whitespace does not produce an empty
# first field.
sub exonBasesFromFile {
  my ($file) = @_;
  return "" if (! -s $file);
  my $content = "";
  if (open(my $fh, '<', $file)) {
    local $/ = undef;   # slurp the whole (small) file
    $content = <$fh>;
    close($fh);
  } else {
    printf STDERR "WARNING: can not read '%s': %s\n", $file, $!;
    return "";
  }
  return "" if (! defined($content));
  my ($firstToken) = split(' ', $content);
  return "" if (! defined($firstToken));
  return ($firstToken =~ m/^\d+$/) ? $firstToken : "";
}

# Run 'bigBedInfo' on the given bigBed file and reduce its output to a one
# line summary built from the itemCount: and basesCovered: lines, e.g.
#   "Gene count: 1,234; Bases covered: 5,678,901"
# The command is started without a shell (list-form pipe open) so the file
# name needs no quoting.  A failure to run it yields a warning and an
# empty summary.
sub bigBedSummary {
  my ($bbFile) = @_;
  my @words;
  if (open(my $fh, '-|', 'bigBedInfo', $bbFile)) {
    while (my $line = <$fh>) {
      next if ($line !~ m/itemCount:|basesCovered:/);
      push @words, split(' ', $line);
    }
    close($fh);
  } else {
    printf STDERR "WARNING: can not run bigBedInfo on '%s': %s\n", $bbFile, $!;
  }
  my $summary = join(' ', @words);
  $summary =~ s/itemCount/Gene count/;
  $summary =~ s/ basesCovered/; Bases covered/;
  return $summary;
}

my $asmId = shift;
my $namesFile = shift;
my $bbiPrefix = shift;
my $ensVersion = shift;
my $ensGeneBbi = "$bbiPrefix.ensGene.bb";

# Nothing to describe without the track data itself.
if ( ! -s $ensGeneBbi ) {
  printf STDERR "ERROR: can not find ensGene bbi file:\n\t'%s'\n", $ensGeneBbi;
  exit 255;
}

# The featureBits measurement lives beside the bbi directory, in
# trackData/ensGene.  Accept both '/some/path/bbi/asmId' and the relative
# 'bbi/asmId' form given in the usage message.
my $runDir = $bbiPrefix;
$runDir =~ s#(^|/)bbi/.*#${1}trackData/ensGene#;
my $fbResults = "${runDir}/fb.$asmId.ensGene.txt";
my $fbBases = exonBasesFromFile($fbResults);

my $em = "<em>";
my $noEm = "</em>";

# Columns of the names file used here: 5, 9 and 10.
my @namesRows = readNamesRows($namesFile);
my $assemblyDate = namesColumn(\@namesRows, 9);
my $ncbiAssemblyId = namesColumn(\@namesRows, 10);
my $organism = namesColumn(\@namesRows, 5);

my $geneCount = bigBedSummary($ensGeneBbi);
if (length($fbBases)) {
  $geneCount .= sprintf(" (%s bases in exons only)", commify($fbBases));
}

print <<"_EOF_";
<h2>Description</h2>
<p>
This track shows the Ensembl gene, $ensVersion, annotations on
the $assemblyDate $em${organism}$noEm/$asmId genome assembly.<br>
<br>
These gene predictions were generated by
<a href="http://www.ensembl.org/index.html" target="_blank">Ensembl</a>.<br>
<br>
$geneCount
</p>

<h2>Methods</h2>
<p>
For a description of the methods used in Ensembl gene predictions, please refer to
<a href="https://academic.oup.com/nar/article/30/1/38/1332872/The-Ensembl-genome-database-project" target="_blank">Hubbard <em>et al</em>. (2002)</a>,
also listed in the References section below.
</p>

<h2>Credits</h2>
<p>
We would like to thank Ensembl for providing this annotation. For more information, please see:
<a href="http://www.ensembl.org/info/genome/genebuild/genome_annotation.html" target=_blank>Ensembl's genome annotation page.</a>
</p>

<h2>References</h2>
<p>
Hubbard T, Barker D, Birney E, Cameron G, Chen Y, Clark L, Cox T, Cuff J, Curwen V, Down T <em>et al</em>.
<a href="https://academic.oup.com/nar/article/30/1/38/1332872/The-Ensembl-genome-database-project" target="_blank">The Ensembl genome database project</a>.
<em>Nucleic Acids Res</em>. 2002 Jan 1;30(1):38-41.
PMID: <a href="https://www.ncbi.nlm.nih.gov/pubmed/11752248" target="_blank">11752248</a>;
PMC: <a href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC99161/" target="_blank">PMC99161</a>
</p>
_EOF_