# Provenance (recovered from the commit record this excerpt came from):
#   commit:  cf078156da3eb0042503c279d0c0bddb9786c355
#   by/date: hiram, Thu Mar 5 12:01:59 2020 -0800
#   message: now running up Ensembl genes on assembly hubs refs #24963
#   file:    src/hg/utils/automation/asmHubEnsGene.pl
#            (index 4a4b82a..d8f08cf, hunk @@ -6,51 +6,71 @@)
#
# The excerpt begins at the hunk start; anything earlier in the script
# is not shown here.  Line breaks below were reconstructed from
# whitespace-collapsed text.
#
# Purpose: print, on STDOUT, the HTML description page for the Ensembl
# gene track of one assembly hub.

# NOTE(review): $Bin is presumably imported from FindBin earlier in the
# script -- confirm; that line is outside this excerpt.
use lib "$Bin";
use AsmHub;
use File::Basename;

my $argc = scalar(@ARGV);

# exactly four positional arguments are required
if ($argc != 4) {
  printf STDERR "usage: asmHubEnsGene.pl asmId asmId.names.tab bbi/asmId ensVersion\n";
  printf STDERR "where asmId is the assembly identifier,\n";
  printf STDERR "and asmId.names.tab is naming file for this assembly,\n";
  printf STDERR "and bbi/asmId is the path prefix to .ensGene.bb.\n";
  printf STDERR "the ensVersion is from trackData/ensGene/version.txt\n";
  exit 255;
}

# from Perl Cookbook Recipe 2.17, print out large numbers with comma
# delimiters:
#   argument: one scalar, the number (as text) to format
#   returns:  the same text with a comma after every third digit,
#             counted from the right; digits after a decimal point
#             are left alone
sub commify($) {
  # work on the reversed string so groups of three count from the right
  my $text = reverse $_[0];
  # (?!\d*\.) skips digit runs that belong to the fractional part
  $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
  return scalar reverse $text;
}

# $scriptDir/asmHubEnsGene.pl $asmId $buildDir/html/$asmId.names.tab $buildDir/bbi/$asmId > $buildDir/html/$asmId.ensGene.html "${ensVersion}"

my $asmId = shift;
my $namesFile = shift;
my $bbiPrefix = shift;
my $ensVersion = shift;
my $ensGeneBbi = "$bbiPrefix.ensGene.bb";

# The exon coverage result is looked for in the sibling directory
# trackData/ensGene, derived by replacing everything from "/bbi/" on.
# NOTE(review): a relative prefix such as "bbi/asmId" contains no "/bbi/"
# component, so $runDir stays equal to $bbiPrefix and the file below is
# simply not found -- confirm callers always pass a path with "/bbi/".
my $runDir = $bbiPrefix;
$runDir =~ s#/bbi/.*#/trackData/ensGene#;
my $fbResults = "${runDir}/fb.$asmId.ensGene.txt";

# $fbBases stays empty unless the results file supplies a plain integer
# as its first whitespace-delimited token.
my $fbBases = "";
if ( -s $fbResults ) {
  # read the file directly: no shell, no dependence on path quoting
  if (open(my $fbFh, "<", $fbResults)) {
    my $firstLine = <$fbFh>;
    close($fbFh);
    if (defined($firstLine)) {
      # split ' ' ignores leading whitespace, so the first field is
      # never an empty string
      my ($bases) = split(' ', $firstLine);
      if (defined($bases) && $bases =~ m/\A\d+\z/) {
        $fbBases = $bases;
      }
    }
  } else {
    printf STDERR "WARNING: can not read '%s': %s\n", $fbResults, $!;
  }
}

# the bigBed file is mandatory; without it there is nothing to describe
if ( ! -s $ensGeneBbi ) {
  printf STDERR "ERROR: can not find ensGene bbi file:\n\t'%s'\n", $ensGeneBbi;
  exit 255;
}

my $em = "<em>";
my $noEm = "</em>";

# columns 9, 10 and 5 of the non-comment line(s) of the names file
my $assemblyDate = `grep -v "^#" $namesFile | cut -f9`;
chomp $assemblyDate;
my $ncbiAssemblyId = `grep -v "^#" $namesFile | cut -f10`;
chomp $ncbiAssemblyId;
my $organism = `grep -v "^#" $namesFile | cut -f5`;
chomp $organism;

# Turn the itemCount/basesCovered lines of bigBedInfo into one sentence
# fragment.  "grep -E" replaces the obsolescent "egrep" spelling.
my $geneCount = `bigBedInfo $ensGeneBbi | grep -E "itemCount:|basesCovered:" | xargs echo | sed -e 's/itemCount/Gene count/; s/ basesCovered/; Bases covered/;'`;
chomp $geneCount;
if (length($fbBases)) {
  $geneCount .= sprintf(" (%s bases in exons only)", commify($fbBases));
}

# The here-document continues past the end of this excerpt; its
# terminator is not shown here.
print <<_EOF_
<h2>Description</h2>
<p>
This track shows the Ensembl gene, $ensVersion, annotations on
the $assemblyDate $em${organism}$noEm/$asmId genome assembly.<br>
<br>
These gene predictions were generated by <a href="http://www.ensembl.org/index.html" target="_blank">Ensembl</a>.<br>
<br>
$geneCount
</p>

<h2>Methods</h2>