cf078156da3eb0042503c279d0c0bddb9786c355
hiram
  Thu Mar 5 12:01:59 2020 -0800
now running up Ensembl genes on assembly hubs refs #24963

diff --git src/hg/utils/automation/asmHubEnsGene.pl src/hg/utils/automation/asmHubEnsGene.pl
index 4a4b82a..d8f08cf 100755
--- src/hg/utils/automation/asmHubEnsGene.pl
+++ src/hg/utils/automation/asmHubEnsGene.pl
@@ -6,51 +6,71 @@
 use lib "$Bin";
 use AsmHub;
 use File::Basename;
 
 my $argc = scalar(@ARGV);
 
 if ($argc != 4) {
   printf STDERR "usage: asmHubEnsGene.pl asmId asmId.names.tab bbi/asmId ensVersion\n";
   printf STDERR "where asmId is the assembly identifier,\n";
   printf STDERR "and   asmId.names.tab is naming file for this assembly,\n";
   printf STDERR "and bbi/asmId is the path prefix to .ensGene.bb.\n";
   printf STDERR "the ensVersion is from trackData/ensGene/version.txt\n";
   exit 255;
 }
 
+# Insert thousands separators into a number for display, e.g. 1234567
+# becomes 1,234,567 (technique from Perl Cookbook Recipe 2.17):
+sub commify($) {
+    my $reversed = reverse($_[0]);  # reversed, so digit triples count from the right
+    $reversed =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;  # (?!\d*\.) leaves fraction digits alone
+    return scalar(reverse($reversed));
+}
+
+# example invocation (HTML goes to stdout): $scriptDir/asmHubEnsGene.pl $asmId $buildDir/html/$asmId.names.tab $buildDir/bbi/$asmId "${ensVersion}" > $buildDir/html/$asmId.ensGene.html
+
 my $asmId = shift;
 my $namesFile = shift;
 my $bbiPrefix = shift;
 my $ensVersion = shift;
 my $ensGeneBbi = "$bbiPrefix.ensGene.bb";
+# locate trackData/ensGene/fb.<asmId>.ensGene.txt beside the bbi/ directory (absolute or relative prefix)
+(my $runDir = $bbiPrefix) =~ s#(^|/)bbi/.*#${1}trackData/ensGene#;
+my $fbResults = "${runDir}/fb.$asmId.ensGene.txt";
+my $fbBases = "";   # first whitespace-delimited field of that file; stays "" when it can not be read
+if ( -s $fbResults and open(my $fh, "<", $fbResults) ) {   # read in-process, no shell 'cat'
+  ($fbBases) = (split(' ', do { local $/; <$fh> }), ""); close($fh);   # trailing "" keeps it defined
+}
 
 if ( ! -s $ensGeneBbi ) {
   printf STDERR "ERROR: can not find ensGene bbi file:\n\t'%s'\n", $ensGeneBbi;
   exit 255;
 }
 
 my $em = "<em>";
 my $noEm = "</em>";
 my $assemblyDate = `grep -v "^#" $namesFile | cut -f9`;
 chomp $assemblyDate;
 my $ncbiAssemblyId = `grep -v "^#" $namesFile | cut -f10`;
 chomp $ncbiAssemblyId;
 my $organism = `grep -v "^#" $namesFile | cut -f5`;
 chomp $organism;
 my $geneCount = `bigBedInfo $ensGeneBbi | egrep "itemCount:|basesCovered:" | xargs echo | sed -e 's/itemCount/Gene count/; s/ basesCovered/; Bases covered/;'`;
 chomp $geneCount;
+# when a base count was read from fb.<asmId>.ensGene.txt, append it with commas
+$geneCount .= sprintf(" (%s bases in exons only)", commify($fbBases))
+  if (length($fbBases));
 
 print <<_EOF_
 <h2>Description</h2>
 <p>
 This track shows the Ensembl gene, $ensVersion, annotations on
 the $assemblyDate $em${organism}$noEm/$asmId genome assembly.<br>
 <br>
 These gene predictions were generated by
 <a href="http://www.ensembl.org/index.html" target="_blank">Ensembl</a>.<br>
 <br>
 $geneCount
 </p>
 
 <h2>Methods</h2>