src/hg/utils/automation/asmHubXenoRefGene.pl 595d44078d99eb1f717f097e2e4c886c450b2fe1

595d44078d99eb1f717f097e2e4c886c450b2fe1
hiram
  Tue Mar 28 12:19:29 2023 -0700
Adding download links for the GTF file on the gene track description pages; no Redmine ticket.

diff --git src/hg/utils/automation/asmHubXenoRefGene.pl src/hg/utils/automation/asmHubXenoRefGene.pl
index 90a17e7..80f09dc 100755
--- src/hg/utils/automation/asmHubXenoRefGene.pl
+++ src/hg/utils/automation/asmHubXenoRefGene.pl
@@ -4,77 +4,78 @@
 use warnings;
 use FindBin qw($Bin);
 use lib "$Bin";
 use AsmHub;
 use File::Basename;
 
 my $argc = scalar(@ARGV);
 
 if ($argc != 3) {
   printf STDERR "usage: asmHubXenoRefGene.pl asmId asmId.names.tab .../trackData/\n";
   printf STDERR "where asmId is the assembly identifier,\n";
   printf STDERR "and .../trackData/ is the path to the /trackData/ directory.\n";
   exit 255;
 }
 
-# from Perl Cookbook Recipe 2.17, print out large numbers with comma
-# delimiters:
-sub commify($) {
-    my $text = reverse $_[0];
-    $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
-    return scalar reverse $text
-}
-
 my $asmId = shift;
+my @parts = split('_', $asmId, 3);
+my $accession = "$parts[0]_$parts[1]";
 my $namesFile = shift;
 my $trackDataDir = shift;
 my $xenoRefGeneBbi = "$trackDataDir/xenoRefGene/$asmId.xenoRefGene.bb";
+my $asmIdPath = &AsmHub::asmIdToPath($asmId);
+my $downloadGtf = "https://hgdownload.soe.ucsc.edu/hubs/$asmIdPath/$accession/genes/$asmId.xenoRefGene.gtf.gz";
 
 if ( ! -s $xenoRefGeneBbi ) {
   printf STDERR "ERROR: can not find $asmId.xenoRefGene.bb file\n";
   exit 255;
 }
 
 my $totalBases = `ave -col=2 $trackDataDir/../${asmId}.chrom.sizes | grep "^total" | awk '{printf "%d", \$2}'`;
 chomp $totalBases;
 my $geneStats = `cat $trackDataDir/xenoRefGene/${asmId}.xenoRefGene.stats.txt | awk '{printf "%d\\n", \$2}' | xargs echo`;
 chomp $geneStats;
 my ($itemCount, $basesCovered) = split('\s+', $geneStats);
 my $percentCoverage = sprintf("%.3f", 100.0 * $basesCovered / $totalBases);
-$itemCount = commify($itemCount);
-$basesCovered = commify($basesCovered);
-$totalBases = commify($totalBases);
+$itemCount = &AsmHub::commify($itemCount);
+$basesCovered = &AsmHub::commify($basesCovered);
+$totalBases = &AsmHub::commify($totalBases);
 
 my $em = "<em>";
 my $noEm = "</em>";
 my $assemblyDate = `grep -v "^#" $namesFile | cut -f9`;
 chomp $assemblyDate;
 my $ncbiAssemblyId = `grep -v "^#" $namesFile | cut -f10`;
 chomp $ncbiAssemblyId;
 my $organism = `grep -v "^#" $namesFile | cut -f5`;
 chomp $organism;
 
 print <<_EOF_
 <h2>Description</h2>
 
 <p>
 The RefSeq mRNAs gene track for the $assemblyDate $em${organism}$noEm/$asmId
 genome assembly displays translated blat alignments of vertebrate and
 invertebrate mRNA in
 <a href="https://www.ncbi.nlm.nih.gov/genbank/" target="_blank"> GenBank</a>.
 </p>
 
+<h2>Data Access</h2>
+<p>
+Download <a href='$downloadGtf' target=_blank> $asmId.xenoRefGene.gtf.gz </a> GTF file.
+</p>
+
 <h2>Track statistics summary</h2>
 <p>
 <b>Total genome size: </b>$totalBases<br>
 <b>Gene count: </b>$itemCount<br>
 <b>Bases in genes: </b>$basesCovered<br>
 <b>Percent genome coverage: </b>% $percentCoverage<br>
 </p>
 
 <h2>Methods</h2>
 
 <p>
 The mRNAs were aligned against the $em${organism}$noEm/$asmId genome using
 translated blat.  When a single mRNA aligned in multiple places, the alignment
 having the highest base identity was found.  Only those alignments having a base
 identity level within 1% of the best and at least 25% base identity with the