src/hg/utils/automation/asmHubAugustusGene.pl 595d44078d99eb1f717f097e2e4c886c450b2fe1

595d44078d99eb1f717f097e2e4c886c450b2fe1
hiram
  Tue Mar 28 12:19:29 2023 -0700
adding download links on the gene track description pages for the GTF file no redmine

diff --git src/hg/utils/automation/asmHubAugustusGene.pl src/hg/utils/automation/asmHubAugustusGene.pl
index fad0f2f..1f65119 100755
--- src/hg/utils/automation/asmHubAugustusGene.pl
+++ src/hg/utils/automation/asmHubAugustusGene.pl
@@ -6,33 +6,37 @@
 use lib "$Bin";
 use AsmHub;
 use File::Basename;
 
 # Require exactly three command-line arguments; with any other count,
 # print the usage text to STDERR and exit with status 255.
 my $argc = scalar(@ARGV);
 
 if ($argc != 3) {
   printf STDERR "usage: asmHubAugustusGene.pl asmId asmId.names.tab bbi/asmId\n";
   printf STDERR "where asmId is the assembly identifier,\n";
   printf STDERR "and   asmId.names.tab is naming file for this assembly,\n";
   printf STDERR "and bbi/asmId is the path prefix to .augustus.bb.\n";
   exit 255;
 }
 
 # Unpack the three positional arguments (asmId, names file, bbi path prefix)
 # and derive the values used later in the generated description page.
 my $asmId = shift;
 # Split asmId on '_' into at most three fields; the first two fields rejoined
 # with '_' form the accession used as a directory name in the download URL.
 # NOTE(review): if asmId contains no '_', $parts[1] is undef here -- confirm
 # that callers always pass an identifier with at least two '_'-separated fields.
+my @parts = split('_', $asmId, 3);
+my $accession = "$parts[0]_$parts[1]";
 my $namesFile = shift;
 my $bbiPrefix = shift;
 # The bigBed file is expected at "<bbiPrefix>.augustus.bb".
 my $augustusBbi = "$bbiPrefix.augustus.bb";
 # NOTE(review): AsmHub::asmIdToPath is defined outside this file; presumably
 # it returns the directory portion of the hub path for this asmId -- confirm.
+my $asmIdPath = &AsmHub::asmIdToPath($asmId);
 # URL of the gzipped GTF file, linked from the "Data Access" section of the page.
+my $downloadGtf = "https://hgdownload.soe.ucsc.edu/hubs/$asmIdPath/$accession/genes/$asmId.augustus.gtf.gz";
 
 # -s is true only when the file exists and has non-zero size, so a missing
 # or empty bigBed file both end the script here with exit status 255.
 if ( ! -s $augustusBbi ) {
   printf STDERR "ERROR: can not find augustus bbi file:\n\t'%s'\n", $augustusBbi;
   exit 255;
 }
 
 # HTML emphasis open/close tags, interpolated around the organism name in
 # the description text printed below.
 my $em = "<em>";
 my $noEm = "</em>";
 # Pull tab-separated columns 9, 10 and 5 from the non-comment lines of the
 # names file (lines starting with '#' are dropped by grep -v); chomp removes
 # the trailing newline of each command's output.
 # NOTE(review): each backtick command returns the output for every
 # non-comment line, so a names file with more than one data row would leave
 # embedded newlines in these values -- presumably the file has one data row.
 # NOTE(review): $namesFile is interpolated unquoted into a shell command, so
 # a path containing spaces or shell metacharacters would not work as intended.
 my $assemblyDate = `grep -v "^#" $namesFile | cut -f9`;
 chomp $assemblyDate;
 my $ncbiAssemblyId = `grep -v "^#" $namesFile | cut -f10`;
 chomp $ncbiAssemblyId;
 my $organism = `grep -v "^#" $namesFile | cut -f5`;
 chomp $organism;
 
@@ -40,30 +44,35 @@
 chomp $geneCount;
 
 print <<_EOF_
 <h2>Description</h2>
 <p>
 This track shows <i>ab initio</i> predictions from the program
   <a href="http://bioinf.uni-greifswald.de/augustus/"
      target="_blank">AUGUSTUS</a> (version 3.1).
 for the $assemblyDate $em${organism}$noEm/$asmId genome assembly.<br>
 <br>
 The predictions are based on the genome sequence alone.<br>
 <br>
 $geneCount
 </p>
 
+<h2>Data Access</h2>
+<p>
+Download <a href='$downloadGtf' target=_blank> $asmId.augustus.gtf.gz </a> GTF file.
+</p>
+
 <h2>Methods</h2>
 
 <p>
 Statistical signal models were built for splice sites, branch-point
 patterns, translation start sites, and the poly-A signal.
 Furthermore, models were built for the sequence content of
 protein-coding and non-coding regions as well as for the length distributions
 of different exon and intron types. Detailed descriptions of most of these different models
 can be found in Mario Stanke's
 <a href="http://ediss.uni-goettingen.de/handle/11858/00-1735-0000-0006-B3F8-4" target="_blank">dissertation</a>.
 This track shows the most likely gene structure according to a
 Semi-Markov Conditional Random Field model.
 Alternative splicing transcripts were obtained with
 a sampling algorithm (<tt>--alternatives-from-sampling=true --sample=100 --minexonintronprob=0.2
 --minmeanexonintronprob=0.5 --maxtracks=3 --temperature=2</tt>).