595d44078d99eb1f717f097e2e4c886c450b2fe1 hiram Tue Mar 28 12:19:29 2023 -0700 adding download links on the gene track description pages for the GTF file no redmine diff --git src/hg/utils/automation/asmHubXenoRefGene.pl src/hg/utils/automation/asmHubXenoRefGene.pl index 90a17e7..80f09dc 100755 --- src/hg/utils/automation/asmHubXenoRefGene.pl +++ src/hg/utils/automation/asmHubXenoRefGene.pl @@ -4,77 +4,78 @@ use warnings; use FindBin qw($Bin); use lib "$Bin"; use AsmHub; use File::Basename; my $argc = scalar(@ARGV); if ($argc != 3) { printf STDERR "usage: asmHubXenoRefGene.pl asmId asmId.names.tab .../trackData/\n"; printf STDERR "where asmId is the assembly identifier,\n"; printf STDERR "and .../trackData/ is the path to the /trackData/ directory.\n"; exit 255; } -# from Perl Cookbook Recipe 2.17, print out large numbers with comma -# delimiters: -sub commify($) { - my $text = reverse $_[0]; - $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g; - return scalar reverse $text -} - my $asmId = shift; +my @parts = split('_', $asmId, 3); +my $accession = "$parts[0]_$parts[1]"; my $namesFile = shift; my $trackDataDir = shift; my $xenoRefGeneBbi = "$trackDataDir/xenoRefGene/$asmId.xenoRefGene.bb"; +my $asmIdPath = &AsmHub::asmIdToPath($asmId); +my $downloadGtf = "https://hgdownload.soe.ucsc.edu/hubs/$asmIdPath/$accession/genes/$asmId.xenoRefGene.gtf.gz"; if ( ! 
-s $xenoRefGeneBbi ) { printf STDERR "ERROR: can not find $asmId.xenoRefGene.bb file\n"; exit 255; } my $totalBases = `ave -col=2 $trackDataDir/../${asmId}.chrom.sizes | grep "^total" | awk '{printf "%d", \$2}'`; chomp $totalBases; my $geneStats = `cat $trackDataDir/xenoRefGene/${asmId}.xenoRefGene.stats.txt | awk '{printf "%d\\n", \$2}' | xargs echo`; chomp $geneStats; my ($itemCount, $basesCovered) = split('\s+', $geneStats); my $percentCoverage = sprintf("%.3f", 100.0 * $basesCovered / $totalBases); -$itemCount = commify($itemCount); -$basesCovered = commify($basesCovered); -$totalBases = commify($totalBases); +$itemCount = &AsmHub::commify($itemCount); +$basesCovered = &AsmHub::commify($basesCovered); +$totalBases = &AsmHub::commify($totalBases); my $em = ""; my $noEm = ""; my $assemblyDate = `grep -v "^#" $namesFile | cut -f9`; chomp $assemblyDate; my $ncbiAssemblyId = `grep -v "^#" $namesFile | cut -f10`; chomp $ncbiAssemblyId; my $organism = `grep -v "^#" $namesFile | cut -f5`; chomp $organism; print <<_EOF_

Description

The RefSeq mRNAs gene track for the $assemblyDate $em${organism}$noEm/$asmId genome assembly displays translated blat alignments of vertebrate and invertebrate mRNA in GenBank.

+

Data Access

+

+Download $asmId.xenoRefGene.gtf.gz GTF file. +

+

Track statistics summary

Total genome size: $totalBases
Gene count: $itemCount
Bases in genes: $basesCovered
Percent genome coverage: $percentCoverage %

Methods

The mRNAs were aligned against the $em${organism}$noEm/$asmId genome using translated blat. When a single mRNA aligned in multiple places, the alignment having the highest base identity was found. Only those alignments having a base identity level within 1% of the best and at least 25% base identity with the