595d44078d99eb1f717f097e2e4c886c450b2fe1 hiram Tue Mar 28 12:19:29 2023 -0700 adding download links on the gene track description pages for the GTF file no redmine diff --git src/hg/utils/automation/asmHubXenoRefGene.pl src/hg/utils/automation/asmHubXenoRefGene.pl index 90a17e7..80f09dc 100755 --- src/hg/utils/automation/asmHubXenoRefGene.pl +++ src/hg/utils/automation/asmHubXenoRefGene.pl @@ -4,77 +4,78 @@ use warnings; use FindBin qw($Bin); use lib "$Bin"; use AsmHub; use File::Basename; my $argc = scalar(@ARGV); if ($argc != 3) { printf STDERR "usage: asmHubXenoRefGene.pl asmId asmId.names.tab .../trackData/\n"; printf STDERR "where asmId is the assembly identifier,\n"; printf STDERR "and .../trackData/ is the path to the /trackData/ directory.\n"; exit 255; } -# from Perl Cookbook Recipe 2.17, print out large numbers with comma -# delimiters: -sub commify($) { - my $text = reverse $_[0]; - $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g; - return scalar reverse $text -} - my $asmId = shift; +my @parts = split('_', $asmId, 3); +my $accession = "$parts[0]_$parts[1]"; my $namesFile = shift; my $trackDataDir = shift; my $xenoRefGeneBbi = "$trackDataDir/xenoRefGene/$asmId.xenoRefGene.bb"; +my $asmIdPath = &AsmHub::asmIdToPath($asmId); +my $downloadGtf = "https://hgdownload.soe.ucsc.edu/hubs/$asmIdPath/$accession/genes/$asmId.xenoRefGene.gtf.gz"; if ( ! -s $xenoRefGeneBbi ) { printf STDERR "ERROR: can not find $asmId.xenoRefGene.bb file\n"; exit 255; } my $totalBases = `ave -col=2 $trackDataDir/../${asmId}.chrom.sizes | grep "^total" | awk '{printf "%d", \$2}'`; chomp $totalBases; my $geneStats = `cat $trackDataDir/xenoRefGene/${asmId}.xenoRefGene.stats.txt | awk '{printf "%d\\n", \$2}' | xargs echo`; chomp $geneStats; my ($itemCount, $basesCovered) = split('\s+', $geneStats); my $percentCoverage = sprintf("%.3f", 100.0 * $basesCovered / $totalBases); -$itemCount = commify($itemCount); -$basesCovered = commify($basesCovered); -$totalBases = commify($totalBases); +$itemCount = &AsmHub::commify($itemCount); +$basesCovered = &AsmHub::commify($basesCovered); +$totalBases = &AsmHub::commify($totalBases); my $em = "<em>"; my $noEm = "</em>"; my $assemblyDate = `grep -v "^#" $namesFile | cut -f9`; chomp $assemblyDate; my $ncbiAssemblyId = `grep -v "^#" $namesFile | cut -f10`; chomp $ncbiAssemblyId; my $organism = `grep -v "^#" $namesFile | cut -f5`; chomp $organism; print <<_EOF_ <h2>Description</h2> <p> The RefSeq mRNAs gene track for the $assemblyDate $em${organism}$noEm/$asmId genome assembly displays translated blat alignments of vertebrate and invertebrate mRNA in <a href="https://www.ncbi.nlm.nih.gov/genbank/" target="_blank"> GenBank</a>. </p> +<h2>Data Access</h2> +<p> +Download <a href='$downloadGtf' target=_blank> $asmId.xenoRefGene.gtf.gz </a> GTF file. +</p> + <h2>Track statistics summary</h2> <p> <b>Total genome size: </b>$totalBases<br> <b>Gene count: </b>$itemCount<br> <b>Bases in genes: </b>$basesCovered<br> <b>Percent genome coverage: </b>% $percentCoverage<br> </p> <h2>Methods</h2> <p> The mRNAs were aligned against the $em${organism}$noEm/$asmId genome using translated blat. When a single mRNA aligned in multiple places, the alignment having the highest base identity was found. Only those alignments having a base identity level within 1% of the best and at least 25% base identity with the