595d44078d99eb1f717f097e2e4c886c450b2fe1 hiram Tue Mar 28 12:19:29 2023 -0700 adding download links on the gene track description pages for the GTF file no redmine diff --git src/hg/utils/automation/asmHubNcbiRefSeq.pl src/hg/utils/automation/asmHubNcbiRefSeq.pl index 64a2be6..e79b908 100755 --- src/hg/utils/automation/asmHubNcbiRefSeq.pl +++ src/hg/utils/automation/asmHubNcbiRefSeq.pl @@ -4,64 +4,60 @@ use warnings; use FindBin qw($Bin); use lib "$Bin"; use AsmHub; use File::Basename; my $argc = scalar(@ARGV); if ($argc != 3) { printf STDERR "usage: asmHubNcbiGene.pl asmId asmId.names.tab .../trackData/\n"; printf STDERR "where asmId is the assembly identifier,\n"; printf STDERR "and .../trackData/ is the path to the /trackData/ directory.\n"; exit 255; } -# from Perl Cookbook Recipe 2.17, print out large numbers with comma -# delimiters: -sub commify($) { - my $text = reverse $_[0]; - $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g; - return scalar reverse $text -} - my $asmId = shift; +my @parts = split('_', $asmId, 3); +my $accession = "$parts[0]_$parts[1]"; my $namesFile = shift; my $trackDataDir = shift; my $ncbiRefSeqBbi = "$trackDataDir/ncbiRefSeq/$asmId.ncbiRefSeq.bb"; my $asmType = "refseq"; +my $asmIdPath = &AsmHub::asmIdToPath($asmId); +my $downloadGtf = "https://hgdownload.soe.ucsc.edu/hubs/$asmIdPath/$accession/genes/$asmId.ncbiRefSeq.gtf.gz"; if ( ! -s $ncbiRefSeqBbi ) { printf STDERR "ERROR: can not find $asmId.ncbiRefSeq.bb file\n"; exit 255; } my @partNames = split('_', $asmId); my $ftpDirPath = sprintf("%s/%s/%s/%s/%s", $partNames[0], substr($partNames[1],0,3), substr($partNames[1],3,3), substr($partNames[1],6,3), $asmId); $asmType = "genbank" if ($partNames[0] =~ m/GCA/); my $totalBases = `ave -col=2 $trackDataDir/../${asmId}.chrom.sizes | grep "^total" | awk '{printf "%d", \$2}'`; chomp $totalBases; my $geneStats = `cat $trackDataDir/ncbiRefSeq/${asmId}.ncbiRefSeq.stats.txt | awk '{printf "%d\\n", \$2}' | xargs echo`; chomp $geneStats; my ($itemCount, $basesCovered) = split('\s+', $geneStats); my $percentCoverage = sprintf("%.3f", 100.0 * $basesCovered / $totalBases); -$itemCount = commify($itemCount); -$basesCovered = commify($basesCovered); -my $totalBasesCmfy = commify($totalBases); +$itemCount = &AsmHub::commify($itemCount); +$basesCovered = &AsmHub::commify($basesCovered); +my $totalBasesCmfy = &AsmHub::commify($totalBases); my $em = "<em>"; my $noEm = "</em>"; my $assemblyDate = `grep -v "^#" $namesFile | cut -f9`; chomp $assemblyDate; my $ncbiAssemblyId = `grep -v "^#" $namesFile | cut -f10`; chomp $ncbiAssemblyId; my $organism = `grep -v "^#" $namesFile | cut -f5`; chomp $organism; print <<_EOF_ <h2>Description</h2> <p> The NCBI RefSeq Genes composite track shows $assemblyDate $em${organism}$noEm/$asmId @@ -71,30 +67,35 @@ the different tracks were created. </p> <p> Please visit NCBI's <a href="https://www.ncbi.nlm.nih.gov/projects/RefSeq/update.cgi" target="_blank"> Feedback for Gene and Reference Sequences (RefSeq)</a> page to make suggestions, submit additions and corrections, or ask for help concerning RefSeq records. </p> <p> For more information on the different gene tracks, see our <a target=_blank href="/FAQ/FAQgenes.html">Genes FAQ</a>. </p> +<h2>Data Access</h2> +<p> +Download <a href='$downloadGtf' target=_blank> $asmId.ncbiRefSeq.gtf.gz </a> GTF file. +</p> + <h2>Display Conventions and Configuration</h2> <p> To show only a selected set of subtracks, uncheck the boxes next to the tracks that you wish to hide. </p> The tracks available here can include (not all may be present): <dl> <dt><em><strong>RefSeq annotations and alignments</strong></em></dt> <ul> <li><em>RefSeq All</em> – all curated and predicted annotations provided by RefSeq.</li> <li><em>RefSeq Curated</em> – subset of <em>RefSeq All</em> that includes only those annotations whose accessions begin with NM, NR, NP or YP. <small>(NP and YP are used only for protein-coding genes on @@ -190,76 +191,76 @@ <p> <b>Total genome size: </b>$totalBasesCmfy <b>bases</b><br><br> <b>Curated and Predicted Gene count: </b>$itemCount<br> <b>Bases in these genes: </b>$basesCovered<br> <b>Percent genome coverage: </b>% $percentCoverage<br> </p> _EOF_ ; if ( -s "$trackDataDir/ncbiRefSeq/${asmId}.ncbiRefSeqCurated.stats.txt" ) { $geneStats = `cat $trackDataDir/ncbiRefSeq/${asmId}.ncbiRefSeqCurated.stats.txt | awk '{printf "%d\\n", \$2}' | xargs echo`; chomp $geneStats; ($itemCount, $basesCovered) = split('\s+', $geneStats); $percentCoverage = sprintf("%.3f", 100.0 * $basesCovered / $totalBases); - $itemCount = commify($itemCount); - $basesCovered = commify($basesCovered); + $itemCount = &AsmHub::commify($itemCount); + $basesCovered = &AsmHub::commify($basesCovered); printf <<_EOF_ <p> <b>Curated gene count: </b>$itemCount<br> <b>Bases in curated genes: </b>$basesCovered<br> <b>Percent genome coverage: </b>%% $percentCoverage<br> </p> _EOF_ } else { printf <<_EOF_ <p> <b>There are no curated gene annotations.</b> </p> _EOF_ } if ( -s "$trackDataDir/ncbiRefSeq/${asmId}.ncbiRefSeqPredicted.stats.txt" ) { $geneStats = `cat $trackDataDir/ncbiRefSeq/${asmId}.ncbiRefSeqPredicted.stats.txt | awk '{printf "%d\\n", \$2}' | xargs echo`; chomp $geneStats; ($itemCount, $basesCovered) = split('\s+', $geneStats); $percentCoverage = sprintf("%.3f", 100.0 * $basesCovered / $totalBases); - $itemCount = commify($itemCount); - $basesCovered = commify($basesCovered); + $itemCount = &AsmHub::commify($itemCount); + $basesCovered = &AsmHub::commify($basesCovered); printf <<_EOF_ <p> <b>Predicted gene count: </b>$itemCount<br> <b>Bases in genes: </b>$basesCovered<br> <b>Percent genome coverage: </b>%% $percentCoverage<br> </p> _EOF_ } else { printf <<_EOF_ <p> <b>there are no predicted gene annotations</b> </p> _EOF_ } if ( -s "$trackDataDir/ncbiRefSeq/${asmId}.ncbiRefSeqOther.stats.txt" ) { $geneStats = `cat $trackDataDir/ncbiRefSeq/${asmId}.ncbiRefSeqOther.stats.txt | awk '{printf "%d\\n", \$2}' | xargs echo`; chomp $geneStats; ($itemCount, $basesCovered) = split('\s+', $geneStats); $percentCoverage = sprintf("%.3f", 100.0 * $basesCovered / $totalBases); - $itemCount = commify($itemCount); - $basesCovered = commify($basesCovered); + $itemCount = &AsmHub::commify($itemCount); + $basesCovered = &AsmHub::commify($basesCovered); printf <<_EOF_ <p> <b>Other annotation count: </b>$itemCount<br> <b>Bases in other annotations: </b>$basesCovered<br> <b>Percent genome coverage: </b>%% $percentCoverage<br> </p> _EOF_ } printf <<_EOF_ <h2>Credits</h2> <p> This track was produced at UCSC from data generated by scientists worldwide and curated by the NCBI RefSeq project. </p>