e76c704d9c5c6dfa06f7d7e15387fd14f651d9bf hiram Fri May 24 10:09:39 2024 -0700 correct FTP path no redmine diff --git src/hg/utils/automation/asmHubNcbiRefSeq.pl src/hg/utils/automation/asmHubNcbiRefSeq.pl index c254633..e4b63c0 100755 --- src/hg/utils/automation/asmHubNcbiRefSeq.pl +++ src/hg/utils/automation/asmHubNcbiRefSeq.pl @@ -18,45 +18,45 @@ my $asmId = shift; my @parts = split('_', $asmId, 3); my $accession = "$parts[0]_$parts[1]"; my $namesFile = shift; my $trackDataDir = shift; my $ncbiRefSeqBbi = "$trackDataDir/ncbiRefSeq/$asmId.ncbiRefSeq.bb"; my $srcGff = `ls $trackDataDir/ncbiRefSeq/download/*_genomic.gff.gz | head -1`; chomp $srcGff; my $srcAsmId = $asmId; my $gcfToGcaLiftedText = ""; if (length($srcGff) > 10) { $srcAsmId = basename($srcGff); $srcAsmId =~ s/_genomic.gff.gz//; if ($srcAsmId ne $asmId) { - $gcfToGcaLiftedText = "RefSeq annotations from $srcAsmId were lifted to this $asmId assembly to provide these gene annotations on this corresponding assembly." + $gcfToGcaLiftedText = "RefSeq annotations from $srcAsmId were lifted to this $asmId assembly to provide these gene annotations on this corresponding assembly." } } my $asmIdPath = &AsmHub::asmIdToPath($asmId); my $downloadGtf = "https://hgdownload.soe.ucsc.edu/hubs/$asmIdPath/$accession/genes/$asmId.ncbiRefSeq.gtf.gz"; if ( ! 
-s $ncbiRefSeqBbi ) { printf STDERR "ERROR: can not find $asmId.ncbiRefSeq.bb file\n"; exit 255; } my @partNames = split('_', $srcAsmId); my $ftpDirPath = sprintf("%s/%s/%s/%s/%s", $partNames[0], substr($partNames[1],0,3), substr($partNames[1],3,3), - substr($partNames[1],6,3), $asmId); + substr($partNames[1],6,3), $srcAsmId); my $totalBases = `ave -col=2 $trackDataDir/../${asmId}.chrom.sizes | grep "^total" | awk '{printf "%d", \$2}'`; chomp $totalBases; my $geneStats = `cat $trackDataDir/ncbiRefSeq/${asmId}.ncbiRefSeq.stats.txt | awk '{printf "%d\\n", \$2}' | xargs echo`; chomp $geneStats; my ($itemCount, $basesCovered) = split('\s+', $geneStats); my $percentCoverage = sprintf("%.3f", 100.0 * $basesCovered / $totalBases); $itemCount = &AsmHub::commify($itemCount); $basesCovered = &AsmHub::commify($basesCovered); my $totalBasesCmfy = &AsmHub::commify($totalBases); my $em = ""; my $noEm = ""; my $assemblyDate = `grep -v "^#" $namesFile | cut -f9`; chomp $assemblyDate; @@ -175,31 +175,31 @@

Codon coloring: This track has an optional codon coloring feature that allows users to quickly validate and compare gene predictions. To display codon colors, select the "genomic codons" option from the "Color track by codons" pull-down menu. For more information about this feature, go to the "Coloring Gene Predictions and Annotations by Codon" page.

Methods

The RefSeq annotation and RefSeq RNA alignment tracks were created at UCSC using data from the NCBI RefSeq project. The GFF format annotation data were taken from the file ${srcAsmId}_genomic.gff.gz, which is delivered with the NCBI RefSeq genome assemblies at the following location on the NCBI FTP site:
-ftp://ftp.ncbi.nlm.nih.gov/genomes/all/$ftpDirPath/ +https://ftp.ncbi.nlm.nih.gov/genomes/all/$ftpDirPath/ $gcfToGcaLiftedText The GFF file was converted to the genePred and PSL table formats for display in the Genome Browser. Information about the NCBI annotation pipeline can be found here.

Track statistics summary

Total genome size: $totalBasesCmfy bases

Curated and Predicted Gene count: $itemCount
Bases in these genes: $basesCovered