e76c704d9c5c6dfa06f7d7e15387fd14f651d9bf hiram Fri May 24 10:09:39 2024 -0700 correct FTP path no redmine diff --git src/hg/utils/automation/asmHubNcbiRefSeq.pl src/hg/utils/automation/asmHubNcbiRefSeq.pl index c254633..e4b63c0 100755 --- src/hg/utils/automation/asmHubNcbiRefSeq.pl +++ src/hg/utils/automation/asmHubNcbiRefSeq.pl @@ -18,45 +18,45 @@ my $asmId = shift; my @parts = split('_', $asmId, 3); my $accession = "$parts[0]_$parts[1]"; my $namesFile = shift; my $trackDataDir = shift; my $ncbiRefSeqBbi = "$trackDataDir/ncbiRefSeq/$asmId.ncbiRefSeq.bb"; my $srcGff = `ls $trackDataDir/ncbiRefSeq/download/*_genomic.gff.gz | head -1`; chomp $srcGff; my $srcAsmId = $asmId; my $gcfToGcaLiftedText = ""; if (length($srcGff) > 10) { $srcAsmId = basename($srcGff); $srcAsmId =~ s/_genomic.gff.gz//; if ($srcAsmId ne $asmId) { - $gcfToGcaLiftedText = "RefSeq annotations from $srcAsmId were lifted to this $asmId assembly to provide these gene annotations on this corresponding assembly." + $gcfToGcaLiftedText = "RefSeq annotations from $srcAsmId were lifted to this $asmId assembly to provide these gene annotations on this corresponding assembly." } } my $asmIdPath = &AsmHub::asmIdToPath($asmId); my $downloadGtf = "https://hgdownload.soe.ucsc.edu/hubs/$asmIdPath/$accession/genes/$asmId.ncbiRefSeq.gtf.gz"; if ( ! 
-s $ncbiRefSeqBbi ) { printf STDERR "ERROR: can not find $asmId.ncbiRefSeq.bb file\n"; exit 255; } my @partNames = split('_', $srcAsmId); my $ftpDirPath = sprintf("%s/%s/%s/%s/%s", $partNames[0], substr($partNames[1],0,3), substr($partNames[1],3,3), - substr($partNames[1],6,3), $asmId); + substr($partNames[1],6,3), $srcAsmId); my $totalBases = `ave -col=2 $trackDataDir/../${asmId}.chrom.sizes | grep "^total" | awk '{printf "%d", \$2}'`; chomp $totalBases; my $geneStats = `cat $trackDataDir/ncbiRefSeq/${asmId}.ncbiRefSeq.stats.txt | awk '{printf "%d\\n", \$2}' | xargs echo`; chomp $geneStats; my ($itemCount, $basesCovered) = split('\s+', $geneStats); my $percentCoverage = sprintf("%.3f", 100.0 * $basesCovered / $totalBases); $itemCount = &AsmHub::commify($itemCount); $basesCovered = &AsmHub::commify($basesCovered); my $totalBasesCmfy = &AsmHub::commify($totalBases); my $em = ""; my $noEm = ""; my $assemblyDate = `grep -v "^#" $namesFile | cut -f9`; chomp $assemblyDate; @@ -175,31 +175,31 @@
The RefSeq annotation and RefSeq RNA alignment tracks
were created at UCSC using data from the NCBI RefSeq project. GFF format
data files were downloaded from the file ${srcAsmId}_genomic.gff.gz
delivered with the NCBI RefSeq genome assemblies at the FTP location:
-ftp://ftp.ncbi.nlm.nih.gov/genomes/all/$ftpDirPath/
+https://ftp.ncbi.nlm.nih.gov/genomes/all/$ftpDirPath/
$gcfToGcaLiftedText
The GFF file was converted to the
genePred and PSL table formats for display in the Genome Browser.
Information about the NCBI annotation pipeline can be found
here (https://www.ncbi.nlm.nih.gov/genome/annotation_euk/process/).
Total genome size: $totalBasesCmfy bases
Curated and Predicted Gene count: $itemCount
Bases in these genes: $basesCovered