e76c704d9c5c6dfa06f7d7e15387fd14f651d9bf hiram Fri May 24 10:09:39 2024 -0700 correct FTP path no redmine diff --git src/hg/utils/automation/asmHubNcbiRefSeq.pl src/hg/utils/automation/asmHubNcbiRefSeq.pl index c254633..e4b63c0 100755 --- src/hg/utils/automation/asmHubNcbiRefSeq.pl +++ src/hg/utils/automation/asmHubNcbiRefSeq.pl @@ -18,45 +18,45 @@ my $asmId = shift; my @parts = split('_', $asmId, 3); my $accession = "$parts[0]_$parts[1]"; my $namesFile = shift; my $trackDataDir = shift; my $ncbiRefSeqBbi = "$trackDataDir/ncbiRefSeq/$asmId.ncbiRefSeq.bb"; my $srcGff = `ls $trackDataDir/ncbiRefSeq/download/*_genomic.gff.gz | head -1`; chomp $srcGff; my $srcAsmId = $asmId; my $gcfToGcaLiftedText = ""; if (length($srcGff) > 10) { $srcAsmId = basename($srcGff); $srcAsmId =~ s/_genomic.gff.gz//; if ($srcAsmId ne $asmId) { - $gcfToGcaLiftedText = "RefSeq annotations from $srcAsmId were lifted to this $asmId assembly to provide these gene annotations on this corresponding assembly." + $gcfToGcaLiftedText = "RefSeq annotations from <b>$srcAsmId</b> were lifted to this <b>$asmId</b> assembly to provide these gene annotations on this corresponding assembly." } } my $asmIdPath = &AsmHub::asmIdToPath($asmId); my $downloadGtf = "https://hgdownload.soe.ucsc.edu/hubs/$asmIdPath/$accession/genes/$asmId.ncbiRefSeq.gtf.gz"; if ( ! -s $ncbiRefSeqBbi ) { printf STDERR "ERROR: can not find $asmId.ncbiRefSeq.bb file\n"; exit 255; } my @partNames = split('_', $srcAsmId); my $ftpDirPath = sprintf("%s/%s/%s/%s/%s", $partNames[0], substr($partNames[1],0,3), substr($partNames[1],3,3), - substr($partNames[1],6,3), $asmId); + substr($partNames[1],6,3), $srcAsmId); my $totalBases = `ave -col=2 $trackDataDir/../${asmId}.chrom.sizes | grep "^total" | awk '{printf "%d", \$2}'`; chomp $totalBases; my $geneStats = `cat $trackDataDir/ncbiRefSeq/${asmId}.ncbiRefSeq.stats.txt | awk '{printf "%d\\n", \$2}' | xargs echo`; chomp $geneStats; my ($itemCount, $basesCovered) = split('\s+', $geneStats); my $percentCoverage = sprintf("%.3f", 100.0 * $basesCovered / $totalBases); $itemCount = &AsmHub::commify($itemCount); $basesCovered = &AsmHub::commify($basesCovered); my $totalBasesCmfy = &AsmHub::commify($totalBases); my $em = "<em>"; my $noEm = "</em>"; my $assemblyDate = `grep -v "^#" $namesFile | cut -f9`; chomp $assemblyDate; @@ -175,31 +175,31 @@ <li><strong>Codon coloring:</strong> This track has an optional codon coloring feature that allows users to quickly validate and compare gene predictions. To display codon colors, select the <em>genomic codons</em> option from the <em>Color track by codons</em> pull-down menu. For more information about this feature, go to the <a href="../goldenPath/help/hgCodonColoring.html" target="_blank">Coloring Gene Predictions and Annotations by Codon</a> page.</li> </ul> <a name="methods"></a> <h2>Methods</h2> <p> The RefSeq annotation and RefSeq RNA alignment tracks were created at UCSC using data from the NCBI RefSeq project. GFF format data files were downloaded from the file <b>${srcAsmId}_genomic.gff.gz</b> delivered with the NCBI RefSeq genome assemblies at the FTP location:<br> -<a href='ftp://ftp.ncbi.nlm.nih.gov/genomes/all/$ftpDirPath/' target='_blank'>ftp://ftp.ncbi.nlm.nih.gov/genomes/all/$ftpDirPath/</a> +<a href='https://ftp.ncbi.nlm.nih.gov/genomes/all/$ftpDirPath/' target='_blank'>https://ftp.ncbi.nlm.nih.gov/genomes/all/$ftpDirPath/</a> $gcfToGcaLiftedText The GFF file was converted to the genePred and PSL table formats for display in the Genome Browser. Information about the NCBI annotation pipeline can be found <a href="https://www.ncbi.nlm.nih.gov/genome/annotation_euk/process/" target="_blank">here</a>. </p> <h2>Track statistics summary</h2> <p> <b>Total genome size: </b>$totalBasesCmfy <b>bases</b><br><br> <b>Curated and Predicted Gene count: </b>$itemCount<br> <b>Bases in these genes: </b>$basesCovered<br>