e603c24b1c5f12d0fdb9c3d8b85d93cc54599921 hiram Mon Feb 3 15:10:57 2025 -0800 better to make a gtf for ncbiGene like all the others refs #35027 diff --git src/hg/utils/automation/doAssemblyHub.pl src/hg/utils/automation/doAssemblyHub.pl index 37f711013cf..ce42347d0f1 100755 --- src/hg/utils/automation/doAssemblyHub.pl +++ src/hg/utils/automation/doAssemblyHub.pl @@ -1821,44 +1821,48 @@ &HgAutomate::mustMkdir($runDir); my $whatItDoes = "translate NCBI GFF3 gene definitions into a track"; my $bossScript = newBash HgRemoteScript("$runDir/doNcbiGene.bash", $workhorse, $runDir, $whatItDoes); my $dupList = ""; if ( -s "${buildDir}/download/${asmId}.remove.dups.list" ) { $dupList = " | (grep -v -f \"${buildDir}/download/${asmId}.remove.dups.list\" || true)"; } $bossScript->add(<<_EOF_ export asmId=$defaultName export gffFile=$gffFile +export DS=`date "+%F"` function cleanUp() { rm -f \$asmId.ncbiGene.genePred.gz \$asmId.ncbiGene.genePred rm -f \$asmId.geneAttrs.ncbi.txt } if [ \$gffFile -nt \$asmId.ncbiGene.bb ]; then ln -s \$gffFile ./ (gff3ToGenePred -warnAndContinue -useName \\ -attrsOut=\$asmId.geneAttrs.ncbi.txt \$gffFile stdout \\ 2>> \$asmId.ncbiGene.log.txt || true) | genePredFilter \\ -chromSizes=../../\$asmId.chrom.sizes stdin stdout \\ $dupList | gzip -c > \$asmId.ncbiGene.genePred.gz genePredCheck \$asmId.ncbiGene.genePred.gz + zcat \$asmId.ncbiGene.genePred.gz > ncbiGene.\$DS + genePredToGtf -utr file ncbiGene.\$DS stdout | gzip -c > \$asmId.ncbiGene.gtf.gz + rm -f ncbiGene.\$DS export howMany=`genePredCheck \$asmId.ncbiGene.genePred.gz 2>&1 | grep "^checked" | awk '{print \$2}'` if [ "\${howMany}" -eq 0 ]; then printf "# ncbiGene: no gene definitions found in \$gffFile\n"; cleanUp exit 0 fi export ncbiGenePred="\$asmId.ncbiGene.genePred.gz" _EOF_ ); if ($ucscNames) { $bossScript->add(<<_EOF_ liftUp -extGenePred -type=.gp stdout \\ ../../sequence/$asmId.ncbiToUcsc.lift warn \\ \$asmId.ncbiGene.genePred.gz | gzip -c \\ > \$asmId.ncbiGene.ucsc.genePred.gz