cd52f66464cce415303d7ae24ba28603c927ee4f hiram Thu Mar 6 15:39:33 2025 -0800 better names in ncbiRefSeq track fields and updated related trackDb URLs refs #32704 diff --git src/hg/utils/automation/doNcbiRefSeq.pl src/hg/utils/automation/doNcbiRefSeq.pl index a7ff0a18a67..e2899b22bf1 100755 --- src/hg/utils/automation/doNcbiRefSeq.pl +++ src/hg/utils/automation/doNcbiRefSeq.pl @@ -608,96 +608,103 @@ export db="$db" export asmId="$asmId" export verString="$verString" _EOF_ ); if (! $dbExists) { $bossScript->add(<<_EOF_ export target2bit=$dbTwoBit twoBitInfo \$target2bit stdout | sort -k2,2nr > \$db.chrom.sizes wget -O bigGenePred.as 'https://raw.githubusercontent.com/ucscGenomeBrowser/kent/refs/heads/master/src/hg/lib/bigGenePred.as' wget -O bigPsl.as 'https://raw.githubusercontent.com/ucscGenomeBrowser/kent/refs/heads/master/src/hg/lib/bigPsl.as' ### overall gene track with both predicted and curated -genePredToBigGenePred process/\$db.ncbiRefSeq.gp stdout | sort -k1,1 -k2,2n > \$db.ncbiRefSeq.bigGp +\$HOME/kent/src/hg/utils/automation/updateName2.pl process/\$db.attrs.txt \\ + process/\$db.ncbiRefSeq.gp | sort -k1,1 -k2,2n > \$db.ncbiRefSeq.bigGp genePredToBed -tab -fillSpace process/\$db.ncbiRefSeq.gp stdout \\ | bedToExons stdin stdout | bedSingleCover.pl stdin > \$asmId.exons.bed export baseCount=`awk '{sum+=\$3-\$2}END{printf "%d", sum}' \$asmId.exons.bed` export asmSizeNoGaps=`grep sequences ../../\$asmId.faSize.txt | awk '{print \$5}'` export perCent=`echo \$baseCount \$asmSizeNoGaps | awk '{printf "%.3f", 100.0*\$1/\$2}'` bedToBigBed -type=bed12+8 -tab -as=bigGenePred.as -extraIndex=name \\ \$db.ncbiRefSeq.bigGp \$db.chrom.sizes \\ \$db.ncbiRefSeq.bb bigBedInfo \$db.ncbiRefSeq.bb | egrep "^itemCount:|^basesCovered:" \\ | sed -e 's/,//g' > \$db.ncbiRefSeq.stats.txt LC_NUMERIC=en_US /usr/bin/printf "# ncbiRefSeq %s %'d %s %'d\\n" `cat \$db.ncbiRefSeq.stats.txt` | xargs echo ~/kent/src/hg/utils/automation/gpToIx.pl process/\$db.ncbiRefSeq.gp \\ | sort -u > \$db.ncbiRefSeq.ix.txt ixIxx \$db.ncbiRefSeq.ix.txt \$db.ncbiRefSeq.ix{,x} rm -f \$db.ncbiRefSeq.ix.txt ### curated only if present if [ -s process/\$db.curated.gp ]; then - genePredToBigGenePred process/\$db.curated.gp stdout | sort -k1,1 -k2,2n > \$db.ncbiRefSeqCurated.bigGp + \$HOME/kent/src/hg/utils/automation/updateName2.pl process/\$db.attrs.txt \\ + process/\$db.curated.gp | sort -k1,1 -k2,2n > \$db.ncbiRefSeqCurated.bigGp bedToBigBed -type=bed12+8 -tab -as=bigGenePred.as -extraIndex=name \\ \$db.ncbiRefSeqCurated.bigGp \$db.chrom.sizes \\ \$db.ncbiRefSeqCurated.bb rm -f \$db.ncbiRefSeqCurated.bigGp bigBedInfo \$db.ncbiRefSeqCurated.bb | egrep "^itemCount:|^basesCovered:" \\ | sed -e 's/,//g' > \$db.ncbiRefSeqCurated.stats.txt LC_NUMERIC=en_US /usr/bin/printf "# ncbiRefSeqCurated %s %'d %s %'d\\n" `cat \$db.ncbiRefSeqCurated.stats.txt` | xargs echo ~/kent/src/hg/utils/automation/gpToIx.pl process/\$db.curated.gp \\ | sort -u > \$db.ncbiRefSeqCurated.ix.txt ixIxx \$db.ncbiRefSeqCurated.ix.txt \$db.ncbiRefSeqCurated.ix{,x} rm -f \$db.ncbiRefSeqCurated.ix.txt ### and refseqSelect if exists (a subset of curated) if [ -s process/\$db.refseqSelect.curated.gp ]; then - genePredToBigGenePred process/\$db.refseqSelect.curated.gp stdout | sort -k1,1 -k2,2n > \$db.ncbiRefSeqSelectCurated.bigGp + \$HOME/kent/src/hg/utils/automation/updateName2.pl process/\$db.attrs.txt \\ + process/\$db.refseqSelect.curated.gp | sort -k1,1 -k2,2n \\ + > \$db.ncbiRefSeqSelectCurated.bigGp bedToBigBed -type=bed12+8 -tab -as=bigGenePred.as -extraIndex=name \\ \$db.ncbiRefSeqSelectCurated.bigGp \$db.chrom.sizes \\ \$db.ncbiRefSeqSelectCurated.bb rm -f \$db.ncbiRefSeqSelectCurated.bigGp bigBedInfo \$db.ncbiRefSeqSelectCurated.bb | egrep "^itemCount:|^basesCovered:" \\ | sed -e 's/,//g' > \$db.ncbiRefSeqSelectCurated.stats.txt LC_NUMERIC=en_US /usr/bin/printf "# ncbiRefSeqSelectCurated %s %'d %s %'d\\n" `cat \$db.ncbiRefSeqSelectCurated.stats.txt` | xargs echo ~/kent/src/hg/utils/automation/gpToIx.pl process/\$db.refseqSelect.curated.gp \\ | sort -u > \$db.ncbiRefSeqSelectCurated.ix.txt ixIxx \$db.ncbiRefSeqSelectCurated.ix.txt \$db.ncbiRefSeqSelectCurated.ix{,x} rm -f \$db.ncbiRefSeqSelectCurated.ix.txt fi ### and hgmd if exists (a subset of curated) if [ -s process/hgmd.curated.gp ]; then - genePredToBigGenePred process/hgmd.curated.gp stdout | sort -k1,1 -k2,2n > \$db.ncbiRefSeqHgmd.bigGp + \$HOME/kent/src/hg/utils/automation/updateName2.pl process/\$db.attrs.txt \\ + process/hgmd.curated.gp | sort -k1,1 -k2,2n > \$db.ncbiRefSeqHgmd.bigGp bedToBigBed -type=bed12+8 -tab -as=bigGenePred.as -extraIndex=name \\ \$db.ncbiRefSeqHgmd.bigGp \$db.chrom.sizes \\ \$db.ncbiRefSeqHgmd.bb rm -f \$db.ncbiRefSeqHgmd.bigGp bigBedInfo \$db.ncbiRefSeqHgmd.bb | egrep "^itemCount:|^basesCovered:" \\ | sed -e 's/,//g' > \$db.ncbiRefSeqHgmd.stats.txt LC_NUMERIC=en_US /usr/bin/printf "# ncbiRefSeqHgmd %s %'d %s %'d\\n" `cat \$db.ncbiRefSeqHgmd.stats.txt` | xargs echo ~/kent/src/hg/utils/automation/gpToIx.pl process/hgmd.curated.gp \\ | sort -u > \$db.ncbiRefSeqHgmd.ix.txt ixIxx \$db.ncbiRefSeqHgmd.ix.txt \$db.ncbiRefSeqHgmd.ix{,x} rm -f \$db.ncbiRefSeqHgmd.ix.txt fi fi ### predicted only if present if [ -s process/\$db.predicted.gp ]; then - genePredToBigGenePred process/\$db.predicted.gp stdout | sort -k1,1 -k2,2n > \$db.ncbiRefSeqPredicted.bigGp + \$HOME/kent/src/hg/utils/automation/updateName2.pl process/\$db.attrs.txt \\ + process/\$db.predicted.gp | sort -k1,1 -k2,2n \\ + > \$db.ncbiRefSeqPredicted.bigGp bedToBigBed -type=bed12+8 -tab -as=bigGenePred.as -extraIndex=name \\ \$db.ncbiRefSeqPredicted.bigGp \$db.chrom.sizes \\ \$db.ncbiRefSeqPredicted.bb rm -f \$db.ncbiRefSeqPredicted.bigGp bigBedInfo \$db.ncbiRefSeqPredicted.bb | egrep "^itemCount:|^basesCovered:" \\ | sed -e 's/,//g' > \$db.ncbiRefSeqPredicted.stats.txt LC_NUMERIC=en_US /usr/bin/printf "# ncbiRefSeqPredicted %s %'d %s %'d\\n" `cat \$db.ncbiRefSeqPredicted.stats.txt` | xargs echo ~/kent/src/hg/utils/automation/gpToIx.pl process/\$db.predicted.gp \\ | sort -u > \$db.ncbiRefSeqPredicted.ix.txt ixIxx \$db.ncbiRefSeqPredicted.ix.txt \$db.ncbiRefSeqPredicted.ix{,x} rm -f \$db.ncbiRefSeqPredicted.ix.txt fi ### all other annotations, not necessarily genes if [ -s "process/\$db.other.bb" ]; then