30502ec213047397912c79d846c92ad726af87e6 hiram Tue Mar 11 15:35:07 2025 -0700 custom description comments on fields for NCBI RefSeq track refs #34704 diff --git src/hg/utils/automation/doNcbiRefSeq.pl src/hg/utils/automation/doNcbiRefSeq.pl index e2899b22bf1..ebab70bb683 100755 --- src/hg/utils/automation/doNcbiRefSeq.pl +++ src/hg/utils/automation/doNcbiRefSeq.pl @@ -604,108 +604,108 @@ $bossScript->add(<<_EOF_ # establish all variables to use here export db="$db" export asmId="$asmId" export verString="$verString" _EOF_ ); if (! $dbExists) { $bossScript->add(<<_EOF_ export target2bit=$dbTwoBit twoBitInfo \$target2bit stdout | sort -k2,2nr > \$db.chrom.sizes -wget -O bigGenePred.as 'https://raw.githubusercontent.com/ucscGenomeBrowser/kent/refs/heads/master/src/hg/lib/bigGenePred.as' +wget -O bigGenePredRefSeq.as 'https://raw.githubusercontent.com/ucscGenomeBrowser/kent/refs/heads/master/src/hg/lib/bigGenePredRefSeq.as' wget -O bigPsl.as 'https://raw.githubusercontent.com/ucscGenomeBrowser/kent/refs/heads/master/src/hg/lib/bigPsl.as' ### overall gene track with both predicted and curated \$HOME/kent/src/hg/utils/automation/updateName2.pl process/\$db.attrs.txt \\ process/\$db.ncbiRefSeq.gp | sort -k1,1 -k2,2n > \$db.ncbiRefSeq.bigGp genePredToBed -tab -fillSpace process/\$db.ncbiRefSeq.gp stdout \\ | bedToExons stdin stdout | bedSingleCover.pl stdin > \$asmId.exons.bed export baseCount=`awk '{sum+=\$3-\$2}END{printf "%d", sum}' \$asmId.exons.bed` export asmSizeNoGaps=`grep sequences ../../\$asmId.faSize.txt | awk '{print \$5}'` export perCent=`echo \$baseCount \$asmSizeNoGaps | awk '{printf "%.3f", 100.0*\$1/\$2}'` -bedToBigBed -type=bed12+8 -tab -as=bigGenePred.as -extraIndex=name \\ +bedToBigBed -type=bed12+8 -tab -as=bigGenePredRefSeq.as -extraIndex=name \\ \$db.ncbiRefSeq.bigGp \$db.chrom.sizes \\ \$db.ncbiRefSeq.bb bigBedInfo \$db.ncbiRefSeq.bb | egrep "^itemCount:|^basesCovered:" \\ | sed -e 's/,//g' > \$db.ncbiRefSeq.stats.txt LC_NUMERIC=en_US /usr/bin/printf "# ncbiRefSeq %s %'d %s %'d\\n" `cat \$db.ncbiRefSeq.stats.txt` | xargs echo ~/kent/src/hg/utils/automation/gpToIx.pl process/\$db.ncbiRefSeq.gp \\ | sort -u > \$db.ncbiRefSeq.ix.txt ixIxx \$db.ncbiRefSeq.ix.txt \$db.ncbiRefSeq.ix{,x} rm -f \$db.ncbiRefSeq.ix.txt ### curated only if present if [ -s process/\$db.curated.gp ]; then \$HOME/kent/src/hg/utils/automation/updateName2.pl process/\$db.attrs.txt \\ process/\$db.curated.gp | sort -k1,1 -k2,2n > \$db.ncbiRefSeqCurated.bigGp - bedToBigBed -type=bed12+8 -tab -as=bigGenePred.as -extraIndex=name \\ + bedToBigBed -type=bed12+8 -tab -as=bigGenePredRefSeq.as -extraIndex=name \\ \$db.ncbiRefSeqCurated.bigGp \$db.chrom.sizes \\ \$db.ncbiRefSeqCurated.bb rm -f \$db.ncbiRefSeqCurated.bigGp bigBedInfo \$db.ncbiRefSeqCurated.bb | egrep "^itemCount:|^basesCovered:" \\ | sed -e 's/,//g' > \$db.ncbiRefSeqCurated.stats.txt LC_NUMERIC=en_US /usr/bin/printf "# ncbiRefSeqCurated %s %'d %s %'d\\n" `cat \$db.ncbiRefSeqCurated.stats.txt` | xargs echo ~/kent/src/hg/utils/automation/gpToIx.pl process/\$db.curated.gp \\ | sort -u > \$db.ncbiRefSeqCurated.ix.txt ixIxx \$db.ncbiRefSeqCurated.ix.txt \$db.ncbiRefSeqCurated.ix{,x} rm -f \$db.ncbiRefSeqCurated.ix.txt ### and refseqSelect if exists (a subset of curated) if [ -s process/\$db.refseqSelect.curated.gp ]; then \$HOME/kent/src/hg/utils/automation/updateName2.pl process/\$db.attrs.txt \\ process/\$db.refseqSelect.curated.gp | sort -k1,1 -k2,2n \\ > \$db.ncbiRefSeqSelectCurated.bigGp - bedToBigBed -type=bed12+8 -tab -as=bigGenePred.as -extraIndex=name \\ + bedToBigBed -type=bed12+8 -tab -as=bigGenePredRefSeq.as -extraIndex=name \\ \$db.ncbiRefSeqSelectCurated.bigGp \$db.chrom.sizes \\ \$db.ncbiRefSeqSelectCurated.bb rm -f \$db.ncbiRefSeqSelectCurated.bigGp bigBedInfo \$db.ncbiRefSeqSelectCurated.bb | egrep "^itemCount:|^basesCovered:" \\ | sed -e 's/,//g' > \$db.ncbiRefSeqSelectCurated.stats.txt LC_NUMERIC=en_US /usr/bin/printf "# ncbiRefSeqSelectCurated %s %'d %s %'d\\n" `cat \$db.ncbiRefSeqSelectCurated.stats.txt` | xargs echo ~/kent/src/hg/utils/automation/gpToIx.pl process/\$db.refseqSelect.curated.gp \\ | sort -u > \$db.ncbiRefSeqSelectCurated.ix.txt ixIxx \$db.ncbiRefSeqSelectCurated.ix.txt \$db.ncbiRefSeqSelectCurated.ix{,x} rm -f \$db.ncbiRefSeqSelectCurated.ix.txt fi ### and hgmd if exists (a subset of curated) if [ -s process/hgmd.curated.gp ]; then \$HOME/kent/src/hg/utils/automation/updateName2.pl process/\$db.attrs.txt \\ process/hgmd.curated.gp | sort -k1,1 -k2,2n > \$db.ncbiRefSeqHgmd.bigGp - bedToBigBed -type=bed12+8 -tab -as=bigGenePred.as -extraIndex=name \\ + bedToBigBed -type=bed12+8 -tab -as=bigGenePredRefSeq.as -extraIndex=name \\ \$db.ncbiRefSeqHgmd.bigGp \$db.chrom.sizes \\ \$db.ncbiRefSeqHgmd.bb rm -f \$db.ncbiRefSeqHgmd.bigGp bigBedInfo \$db.ncbiRefSeqHgmd.bb | egrep "^itemCount:|^basesCovered:" \\ | sed -e 's/,//g' > \$db.ncbiRefSeqHgmd.stats.txt LC_NUMERIC=en_US /usr/bin/printf "# ncbiRefSeqHgmd %s %'d %s %'d\\n" `cat \$db.ncbiRefSeqHgmd.stats.txt` | xargs echo ~/kent/src/hg/utils/automation/gpToIx.pl process/hgmd.curated.gp \\ | sort -u > \$db.ncbiRefSeqHgmd.ix.txt ixIxx \$db.ncbiRefSeqHgmd.ix.txt \$db.ncbiRefSeqHgmd.ix{,x} rm -f \$db.ncbiRefSeqHgmd.ix.txt fi fi ### predicted only if present if [ -s process/\$db.predicted.gp ]; then \$HOME/kent/src/hg/utils/automation/updateName2.pl process/\$db.attrs.txt \\ process/\$db.predicted.gp | sort -k1,1 -k2,2n \\ > \$db.ncbiRefSeqPredicted.bigGp - bedToBigBed -type=bed12+8 -tab -as=bigGenePred.as -extraIndex=name \\ + bedToBigBed -type=bed12+8 -tab -as=bigGenePredRefSeq.as -extraIndex=name \\ \$db.ncbiRefSeqPredicted.bigGp \$db.chrom.sizes \\ \$db.ncbiRefSeqPredicted.bb rm -f \$db.ncbiRefSeqPredicted.bigGp bigBedInfo \$db.ncbiRefSeqPredicted.bb | egrep "^itemCount:|^basesCovered:" \\ | sed -e 's/,//g' > \$db.ncbiRefSeqPredicted.stats.txt LC_NUMERIC=en_US /usr/bin/printf "# ncbiRefSeqPredicted %s %'d %s %'d\\n" `cat \$db.ncbiRefSeqPredicted.stats.txt` | xargs echo ~/kent/src/hg/utils/automation/gpToIx.pl process/\$db.predicted.gp \\ | sort -u > \$db.ncbiRefSeqPredicted.ix.txt ixIxx \$db.ncbiRefSeqPredicted.ix.txt \$db.ncbiRefSeqPredicted.ix{,x} rm -f \$db.ncbiRefSeqPredicted.ix.txt fi ### all other annotations, not necessarily genes if [ -s "process/\$db.other.bb" ]; then ln -f -s process/\$db.other.bb \$db.ncbiRefSeqOther.bb