95ee44a5d737970678a6d856d1228b908941de2c hiram Fri Sep 20 19:00:51 2024 -0700 genome-source has now become githubusercontent refs #34449 diff --git src/hg/utils/automation/doNcbiRefSeq.pl src/hg/utils/automation/doNcbiRefSeq.pl index 31516a6..ab28dea 100755 --- src/hg/utils/automation/doNcbiRefSeq.pl +++ src/hg/utils/automation/doNcbiRefSeq.pl @@ -604,32 +604,32 @@ $bossScript->add(<<_EOF_ # establish all variables to use here export db="$db" export asmId="$asmId" export verString="$verString" _EOF_ ); if (! $dbExists) { $bossScript->add(<<_EOF_ export target2bit=$dbTwoBit twoBitInfo \$target2bit stdout | sort -k2,2nr > \$db.chrom.sizes -wget -O bigGenePred.as 'http://genome-source.soe.ucsc.edu/gitlist/kent.git/raw/master/src/hg/lib/bigGenePred.as' -wget -O bigPsl.as 'http://genome-source.soe.ucsc.edu/gitlist/kent.git/raw/master/src/hg/lib/bigPsl.as' +wget -O bigGenePred.as 'https://raw.githubusercontent.com/ucscGenomeBrowser/kent/refs/heads/master/src/hg/lib/bigGenePred.as' +wget -O bigPsl.as 'https://raw.githubusercontent.com/ucscGenomeBrowser/kent/refs/heads/master/src/hg/lib/bigPsl.as' ### overall gene track with both predicted and curated genePredToBigGenePred process/\$db.ncbiRefSeq.gp stdout | sort -k1,1 -k2,2n > \$db.ncbiRefSeq.bigGp genePredToBed -tab -fillSpace process/\$db.ncbiRefSeq.gp stdout \\ | bedToExons stdin stdout | bedSingleCover.pl stdin > \$asmId.exons.bed export baseCount=`awk '{sum+=\$3-\$2}END{printf "%d", sum}' \$asmId.exons.bed` export asmSizeNoGaps=`grep sequences ../../\$asmId.faSize.txt | awk '{print \$5}'` export perCent=`echo \$baseCount \$asmSizeNoGaps | awk '{printf "%.3f", 100.0*\$1/\$2}'` bedToBigBed -type=bed12+8 -tab -as=bigGenePred.as -extraIndex=name \\ \$db.ncbiRefSeq.bigGp \$db.chrom.sizes \\ \$db.ncbiRefSeq.bb bigBedInfo \$db.ncbiRefSeq.bb | egrep "^itemCount:|^basesCovered:" \\ | sed -e 's/,//g' > \$db.ncbiRefSeq.stats.txt LC_NUMERIC=en_US /usr/bin/printf "# ncbiRefSeq %s %'d %s %'d\\n" `cat \$db.ncbiRefSeq.stats.txt` | 
xargs echo ~/kent/src/hg/utils/automation/gpToIx.pl process/\$db.ncbiRefSeq.gp \\ @@ -776,31 +776,31 @@ | grep -Fwf \$db.noRna.available.list \\ | egrep "$nonNucNames" > missingChrMFa.psl if [ -s missingChrMFa.psl ]; then pslToBed missingChrMFa.psl stdout \\ | twoBitToFa -bed=stdin \$target2bit stdout >> \$db.rna.fa fi fi if [ -s process/\$asmId.rna.cds ]; then cat process/\$asmId.rna.cds | grep '[0-9]\\+\\.\\.[0-9]\\+' \\ | pslMismatchGapToBed -cdsFile=stdin -db=\$db -ignoreQNamePrefix=X \\ process/\$asmId.\$db.psl.gz \$target2bit \\ \$db.rna.fa ncbiRefSeqGenomicDiff || true if [ -s ncbiRefSeqGenomicDiff.bed ]; then - wget -O txAliDiff.as 'http://genome-source.soe.ucsc.edu/gitlist/kent.git/raw/master/src/hg/lib/txAliDiff.as' + wget -O txAliDiff.as 'https://raw.githubusercontent.com/ucscGenomeBrowser/kent/refs/heads/master/src/hg/lib/txAliDiff.as' bedToBigBed -type=bed9+ -tab -as=txAliDiff.as \\ ncbiRefSeqGenomicDiff.bed \$db.chrom.sizes ncbiRefSeqGenomicDiff.bb else rm -f ncbiRefSeqGenomicDiff.bed fi fi export totalBases=`ave -col=2 \$db.chrom.sizes | grep "^total" | awk '{printf "%d", \$2}'` export basesCovered=`bedSingleCover.pl \$db.ncbiRefSeq.bigGp | ave -col=4 stdin | grep "^total" | awk '{printf "%d", \$2}'` export percentCovered=`echo \$basesCovered \$totalBases | awk '{printf "%.3f", 100.0*\$1/\$2}'` printf "%d bases of %d (%s%%) in intersection\\n" "\$basesCovered" \\ "\$totalBases" "\$percentCovered" > fb.ncbiRefSeq.\$db.txt printf "%d bases of %d (%s%%) in intersection\\n" "\$baseCount" "\$asmSizeNoGaps" "\$perCent" > fb.\$asmId.ncbiRefSeq.txt rm -f \$db.ncbiRefSeq.bigGp \$asmId.exons.bed