97199fbf1ff56c9ee01956deb736b97244ea3ac6 hiram Sun Mar 1 21:52:28 2020 -0800 calculating featureBits like measurement for gene tracks, removing duplicates for ncbiRefSeq, remove blanks from gene names for ncbiRefSeq, and fix fundamental bug reference to geneToId in ncbiRefSeqOtherAttrs.pl refs #23891 diff --git src/hg/utils/automation/doAugustus.pl src/hg/utils/automation/doAugustus.pl index 0f3b52d..7f277bf 100755 --- src/hg/utils/automation/doAugustus.pl +++ src/hg/utils/automation/doAugustus.pl @@ -295,30 +295,37 @@ my $dbCheck = "-db=$db"; $dbCheck = "" if (0 == $noDbGenePredCheck); $bossScript->add(<<_EOF_ export db=$db find ./run.augustus/gtf -type f | grep ".gtf.gz\$" \\ | sed -e 's#/# _D_ #g; s#\\.# _dot_ #g;' \\ | sort -k11,11 -k13,13n \\ | sed -e 's# _dot_ #.#g; s# _D_ #/#g' | xargs zcat \\ | $augustusDir/scripts/join_aug_pred.pl \\ | grep -P "\\t(CDS|exon|stop_codon|start_codon|tts|tss)\\t" \\ > \$db.augustus.gtf gtfToGenePred -genePredExt -infoOut=\$db.info \$db.augustus.gtf \$db.augustus.gp genePredCheck $dbCheck \$db.augustus.gp +genePredToBed \$db.augustus.gp stdout \\ + | bedToExons stdin stdout | bedSingleCover.pl stdin > \$db.exons.bed +export baseCount=`awk '{sum+=\$3-\$2}END{printf "%d", sum}' \$db.exons.bed` +export asmSizeNoGaps=`grep sequences ../../\$db.faSize.txt | awk '{print \$5}'` +export perCent=`echo \$baseCount \$asmSizeNoGaps | awk '{printf "%.3f", 100.0*\$1/\$2}'` +printf "%d bases of %d (%s%%) in intersection\\n" "\$baseCount" "\$asmSizeNoGaps" "\$perCent" > fb.\$db.augustus.txt +rm -f \$db.exons.bed genePredToBigGenePred \$db.augustus.gp stdout | sort -k1,1 -k2,2n > \$db.augustus.bgp bedToBigBed -type=bed12+8 -tab -as=$ENV{'HOME'}/kent/src/hg/lib/bigGenePred.as \$db.augustus.bgp partition/\$db.chrom.sizes \$db.augustus.bb getRnaPred -genePredExt -keepMasking -genomeSeqs=$maskedSeq \$db \$db.augustus.gp all \$db.augustusGene.rna.fa getRnaPred -genePredExt -peptides -genomeSeqs=$maskedSeq \$db \$db.augustus.gp all \$db.augustusGene.faa _EOF_ ); $bossScript->execute(); } # doMakeGp ######################################################################### # * step: load [dbHost] sub doLoadAugustus { my $runDir = $buildDir; &HgAutomate::mustMkdir($runDir); my $tableName = "augustusGene";