7f813719b7ad2dde53149de49c2ec989b9630ea7 hiram Tue Sep 13 12:09:58 2022 -0700 now working through augustus step refs #29811 diff --git src/hg/utils/automation/doAssemblyHub.pl src/hg/utils/automation/doAssemblyHub.pl index 7e09fe6..8c3deb2 100755 --- src/hg/utils/automation/doAssemblyHub.pl +++ src/hg/utils/automation/doAssemblyHub.pl @@ -1195,90 +1195,91 @@ if ( -s "$buildDir/download/${asmId}_rm.out.gz" ) { $rmskOpts = " \\ -ncbiRmsk=\"$buildDir/download/${asmId}_rm.out.gz\" "; if ( -s "${buildDir}/download/${asmId}.remove.dups.list" ) { $rmskOpts .= " \\ -dupList=\"${buildDir}/download/${asmId}.remove.dups.list\" "; } if ($ucscNames) { $rmskOpts .= " \\ -liftSpec=\"$buildDir/sequence/$asmId.ncbiToUcsc.lift\""; } } } $bossScript->add(<<_EOF_ -export asmId=$asmId +export asmId=$defaultName +export ncbiAsmId=$asmId if [ $buildDir/\$asmId.2bit -nt faSize.rmsk.txt ]; then export species=`echo $rmskSpecies | sed -e 's/_/ /g;'` rm -f versionInfo.txt doRepeatMasker.pl -stop=mask -buildDir=`pwd` -unmaskedSeq=$buildDir/\$asmId.2bit $rmskOpts \\ -bigClusterHub=$bigClusterHub -workhorse=$workhorse -species="\$species" \$asmId if [ -s "\$asmId.fa.out" ]; then gzip \$asmId.fa.out fi gzip \$asmId.sorted.fa.out \$asmId.nestedRepeats.bed doRepeatMasker.pl -continue=cleanup -buildDir=`pwd` -unmaskedSeq=$buildDir/\$asmId.2bit $rmskOpts \\ -bigClusterHub=$bigClusterHub -workhorse=$workhorse -species="\$species" \$asmId if [ ! -s versionInfo.txt ]; then - if [ -s ../../download/${asmId}_rm.run ]; then - ln -s ../../download/${asmId}_rm.run versionInfo.txt + if [ -s ../../download/\${ncbiAsmId}_rm.run ]; then + ln -s ../../download/\${ncbiAsmId}_rm.run versionInfo.txt fi fi \$HOME/kent/src/hg/utils/automation/asmHubRepeatMasker.sh \$asmId `pwd`/\$asmId.sorted.fa.out.gz `pwd` else printf "# repeatMasker step previously completed\\n" 1>&2 exit 0 fi _EOF_ ); $bossScript->execute(); } # repeatMasker ######################################################################### # * step: simpleRepeat [workhorse] sub doSimpleRepeat { my $runDir = "$buildDir/trackData/simpleRepeat"; &HgAutomate::mustMkdir($runDir); my $whatItDoes = "construct TRF/simpleRepeat track data"; my $bossScript = newBash HgRemoteScript("$runDir/doSimpleRepeat.bash", $workhorse, $runDir, $whatItDoes); my $trfClusterHub = $smallClusterHub; - my $seqCount = `cat $buildDir/$asmId.chrom.sizes | wc -l`; + my $seqCount = `cat $buildDir/$defaultName.chrom.sizes | wc -l`; chomp $seqCount; # check for large seqCount and large genome, then use bigCluster # the 100000 and 20000000 are from doSimpleRepeat.pl if ( $seqCount > 100000 ) { - my $genomeSize = `ave -col=2 $buildDir/$asmId.chrom.sizes | grep -w total | awk '{printf "%d", \$NF}'`; + my $genomeSize = `ave -col=2 $buildDir/$defaultName.chrom.sizes | grep -w total | awk '{printf "%d", \$NF}'`; chomp $genomeSize; if ($genomeSize > 200000000) { $trfClusterHub = $bigClusterHub; } } $bossScript->add(<<_EOF_ -export asmId=$asmId +export asmId=$defaultName export buildDir=$buildDir if [ \$buildDir/\$asmId.2bit -nt trfMask.bed.gz ]; then doSimpleRepeat.pl -stop=filter -buildDir=`pwd` \\ -unmaskedSeq=\$buildDir/\$asmId.2bit \\ -trf409=6 -dbHost=$dbHost -smallClusterHub=$trfClusterHub \\ -workhorse=$workhorse \$asmId doSimpleRepeat.pl -buildDir=`pwd` \\ -continue=cleanup -stop=cleanup -unmaskedSeq=\$buildDir/\$asmId.2bit \\ -trf409=6 -dbHost=$dbHost -smallClusterHub=$trfClusterHub \\ -workhorse=$workhorse \$asmId if [ -s simpleRepeat.bed ]; then gzip simpleRepeat.bed & else rm -f simpleRepeat.bed @@ -1300,31 +1301,31 @@ ## } ## twoBitMask ../repeatMasker/\$asmId.rmsk.2bit -add trfMask.bed \\ ## \$asmId.RM_TRF_masked.2bit ######################################################################### # * step: allGaps [workhorse] sub doAllGaps { my $runDir = "$buildDir/trackData/allGaps"; &HgAutomate::mustMkdir($runDir); my $whatItDoes = "construct 'all' gap track data"; my $bossScript = newBash HgRemoteScript("$runDir/doAllGaps.bash", $workhorse, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ -export asmId=$asmId +export asmId=$defaultName export buildDir=$buildDir if [ \$buildDir/\$asmId.2bit -nt \$asmId.allGaps.bb ]; then twoBitInfo -nBed ../../\$asmId.2bit stdout | awk '{printf "%s\\t%d\\t%d\\t%d\\t%d\\t+\\n", \$1, \$2, \$3, NR, \$3-\$2}' > \$asmId.allGaps.bed if [ ! -s \$asmId.allGaps.bed ]; then exit 0 fi if [ -s ../assemblyGap/\$asmId.gap.bb ]; then bigBedToBed ../assemblyGap/\$asmId.gap.bb \$asmId.gap.bed # verify the 'all' gaps should include the gap track items bedIntersect -minCoverage=0.0000000014 \$asmId.allGaps.bed \$asmId.gap.bed \\ \$asmId.verify.annotated.gap.bed gapTrackCoverage=`awk '{print \$3-\$2}' \$asmId.gap.bed \\ | ave stdin | grep "^total" | awk '{print \$NF}' | sed -e 's/.000000//;'` intersectCoverage=`ave -col=5 \$asmId.verify.annotated.gap.bed \\ @@ -1379,108 +1380,114 @@ rm -f toBbi.bed gzip *.bed else printf "# allgaps step previously completed\\n" 1>&2 exit 0 fi _EOF_ ); $bossScript->execute(); } # allGaps ######################################################################### # * step: idKeys [workhorse] sub doIdKeys { my $runDir = "$buildDir/trackData/idKeys"; - if (! -s "$buildDir/$asmId.2bit") { - &HgAutomate::verbose(1, "ERROR: idKeys can not find $asmId.2bit\n"); + if (! -s "$buildDir/$defaultName.2bit") { + &HgAutomate::verbose(1, "ERROR: idKeys can not find $defaultName.2bit\n"); exit 255; } - if (! needsUpdate("$buildDir/$asmId.2bit", "$runDir/$asmId.keySignature.txt")) { + if (! needsUpdate("$buildDir/$defaultName.2bit", "$runDir/$defaultName.keySignature.txt")) { &HgAutomate::verbose(1, "# idKeys step previously completed\n"); return; } &HgAutomate::mustMkdir($runDir); my $whatItDoes = "construct ID key data for each contig/chr"; my $bossScript = newBash HgRemoteScript("$runDir/doIdKeys.bash", $workhorse, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ -export asmId=$asmId +export asmId=$defaultName export twoBit=$buildDir/\$asmId.2bit if [ ../../\$asmId.2bit -nt \$asmId.keySignature.txt ]; then doIdKeys.pl \$asmId -buildDir=`pwd` -twoBit=\$twoBit touch -r \$twoBit \$asmId.keySignature.txt else printf "# idKeys step previously completed\\n" 1>&2 exit 0 fi _EOF_ ); $bossScript->execute(); } # doIdKeys ######################################################################### # * step: addMask [workhorse] sub doAddMask { my $runDir = "$buildDir/trackData/addMask"; my $goNoGo = 0; if (! $noRmsk) { - if ( ! -s "$buildDir/trackData/repeatMasker/$asmId.rmsk.2bit" ) { + if ( ! -s "$buildDir/trackData/repeatMasker/$defaultName.rmsk.2bit" ) { printf STDERR "ERROR: repeatMasker step not completed\n"; - printf STDERR "can not find: $buildDir/trackData/repeatMasker/$asmId.rmsk.2bit\n"; + printf STDERR "can not find: $buildDir/trackData/repeatMasker/$defaultName.rmsk.2bit\n"; $goNoGo = 1; } } - if ( ! -s "$buildDir/trackData/windowMasker/$asmId.cleanWMSdust.2bit" ) { + if ( ! -s "$buildDir/trackData/windowMasker/$defaultName.cleanWMSdust.2bit" ) { printf STDERR "ERROR: windowMasker step not completed\n"; - printf STDERR "can not find: $buildDir/trackData/windowMasker/$asmId.cleanWMSdust.2bit\n"; + printf STDERR "can not find: $buildDir/trackData/windowMasker/$defaultName.cleanWMSdust.2bit\n"; $goNoGo = 1; } if ( ! -s "$buildDir/trackData/simpleRepeat/doCleanup.csh" ) { printf STDERR "ERROR: simpleRepeat step not completed\n"; printf STDERR "can not find: $buildDir/trackData/simpleRepeat/doCleanup.csh\n"; $goNoGo = 1; } if ($goNoGo) { printf STDERR "ERROR: must complete repeatMasker, windowMasker and simpleRepeat before addMask\n"; exit 255; } &HgAutomate::mustMkdir($runDir); my $whatItDoes = "add together (windowMasker or repeatMasker) and trf/simpleRepeats to construct masked 2bit file"; my $bossScript = newBash HgRemoteScript("$runDir/doAddMask.bash", $workhorse, $runDir, $whatItDoes); - my $wmMasked=`grep "masked total" $buildDir/trackData/windowMasker/faSize.$asmId.cleanWMSdust.txt | awk '{print \$1}' | sed -e 's/%//;'`; + my $wmMasked=`grep "masked total" $buildDir/trackData/windowMasker/faSize.$defaultName.cleanWMSdust.txt | awk '{print \$1}' | sed -e 's/%//;'`; my $rmMasked = 0; if (! $noRmsk) { $rmMasked=`grep "masked total" $buildDir/trackData/repeatMasker/faSize.rmsk.txt | awk '{print \$1}' | sed -e 's/%//;'`; } - my $src2BitToMask = "../repeatMasker/$asmId.rmsk.2bit"; + my $src2BitToMask = "../repeatMasker/$defaultName.rmsk.2bit"; if ($noRmsk || ($wmMasked > $rmMasked)) { - $src2BitToMask = "../windowMasker/$asmId.cleanWMSdust.2bit"; + $src2BitToMask = "../windowMasker/$defaultName.cleanWMSdust.2bit"; + } + + my $accessionId = $defaultName; + if ($accessionId =~ m/^GC[AF]_/) { + my @a = split('_', $defaultName); + $accessionId = sprintf("%s_%s", $a[0], $a[1]); } $bossScript->add(<<_EOF_ -export asmId=$asmId +export asmId=$defaultName export src2Bit=$src2BitToMask -export accessionId=`echo \$asmId | cut -d'_' -f1-2` +export accessionId=$accessionId # if simple repeat has a result, add it, otherwise no add if [ -s ../simpleRepeat/trfMask.bed.gz ]; then if [ ../simpleRepeat/trfMask.bed.gz -nt \$asmId.masked.faSize.txt ]; then twoBitMask \$src2Bit -type=.bed \\ -add ../simpleRepeat/trfMask.bed.gz \$asmId.masked.2bit fi else cp -p \$src2Bit \$asmId.masked.2bit fi if [ \$asmId.masked.2bit -nt \$asmId.masked.faSize.txt ]; then twoBitToFa \$asmId.masked.2bit stdout | faSize stdin > \$asmId.masked.faSize.txt touch -r \$asmId.masked.2bit \$asmId.masked.faSize.txt bptForTwoBit \$asmId.masked.2bit \$asmId.masked.2bit.bpt @@ -1512,31 +1519,31 @@ $bossScript->execute(); } # addMask ######################################################################### # * step: windowMasker [workhorse] sub doWindowMasker { my $runDir = "$buildDir/trackData/windowMasker"; &HgAutomate::mustMkdir($runDir); my $whatItDoes = "run windowMasker procedure"; my $bossScript = newBash HgRemoteScript("$runDir/doWindowMasker.bash", $workhorse, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ -export asmId=$asmId +export asmId=$defaultName if [ ../../\$asmId.unmasked.2bit -nt faSize.\$asmId.cleanWMSdust.txt ]; then \$HOME/kent/src/hg/utils/automation/doWindowMasker.pl -stop=twobit -buildDir=`pwd` -dbHost=$dbHost \\ -workhorse=$workhorse -unmaskedSeq=$buildDir/\$asmId.unmasked.2bit \$asmId bedInvert.pl ../../\$asmId.chrom.sizes ../allGaps/\$asmId.allGaps.bed.gz \\ > not.gap.bed bedIntersect -minCoverage=0.0000000014 windowmasker.sdust.bed \\ not.gap.bed stdout | sort -k1,1 -k2,2n > cleanWMask.bed twoBitMask $buildDir/\$asmId.unmasked.2bit cleanWMask.bed \\ \$asmId.cleanWMSdust.2bit twoBitToFa \$asmId.cleanWMSdust.2bit stdout \\ | faSize stdin > faSize.\$asmId.cleanWMSdust.txt export intersectRmskWM=0 if [ -s ../repeatMasker/\$asmId.sorted.fa.out.gz ]; then zcat ../repeatMasker/\$asmId.sorted.fa.out.gz | sed -e 's/^ *//; /^\$/d;' \\ @@ -1573,104 +1580,104 @@ ); $bossScript->execute(); } # windowMasker ######################################################################### # * step: gapOverlap [workhorse] sub doGapOverlap { my $runDir = "$buildDir/trackData/gapOverlap"; &HgAutomate::mustMkdir($runDir); my $whatItDoes = "construct gap overlap track (duplicate sequence on each side of a gap)"; my $bossScript = newBash HgRemoteScript("$runDir/doGapOverlap.bash", $workhorse, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ -export asmId=$asmId +export asmId=$defaultName if [ ../../\$asmId.unmasked.2bit -nt \$asmId.gapOverlap.bed.gz ]; then doGapOverlap.pl -buildDir=`pwd` -bigClusterHub=$bigClusterHub -smallClusterHub=$smallClusterHub -workhorse=$workhorse -twoBit=../../\$asmId.2bit \$asmId else printf "# gapOverlap step previously completed\\n" 1>&2 exit 0 fi _EOF_ ); $bossScript->execute(); } # doGapOverlap ######################################################################### # * step: tandemDups [workhorse] sub doTandemDups { my $runDir = "$buildDir/trackData/tandemDups"; - if (! -s "$buildDir/$asmId.unmasked.2bit") { + if (! -s "$buildDir/$defaultName.unmasked.2bit") { &HgAutomate::verbose(1, - "ERROR: tandemDups: can not find $buildDir/$asmId.unmasked.2bit\n"); + "ERROR: tandemDups: can not find $buildDir/$defaultName.unmasked.2bit\n"); exit 255; } - my $ctgCount = `grep -c '^' $buildDir/$asmId.chrom.sizes`; + my $ctgCount = `grep -c '^' $buildDir/$defaultName.chrom.sizes`; chomp $ctgCount; if ( $ctgCount > 100000) { &HgAutomate::verbose(1, "# tandemDups step too many contigs at $ctgCount\n"); return; } if (-d "${runDir}" ) { - if (! -s "$runDir/$asmId.tandemDups.bb") { + if (! -s "$runDir/$defaultName.tandemDups.bb") { &HgAutomate::verbose(1, "WARNING tandemDups step may already be running, but not completed ?\n"); return; - } elsif (! needsUpdate("$buildDir/$asmId.unmasked.2bit", "$runDir/$asmId.tandemDups.bb")) { + } elsif (! needsUpdate("$buildDir/$defaultName.unmasked.2bit", "$runDir/$defaultName.tandemDups.bb")) { &HgAutomate::verbose(1, "# tandemDups step previously completed\n"); return; } } &HgAutomate::mustMkdir($runDir); my $whatItDoes = "construct tandem dups track (nearby pairs of exact duplicate sequence)"; my $bossScript = newBash HgRemoteScript("$runDir/doTandemDups.bash", $workhorse, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ -export asmId=$asmId +export asmId=$defaultName export twoBit=$buildDir/\$asmId.unmasked.2bit if [ \$twoBit -nt \$asmId.tandemDups.bb ]; then doTandemDup.pl -buildDir=`pwd` -bigClusterHub=$bigClusterHub -smallClusterHub=$smallClusterHub -workhorse=$workhorse -twoBit=\$twoBit \$asmId touch -r \$twoBit \$asmId.tandemDups.bb else printf "# tandemDups step previously completed\\n" 1>&2 exit 0 fi _EOF_ ); $bossScript->execute(); } # doTandemDups ######################################################################### # * step: cpgIslands [workhorse] sub doCpgIslands { my $runDir = "$buildDir/trackData/cpgIslands"; &HgAutomate::mustMkdir($runDir); my $whatItDoes = "run CPG Islands procedures, both masked and unmasked"; my $bossScript = newBash HgRemoteScript("$runDir/doCpgIslands.bash", $workhorse, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ -export asmId=$asmId +export asmId=$defaultName mkdir -p masked unmasked cd unmasked if [ ../../../\$asmId.unmasked.2bit -nt \$asmId.cpgIslandExtUnmasked.bb ]; then doCpgIslands.pl -stop=makeBed -buildDir=`pwd` -dbHost=$dbHost \\ -smallClusterHub=$smallClusterHub -bigClusterHub=$bigClusterHub -tableName=cpgIslandExtUnmasked \\ -workhorse=$workhorse -maskedSeq=$buildDir/\$asmId.unmasked.2bit \\ -chromSizes=$buildDir/\$asmId.chrom.sizes \$asmId doCpgIslands.pl -continue=cleanup -stop=cleanup -buildDir=`pwd` \\ -dbHost=$dbHost \\ -smallClusterHub=$smallClusterHub -bigClusterHub=$bigClusterHub -tableName=cpgIslandExtUnmasked \\ -workhorse=$workhorse -maskedSeq=$buildDir/\$asmId.unmasked.2bit \\ -chromSizes=$buildDir/\$asmId.chrom.sizes \$asmId else printf "# cpgIslands unmasked previously completed\\n" 1>&2 @@ -1699,84 +1706,84 @@ # * step: ncbiGene [workhorse] sub doNcbiGene { my $gffFile = "$assemblySource/${asmId}_genomic.gff.gz"; if ( ! -s "${gffFile}" ) { &HgAutomate::verbose(1, "# step ncbiGene: no gff file found at:\n# $gffFile\n"); return; } if ($ucscNames) { if ( ! -s "$buildDir/sequence/$asmId.ncbiToUcsc.lift" ) { &HgAutomate::verbose(1, "# ERROR: ncbiGene: can not find $buildDir/sequence/$asmId.ncbiToUcsc.lift\n"); exit 255; } } my $runDir = "$buildDir/trackData/ncbiGene"; if (-d "${runDir}" ) { - if (! -s "$runDir/$asmId.ncbiGene.bb") { + if (! -s "$runDir/$defaultName.ncbiGene.bb") { &HgAutomate::verbose(1, "WARNING ncbiGene step may already be running, but not completed ?\n"); return; - } elsif (! needsUpdate("$gffFile", "$runDir/$asmId.ncbiGene.bb")) { + } elsif (! needsUpdate("$gffFile", "$runDir/$defaultName.ncbiGene.bb")) { &HgAutomate::verbose(1, "# ncbiGene step previously completed\n"); return; } } - if (! -s "$buildDir/$asmId.faSize.txt") { - &HgAutomate::verbose(1, "# step ncbiGene: can not find faSize.txt at:\n# $buildDir/$asmId.faSize.txt\n"); + if (! -s "$buildDir/$defaultName.faSize.txt") { + &HgAutomate::verbose(1, "# step ncbiGene: can not find faSize.txt at:\n# $buildDir/$defaultName.faSize.txt\n"); exit 255; } &HgAutomate::mustMkdir($runDir); my $whatItDoes = "translate NCBI GFF3 gene definitions into a track"; my $bossScript = newBash HgRemoteScript("$runDir/doNcbiGene.bash", $workhorse, $runDir, $whatItDoes); my $dupList = ""; if ( -s "${buildDir}/download/${asmId}.remove.dups.list" ) { $dupList = " | (grep -v -f \"${buildDir}/download/${asmId}.remove.dups.list\" || true)"; } $bossScript->add(<<_EOF_ -export asmId=$asmId +export asmId=$defaultName export gffFile=$gffFile function cleanUp() { rm -f \$asmId.ncbiGene.genePred.gz \$asmId.ncbiGene.genePred rm -f \$asmId.geneAttrs.ncbi.txt } if [ \$gffFile -nt \$asmId.ncbiGene.bb ]; then (gff3ToGenePred -warnAndContinue -useName \\ -attrsOut=\$asmId.geneAttrs.ncbi.txt \$gffFile stdout \\ 2>> \$asmId.ncbiGene.log.txt || true) | genePredFilter \\ -chromSizes=../../\$asmId.chrom.sizes stdin stdout \\ $dupList | gzip -c > \$asmId.ncbiGene.genePred.gz genePredCheck \$asmId.ncbiGene.genePred.gz export howMany=`genePredCheck \$asmId.ncbiGene.genePred.gz 2>&1 | grep "^checked" | awk '{print \$2}'` if [ "\${howMany}" -eq 0 ]; then printf "# ncbiGene: no gene definitions found in \$gffFile\n"; cleanUp exit 0 fi export ncbiGenePred="\$asmId.ncbiGene.genePred.gz" _EOF_ ); if ($ucscNames) { $bossScript->add(<<_EOF_ liftUp -extGenePred -type=.gp stdout \\ - ../../sequence/\$asmId.ncbiToUcsc.lift warn \\ + ../../sequence/$asmId.ncbiToUcsc.lift warn \\ \$asmId.ncbiGene.genePred.gz | gzip -c \\ > \$asmId.ncbiGene.ucsc.genePred.gz ncbiGenePred="\$asmId.ncbiGene.ucsc.genePred.gz" _EOF_ ); } $bossScript->add(<<_EOF_ genePredToBed -tab -fillSpace \$ncbiGenePred stdout \\ | bedToExons stdin stdout | bedSingleCover.pl stdin > \$asmId.exons.bed export baseCount=`awk '{sum+=\$3-\$2}END{printf "%d", sum}' \$asmId.exons.bed` export asmSizeNoGaps=`grep sequences ../../\$asmId.faSize.txt | awk '{print \$5}'` export perCent=`echo \$baseCount \$asmSizeNoGaps | awk '{printf "%.3f", 100.0*\$1/\$2}'` rm -f \$asmId.exons.bed ~/kent/src/hg/utils/automation/gpToIx.pl \$ncbiGenePred \\ | sort -u > \$asmId.ncbiGene.ix.txt @@ -1826,85 +1833,85 @@ if ($filesFound < $filesExpected) { printf STDERR "# step ncbiRefSeq does not have all files required\n"; return; } my $runDir = "$buildDir/trackData/ncbiRefSeq"; &HgAutomate::mustMkdir($runDir); my $whatItDoes = "run NCBI RefSeq gene procedures"; my $bossScript = newBash HgRemoteScript("$runDir/doNcbiRefSeq.bash", $workhorse, $runDir, $whatItDoes); my $liftSpec = ""; if ($ucscNames) { - $liftSpec="-liftFile=\"\$buildDir/sequence/\$asmId.ncbiToUcsc.lift\""; + $liftSpec="-liftFile=\"\$buildDir/sequence/$asmId.ncbiToUcsc.lift\""; } $bossScript->add(<<_EOF_ -export asmId="$asmId" +export asmId="$defaultName" export buildDir="$buildDir" export liftSpec="$liftSpec" export target2bit="\$buildDir/\$asmId.2bit" if [ $buildDir/\$asmId.2bit -nt \$asmId.ncbiRefSeq.bb ]; then ~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -toGpWarnOnly -buildDir=`pwd` \\ -assemblyHub -bigClusterHub=$bigClusterHub -dbHost=$dbHost $liftSpec \\ -target2bit="\$target2bit" \\ -stop=load -fileServer=$fileServer -smallClusterHub=$smallClusterHub -workhorse=$workhorse \\ - \$asmId \$asmId + $asmId \$asmId else printf "# ncbiRefSeq step previously completed\\n" 1>&2 fi _EOF_ ); $bossScript->execute(); } # ncbiRefSeq ######################################################################### # * step: augustus [workhorse] sub doAugustus { if ($noAugustus) { &HgAutomate::verbose(1, "# -noAugustus == Augustus gene track not created\n"); return; } my $runDir = "$buildDir/trackData/augustus"; - if (! -s "$buildDir/$asmId.2bit") { + if (! -s "$buildDir/$defaultName.2bit") { &HgAutomate::verbose(1, - "ERROR: augustus step can not find $buildDir/$asmId.2bit\n"); + "ERROR: augustus step can not find $buildDir/$defaultName.2bit\n"); exit 255; } if (-d "${runDir}" ) { - if (! -s "$runDir/$asmId.augustus.bb") { + if (! -s "$runDir/$defaultName.augustus.bb") { &HgAutomate::verbose(1, "WARNING augustus step may already be running, but not completed ?\n"); return; - } elsif (! needsUpdate("$buildDir/$asmId.2bit", "$runDir/$asmId.augustus.bb")) { + } elsif (! needsUpdate("$buildDir/$defaultName.2bit", "$runDir/$defaultName.augustus.bb")) { &HgAutomate::verbose(1, "# augustus step previously completed\n"); return; } } &HgAutomate::mustMkdir($runDir); my $whatItDoes = "run Augustus gene prediction procedures"; my $bossScript = newBash HgRemoteScript("$runDir/doAugustus.bash", $workhorse, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ -export asmId=$asmId +export asmId=$defaultName if [ $buildDir/\$asmId.2bit -nt \$asmId.augustus.bb ]; then time (~/kent/src/hg/utils/automation/doAugustus.pl -stop=makeGp -buildDir=`pwd` -dbHost=$dbHost \\ -bigClusterHub=$bigClusterHub -species=$augustusSpecies -workhorse=$workhorse \\ -noDbGenePredCheck -maskedSeq=$buildDir/\$asmId.2bit \$asmId) > makeDb.log 2>&1 time (~/kent/src/hg/utils/automation/doAugustus.pl -continue=cleanup -stop=cleanup -buildDir=`pwd` -dbHost=$dbHost \\ -bigClusterHub=$bigClusterHub -species=$augustusSpecies -workhorse=$workhorse \\ -noDbGenePredCheck -maskedSeq=$buildDir/\$asmId.2bit \$asmId) > cleanup.log 2>&1 else printf "# augustus genes step previously completed\\n" 1>&2 fi _EOF_ ); $bossScript->execute(); } # doAugustus @@ -1913,31 +1920,31 @@ # * step: xenoRefGene [bigClusterHub] sub doXenoRefGene { if ($noXenoRefSeq) { &HgAutomate::verbose(1, "# -noXenoRefSeq == Xeno RefSeq gene track not created\n"); return; } my $runDir = "$buildDir/trackData/xenoRefGene"; &HgAutomate::mustMkdir($runDir); my $whatItDoes = "run xeno RefSeq gene mapping procedures"; my $bossScript = newBash HgRemoteScript("$runDir/doXenoRefGene.bash", $workhorse, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ -export asmId=$asmId +export asmId=$defaultName if [ $buildDir/\$asmId.2bit -nt \$asmId.xenoRefGene.bb ]; then time (~/kent/src/hg/utils/automation/doXenoRefGene.pl -buildDir=`pwd` -dbHost=$dbHost \\ -bigClusterHub=$bigClusterHub -mrnas=$xenoRefSeq -workhorse=$workhorse \\ -maskedSeq=$buildDir/trackData/addMask/\$asmId.masked.2bit \$asmId) > do.log 2>&1 if [ -s "\$asmId.xenoRefGene.bb" ]; then bigBedInfo \$asmId.xenoRefGene.bb | egrep "^itemCount:|^basesCovered:" \\ | sed -e 's/,//g' > \$asmId.xenoRefGene.stats.txt LC_NUMERIC=en_US /usr/bin/printf "# xenoRefGene %s %'d %s %'d\\n" `cat \$asmId.xenoRefGene.stats.txt` | xargs echo fi else printf "# xenoRefGene step previously completed\\n" 1>&2 fi _EOF_ );