b3833e993f98922e22e1278e632716e729ec54ad hiram Fri Jul 19 15:24:34 2019 -0700 adding gapOverlap and tandemDups tracks to the asmHub build refs #23734 diff --git src/hg/utils/automation/doAssemblyHub.pl src/hg/utils/automation/doAssemblyHub.pl index b4829df..9400076 100755 --- src/hg/utils/automation/doAssemblyHub.pl +++ src/hg/utils/automation/doAssemblyHub.pl @@ -31,30 +31,32 @@ /; # Specify the steps supported with -continue / -stop: my $stepper = new HgStepManager( [ { name => 'download', func => \&doDownload }, { name => 'sequence', func => \&doSequence }, { name => 'assemblyGap', func => \&doAssemblyGap }, { name => 'gatewayPage', func => \&doGatewayPage }, { name => 'gc5Base', func => \&doGc5Base }, { name => 'repeatMasker', func => \&doRepeatMasker }, { name => 'simpleRepeat', func => \&doSimpleRepeat }, { name => 'allGaps', func => \&doAllGaps }, { name => 'idKeys', func => \&doIdKeys }, { name => 'windowMasker', func => \&doWindowMasker }, { name => 'addMask', func => \&doAddMask }, + { name => 'gapOverlap', func => \&doGapOverlap }, + { name => 'tandemDups', func => \&doTandemDups }, { name => 'cpgIslands', func => \&doCpgIslands }, { name => 'augustus', func => \&doAugustus }, { name => 'trackDb', func => \&doTrackDb }, { name => 'cleanup', func => \&doCleanup }, ] ); # Option defaults: my $dbHost = 'hgwdev'; my $sourceDir = "/hive/data/outside/ncbi/genomes"; my $augustusSpecies = "human"; my $ucscNames = 0; # default 'FALSE' (== 0) my $workhorse = "hgwdev"; # default workhorse when none chosen my $fileServer = "hgwdev"; # default when none chosen my $bigClusterHub = "ku"; # default when none chosen @@ -906,31 +908,31 @@ my $bossScript = newBash HgRemoteScript("$runDir/doIdKeys.bash", $workhorse, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ export asmId=$asmId if [ ../../\$asmId.2bit -nt \$asmId.keySignature.txt ]; then doIdKeys.pl \$asmId -buildDir=`pwd` -twoBit=../../\$asmId.2bit else printf "# idKeys step previously completed\\n" 1>&2 exit 0 fi 
_EOF_ ); $bossScript->execute(); -} # idKeys +} # doIdKeys ######################################################################### # * step: addMask [workhorse] sub doAddMask { my $runDir = "$buildDir/trackData/addMask"; my $goNoGo = 0; if ( ! -s "$buildDir/trackData/repeatMasker/$asmId.rmsk.2bit" ) { printf STDERR "ERROR: repeatMasker step not completed\n"; printf STDERR "can not find: $buildDir/trackData/repeatMasker/$asmId.rmsk.2bit\n"; $goNoGo = 1; } if ( ! -s "$buildDir/trackData/windowMasker/$asmId.cleanWMSdust.2bit" ) { printf STDERR "ERROR: windowMasker step not completed\n"; printf STDERR "can not find: $buildDir/trackData/windowMasker/$asmId.cleanWMSdust.2bit\n"; @@ -1016,30 +1018,78 @@ rm -f not.gap.bed rmsk.bed bedToBigBed -type=bed3 cleanWMask.bed ../../\$asmId.chrom.sizes \$asmId.windowMasker.bb gzip cleanWMask.bed \$HOME/kent/src/hg/utils/automation/doWindowMasker.pl -continue=cleanup -stop=cleanup -buildDir=`pwd` -dbHost=$dbHost \\ -workhorse=$workhorse -unmaskedSeq=$buildDir/\$asmId.unmasked.2bit \$asmId else printf "# windowMasker step previously completed\\n" 1>&2 exit 0 fi _EOF_ ); $bossScript->execute(); } # windowMasker #########################################################################
+# * step: gapOverlap [workhorse]
+#   Creates $buildDir/trackData/gapOverlap, writes doGapOverlap.bash there
+#   and runs it through HgRemoteScript (given $workhorse and the run
+#   directory).  The generated script calls doGapOverlap.pl only when the
+#   unmasked 2bit is newer than $asmId.gapOverlap.bed.gz; otherwise it
+#   prints a "previously completed" note to stderr and exits 0.
+#   Takes no arguments; reads the file-level $buildDir, $asmId, $workhorse,
+#   $bigClusterHub and $smallClusterHub.
+sub doGapOverlap {
+  my $runDir = "$buildDir/trackData/gapOverlap";
+  &HgAutomate::mustMkdir($runDir);
+
+  my $whatItDoes = "construct gap overlap track (duplicate sequence on each side of a gap)";
+  my $bossScript = newBash HgRemoteScript("$runDir/doGapOverlap.bash",
+                    $workhorse, $runDir, $whatItDoes);
+
+  # The -twoBit input is the same file the freshness test examines: the
+  # bash '-nt' test is false when its left operand is missing, so the job
+  # only ever starts when unmasked.2bit exists.  Passing any other 2bit
+  # would hand the job a file whose presence was never checked.
+  $bossScript->add(<<_EOF_
+export asmId=$asmId
+
+if [ ../../\$asmId.unmasked.2bit -nt \$asmId.gapOverlap.bed.gz ]; then
+  doGapOverlap.pl -buildDir=`pwd` -bigClusterHub=$bigClusterHub -smallClusterHub=$smallClusterHub -workhorse=$workhorse -twoBit=../../\$asmId.unmasked.2bit \$asmId
+else
+  printf "# gapOverlap step previously completed\\n" 1>&2
+  exit 0
+fi
+_EOF_
+  );
+  $bossScript->execute();
+} # doGapOverlap
+
+#########################################################################
+# * step: tandemDups [workhorse]
+#   Creates $buildDir/trackData/tandemDups, writes doTandemDups.bash there
+#   and runs it through HgRemoteScript (given $workhorse and the run
+#   directory).  The generated script calls doTandemDup.pl only when the
+#   unmasked 2bit is newer than this step's own marker file; otherwise it
+#   prints a "previously completed" note to stderr and exits 0.
+#   Takes no arguments; reads the file-level $buildDir, $asmId, $workhorse,
+#   $bigClusterHub and $smallClusterHub.
+sub doTandemDups {
+  # This step needs a run directory of its own.  Inside trackData/gapOverlap
+  # the gapOverlap result file would satisfy the freshness test below and
+  # this step would always report itself as already completed.
+  my $runDir = "$buildDir/trackData/tandemDups";
+  &HgAutomate::mustMkdir($runDir);
+
+  my $whatItDoes = "construct tandem duplicates track";
+  my $bossScript = newBash HgRemoteScript("$runDir/doTandemDups.bash",
+                    $workhorse, $runDir, $whatItDoes);
+
+  # NOTE(review): \$asmId.tandemDups.bed.gz is presumed to be the result
+  # file left by doTandemDup.pl, by analogy with the gapOverlap step -
+  # confirm.  If the name is wrong the only effect is that the step re-runs.
+  # The -twoBit input is the same file the freshness test examines, so the
+  # job never receives a 2bit whose presence was not checked.
+  $bossScript->add(<<_EOF_
+export asmId=$asmId
+
+if [ ../../\$asmId.unmasked.2bit -nt \$asmId.tandemDups.bed.gz ]; then
+  doTandemDup.pl -buildDir=`pwd` -bigClusterHub=$bigClusterHub -smallClusterHub=$smallClusterHub -workhorse=$workhorse -twoBit=../../\$asmId.unmasked.2bit \$asmId
+else
+  printf "# tandemDups step previously completed\\n" 1>&2
+  exit 0
+fi
+_EOF_
+  );
+  $bossScript->execute();
+} # doTandemDups
+
+#########################################################################
# * step: cpgIslands [workhorse] sub doCpgIslands { my $runDir = "$buildDir/trackData/cpgIslands"; &HgAutomate::mustMkdir($runDir); my $whatItDoes = "run CPG Islands procedures, both masked and unmasked"; my $bossScript = newBash HgRemoteScript("$runDir/doCpgIslands.bash", $workhorse, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ export asmId=$asmId mkdir -p masked unmasked cd unmasked