2c8115414f9f247308f4d43fe71d2ff92b156c15 hiram Wed Nov 17 10:31:46 2021 -0800 correct name on bash script refs #28401 diff --git src/hg/utils/automation/doCpgIslands.pl src/hg/utils/automation/doCpgIslands.pl index c573ff1..b4c24d6 100755 --- src/hg/utils/automation/doCpgIslands.pl +++ src/hg/utils/automation/doCpgIslands.pl @@ -1,278 +1,278 @@ #!/usr/bin/env perl # DO NOT EDIT the /cluster/bin/scripts copy of this file -- # edit ~/kent/src/hg/utils/automation/doCpgIslands.pl instead. use Getopt::Long; use warnings; use strict; use FindBin qw($Bin); use lib "$Bin"; use HgAutomate; use HgRemoteScript; use HgStepManager; # Option variable names, both common and peculiar to this script: use vars @HgAutomate::commonOptionVars; use vars @HgStepManager::optionVars; use vars qw/ $opt_buildDir $opt_maskedSeq $opt_chromSizes $opt_tableName /; # Specify the steps supported with -continue / -stop: my $stepper = new HgStepManager( [ { name => 'hardMask', func => \&doHardMask }, { name => 'cpg', func => \&doCpg }, { name => 'makeBed', func => \&doMakeBed }, { name => 'load', func => \&doLoadCpg }, { name => 'cleanup', func => \&doCleanup }, ] ); my $cpgLh = "/hive/data/staging/data/cpgIslandExt/cpglh"; # Option defaults: # my $bigClusterHub = 'swarm'; my $bigClusterHub = 'ku'; # my $smallClusterHub = 'encodek'; my $smallClusterHub = 'ku'; my $workhorse = 'hgwdev'; my $dbHost = 'hgwdev'; my $defaultWorkhorse = 'hgwdev'; my $maskedSeq = "$HgAutomate::clusterData/\$db/\$db.2bit"; my $chromSizes = "$HgAutomate::clusterData/\$db/chrom.sizes"; my $base = $0; $base =~ s/^(.*\/)?//; sub usage { # Usage / help / self-documentation: my ($status, $detailed) = @_; # Basic help (for incorrect usage): print STDERR " usage: $base db options: "; print STDERR $stepper->getOptionHelp(); print STDERR <<_EOF_ -buildDir dir Use dir instead of default $HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/cpgIslands (necessary when continuing at a later date). -maskedSeq seq.2bit Use seq.2bit as the masked input sequence instead of default ($maskedSeq). -chromSizes chrom.sizes Use chrom.sizes instead of default ($chromSizes). -tableName name Load table 'name' instead of default cpgIslandExt e.g.: -tableName cpgIslandExtUnmasked _EOF_ ; print STDERR &HgAutomate::getCommonOptionHelp('dbHost' => $dbHost, 'bigClusterHub' => $bigClusterHub, 'smallClusterHub' => $smallClusterHub, 'workhorse' => $defaultWorkhorse); print STDERR " Automates UCSC's CpG Island finder for genome database \$db. Steps: hardMask: Creates hard-masked fastas needed for the CpG Island program. cpg: Run /hive/data/staging/data/cpgIslandExt/cpglh on the hard-masked fastas makeBed: Transform output from cpglh into cpgIsland.bed load: Load cpgIsland.bed into \$db. cleanup: Removes hard-masked fastas and output from cpglh. All operations are performed in the build directory which is $HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/cpgIslands unless -buildDir is given. "; # Detailed help (-help): print STDERR " Assumptions: 1. $HgAutomate::clusterData/\$db/\$db.2bit contains RepeatMasked sequence for database/assembly \$db. " if ($detailed); print "\n"; exit $status; } # Globals: # Command line args: db my ($db); # Other: my ($buildDir, $tableName, $secondsStart, $secondsEnd); sub checkOptions { # Make sure command line options are valid/supported. my $ok = GetOptions(@HgStepManager::optionSpec, 'buildDir=s', 'maskedSeq=s', 'chromSizes=s', 'tableName=s', @HgAutomate::commonOptionSpec, ); &usage(1) if (!$ok); &usage(0, 1) if ($opt_help); &HgAutomate::processCommonOptions(); my $err = $stepper->processOptions(); usage(1) if ($err); $workhorse = $opt_workhorse if ($opt_workhorse); $bigClusterHub = $opt_bigClusterHub if ($opt_bigClusterHub); $smallClusterHub = $opt_smallClusterHub if ($opt_smallClusterHub); $dbHost = $opt_dbHost if ($opt_dbHost); } ######################################################################### # * step: hard mask [workhorse] sub doHardMask { printf STDERR "# doHardMask: obsolete step, no longer needed\n"; return 0; } # doHardMask ######################################################################### # * step: cpg [workhorse] sub doCpg { # Set up and perform the cluster run to run the CpG function on the # hard masked sequence. my $paraHub = $bigClusterHub; my $runDir = $buildDir; # Second, make sure we're starting clean. if (-e "$runDir/cpglh.result") { die "doCpg: looks like this was run successfully already " . "(cpglh.result exists). Either run with -continue makeBed or some later " . "stage, or move aside/remove $runDir/ and run again.\n"; } &HgAutomate::mustMkdir($runDir); my $whatItDoes = "Run /hive/data/staging/data/cpgIslandExt/cpglh on masked sequence."; my $bossScript = newBash HgRemoteScript("$runDir/doCpg.bcsh", $workhorse, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ export twoBit=\"$maskedSeq\" twoBitToFa \$twoBit stdout | maskOutFa stdin hard stdout \\ | /hive/data/staging/data/cpgIslandExt/cpglh /dev/stdin 2> cpglh.stderr \\ > cpglh.result _EOF_ ); $bossScript->execute(); } # doCpg ######################################################################### # * step: make bed [workhorse] sub doMakeBed { my $runDir = $buildDir; &HgAutomate::mustMkdir($runDir); # First, make sure we're starting clean. if (-e "$runDir/cpgIsland.bed") { die "doMakeBed: looks like this was run successfully already " . "(cpgIsland.bed exists). Either run with -continue load or cleanup " . "or move aside/remove $runDir/cpgIsland.bed and run again.\n"; } if (! -e "$runDir/cpglh.result") { die "doMakeBed: previous step doCpg has not completed, cpglh.result not found\n"; } my $whatItDoes = "Makes bed from cpglh output."; - my $bossScript = newBash HgRemoteScript("$runDir/doMakeBed.bcsh", $workhorse, + my $bossScript = newBash HgRemoteScript("$runDir/doMakeBed.bash", $workhorse, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ awk \'\{\$2 = \$2 - 1; width = \$3 - \$2; printf\(\"\%s\\t\%d\\t\%s\\t\%s \%s\\t\%s\\t\%s\\t\%0.0f\\t\%0.1f\\t\%s\\t\%s\\n\", \$1, \$2, \$3, \$5, \$6, width, \$6, width\*\$7\*0.01, 100.0\*2\*\$6\/width, \$7, \$9\);}\' cpglh.result \\ | sort -k1,1 -k2,2n > cpgIsland.bed bedToBigBed -tab -type=bed4+6 -as=\$HOME/kent/src/hg/lib/cpgIslandExt.as \\ cpgIsland.bed $chromSizes $db.$tableName.bb _EOF_ ); $bossScript->execute(); } # doMakeBed ######################################################################### # * step: load [dbHost] sub doLoadCpg { my $runDir = $buildDir; &HgAutomate::mustMkdir($runDir); my $whatItDoes = "Loads cpgIsland.bed."; my $bossScript = new HgRemoteScript("$runDir/doLoadCpg.csh", $dbHost, $runDir, $whatItDoes); if (! -e "$runDir/cpgIsland.bed") { die "doLoadCpg previous step doMakeBed has not completed, cpgIsland.bed not found\n"; } $bossScript->add(<<_EOF_ set C=`cut -f1 cpgIsland.bed | sort -u | awk '{print length(\$0)}' | sort -rn | sed -n -e '1,1 p'` sed -e "s/14/\${C}/" \$HOME/kent/src/hg/lib/cpgIslandExt.sql > cpgIslandExt.sql hgLoadBed -noLoad -sqlTable=cpgIslandExt.sql -tab $db $tableName cpgIsland.bed hgLoadSqlTab $db $tableName cpgIslandExt.sql bed.tab checkTableCoords -verboseBlocks -table=$tableName $db featureBits $db $tableName >&fb.$db.$tableName.txt _EOF_ ); $bossScript->execute(); } # doLoad ######################################################################### # * step: cleanup [fileServer] sub doCleanup { my $runDir = $buildDir; my $whatItDoes = "It cleans up or compresses intermediate files."; my $fileServer = &HgAutomate::chooseFileServer($runDir); my $bossScript = new HgRemoteScript("$runDir/doCleanup.csh", $fileServer, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ rm -f bed.tab cpgIslandExt.sql gzip cpgIsland.bed _EOF_ ); $bossScript->execute(); } # doCleanup ######################################################################### # main # Prevent "Suspended (tty input)" hanging: &HgAutomate::closeStdin(); # Make sure we have valid options and exactly 1 argument: &checkOptions(); &usage(1) if (scalar(@ARGV) != 1); $secondsStart = `date "+%s"`; chomp $secondsStart; ($db) = @ARGV; # Force debug and verbose until this is looking pretty solid: #$opt_debug = 1; #$opt_verbose = 3 if ($opt_verbose < 3); # Establish what directory we will work in. $buildDir = $opt_buildDir ? $opt_buildDir : "$HgAutomate::clusterData/$db/$HgAutomate::trackBuild/cpgIslands"; $maskedSeq = $opt_maskedSeq ? $opt_maskedSeq : "$HgAutomate::clusterData/$db/$db.2bit"; $chromSizes = $opt_chromSizes ? $opt_chromSizes : "$HgAutomate::clusterData/$db/chrom.sizes"; $tableName = $opt_tableName ? $opt_tableName : "cpgIslandExt"; # Do everything. $stepper->execute(); # Tell the user anything they should know. my $stopStep = $stepper->getStopStep(); my $upThrough = ($stopStep eq 'cleanup') ? "" : " (through the '$stopStep' step)"; $secondsEnd = `date "+%s"`; chomp $secondsEnd; my $elapsedSeconds = $secondsEnd - $secondsStart; my $elapsedMinutes = int($elapsedSeconds/60); $elapsedSeconds -= $elapsedMinutes * 60; &HgAutomate::verbose(1, "\n *** All done ! Elapsed time: ${elapsedMinutes}m${elapsedSeconds}s\n"); &HgAutomate::verbose(1, "\n *** All done!$upThrough\n"); &HgAutomate::verbose(1, " *** Steps were performed in $buildDir\n"); &HgAutomate::verbose(1, "\n");