1c0cb9b18d0e449cbdae2c681d0a0abdcacd1e7e
hiram
  Fri Jul 19 10:34:46 2019 -0700
add augustusSpecies argument rearrange masking fix allGaps refs #23734

diff --git src/hg/utils/automation/doAssemblyHub.pl src/hg/utils/automation/doAssemblyHub.pl
index 4b538f6..b4829df 100755
--- src/hg/utils/automation/doAssemblyHub.pl
+++ src/hg/utils/automation/doAssemblyHub.pl
@@ -14,119 +14,121 @@
 use File::stat;
 use warnings;
 use strict;
 use FindBin qw($Bin);
 use lib "$Bin";
 use HgAutomate;
 use HgRemoteScript;
 use HgStepManager;
 
 # Option variable names, both common and peculiar to this script:
 use vars @HgAutomate::commonOptionVars;
 use vars @HgStepManager::optionVars;
 use vars qw/
     $opt_buildDir
     $opt_sourceDir
+    $opt_augustusSpecies
     $opt_ucscNames
     /;
 
 # Specify the steps supported with -continue / -stop:
 my $stepper = new HgStepManager(
     [ { name => 'download',   func => \&doDownload },
       { name => 'sequence',   func => \&doSequence },
       { name => 'assemblyGap',   func => \&doAssemblyGap },
       { name => 'gatewayPage',   func => \&doGatewayPage },
       { name => 'gc5Base',   func => \&doGc5Base },
       { name => 'repeatMasker',   func => \&doRepeatMasker },
       { name => 'simpleRepeat',   func => \&doSimpleRepeat },
       { name => 'allGaps',   func => \&doAllGaps },
       { name => 'idKeys',   func => \&doIdKeys },
-      { name => 'addMask',   func => \&doAddMask },
       { name => 'windowMasker',   func => \&doWindowMasker },
+      { name => 'addMask',   func => \&doAddMask },
       { name => 'cpgIslands',   func => \&doCpgIslands },
       { name => 'augustus',   func => \&doAugustus },
       { name => 'trackDb',   func => \&doTrackDb },
       { name => 'cleanup', func => \&doCleanup },
     ]
 				);
 
 # Option defaults:
 my $dbHost = 'hgwdev';
 my $sourceDir = "/hive/data/outside/ncbi/genomes";
+my $augustusSpecies = "human";
 my $ucscNames = 0;  # default 'FALSE' (== 0)
 my $workhorse = "hgwdev";  # default workhorse when none chosen
 my $fileServer = "hgwdev";  # default when none chosen
 my $bigClusterHub = "ku";  # default when none chosen
 my $smallClusterHub = "ku";  # default when none chosen
 
 my $base = $0;
 $base =~ s/^(.*\/)?//;
 
 sub usage {
   # Usage / help / self-documentation:
   my ($status, $detailed) = @_;
   # Basic help (for incorrect usage):
   print STDERR "
 usage: $base [options] genbank|refseq subGroup species asmId
 required arguments:
     genbank|refseq - specify either genbank or refseq hierarchy source
     subGroup       - specify subGroup at NCBI FTP site, examples:
                    - vertebrate_mammalian vertebrate_other plant etc...
     species        - species directory at NCBI FTP site, examples:
                    - Homo_sapiens Mus_musculus etc...
     asmId          - assembly identifier at NCBI FTP site, examples:
                    - GCF_000001405.32_GRCh38.p6 GCF_000001635.24_GRCm38.p4 etc..
 
 options:
 ";
   print STDERR $stepper->getOptionHelp();
   print STDERR <<_EOF_
     -buildDir dir     Construct assembly hub in dir instead of default
        $HgAutomate::clusterData/asmHubs/{genbank|refseq}/subGroup/species/asmId/
-    -sourceDir dir    Find assembly in dir instead of default
-                          $sourceDir
-                          the assembly is found at:
-  sourceDir/{genbank|refseq}/subGroup/species/all_assembly_versions/asmId/
+    -sourceDir dir    Find assembly in dir instead of default:
+       $sourceDir/<genbank|refseq>/subGroup/species/all_assembly_versions/asmId
     -ucscNames        Translate NCBI/INSDC/RefSeq names to UCSC names
                       default is to use the given NCBI/INSDC/RefSeq names
+    -augustusSpecies <human|chicken|zebrafish> default 'human'
 _EOF_
   ;
   print STDERR &HgAutomate::getCommonOptionHelp('dbHost' => $dbHost,
 						'workhorse' => $workhorse,
 						'fileServer' => $fileServer,
 						'bigClusterHub' => $bigClusterHub,
 						'smallClusterHub' => $smallClusterHub);
   print STDERR "
 Automates build of assembly hub.  Steps:
     download: sets up sym link working hierarchy from already mirrored
                 files from NCBI in:
                       $sourceDir/{genbank|refseq}/
     sequence: establish AGP and 2bit file from NCBI directory
     assemblyGap: create assembly and gap bigBed files and indexes
                  for assembly track names
     gatewayPage: create html/asmId.description.html contents
     gc5Base: create bigWig file for gc5Base track
     repeatMasker: run repeat masker cluster run and create bigBed files for
                   the composite track categories of repeats
     simpleRepeat: run trf cluster run and create bigBed file for simple repeats
     allGaps: calculate all actual real gaps due to N's in sequence, can be
                   more than were specified in the AGP file
     idKeys: calculate md5sum for each sequence in the assembly to be used to
             find identical sequences in similar assemblies
-    addMask: combine repeatMasker and trf simpleRepeats into one 2bit file
     windowMasker: run windowMasker cluster run, create windowMasker bigBed file
                   and compute intersection with repeatMasker results
+    addMask: combine the higher masking of (windowMasker or repeatMasker) with
+                  trf simpleRepeats into one 2bit file
     cpgIslands: run CpG islands cluster runs for both masked and unmasked
                 sequences and create bigBed files for this composite track
     trackDb: create trackDb.txt file for assembly hub to include all constructed
              bigBed and bigWig tracks
     cleanup: Removes or compresses intermediate files.
 All operations are performed in the build directory which is
 $HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/template.\$date unless -buildDir is given.
 ";
   # Detailed help (-help):
   print STDERR "
 Assumptions:
 1. $HgAutomate::clusterData/\$db/\$db.2bit contains RepeatMasked sequence for
    database/assembly \$db.
 2. $HgAutomate::clusterData/\$db/chrom.sizes contains all sequence names and sizes from
    \$db.2bit.
@@ -137,30 +139,32 @@
   print "\n";
   exit $status;
 }
 
 
 # Globals:
 # Command line args: genbankRefseq subGroup species asmId
 my ($genbankRefseq, $subGroup, $species, $asmId);
 # Other:
 my ($buildDir, $secondsStart, $secondsEnd, $assemblySource);
 
 sub checkOptions {
   # Make sure command line options are valid/supported.
   my $ok = GetOptions(@HgStepManager::optionSpec,
 		      'buildDir=s',
+		      'sourceDir=s',
+		      'augustusSpecies=s',
 		      'ucscNames',
 		      @HgAutomate::commonOptionSpec,
 		      );
   &usage(1) if (!$ok);
   &usage(0, 1) if ($opt_help);
   &HgAutomate::processCommonOptions();
   my $err = $stepper->processOptions();
   usage(1) if ($err);
   $dbHost = $opt_dbHost if ($opt_dbHost);
 }
 
 #########################################################################
 #########################################################################
 #  assistant subroutines here.  The 'do' steps follow this section
 #########################################################################
@@ -590,30 +594,31 @@
 faToTwoBit -noMask *.fa.gz ../\$asmId.unmasked.2bit
 touch -r ../download/\$asmId.2bit ../\$asmId.2bit
 touch -r ../download/\$asmId.2bit ../\$asmId.unmasked.2bit
 touch -r ../download/\$asmId.2bit ../\$asmId.agp.gz
 twoBitInfo ../\$asmId.2bit stdout | sort -k2nr > ../\$asmId.chrom.sizes
 touch -r ../\$asmId.2bit ../\$asmId.chrom.sizes
 # verify everything is there
 twoBitInfo ../download/\$asmId.2bit stdout | sort -k2nr > source.\$asmId.chrom.sizes
 export newTotal=`ave -col=2 ../\$asmId.chrom.sizes | grep "^total"`
 export oldTotal=`ave -col=2 source.\$asmId.chrom.sizes | grep "^total"`
 if [ "\$newTotal" != "\$oldTotal" ]; then
   printf "# ERROR: sequence construction error: not same totals source vs. new:\n" 1>&2
   printf "# \$newTotal != \$oldTotal\n" 1>&2
   exit 255
 fi
+rm source.\$asmId.chrom.sizes
 export checkAgp=`checkAgpAndFa ../\$asmId.agp.gz ../\$asmId.2bit 2>&1 | tail -1`
 if [ "\$checkAgp" != "All AGP and FASTA entries agree - both files are valid" ]; then
   printf "# ERROR: checkAgpAndFa \$asmId.agp.gz \$asmId.2bit failing\n" 1>&2
   exit 255
 fi
 
 twoBitToFa ../\$asmId.2bit stdout | faCount stdin | gzip -c > \$asmId.faCount.txt.gz
 touch -r ../\$asmId.2bit \$asmId.faCount.txt.gz
 zgrep -P "^total\t" \$asmId.faCount.txt.gz > \$asmId.faCount.signature.txt
 touch -r ../\$asmId.2bit \$asmId.faCount.signature.txt
 _EOF_
   );
   $bossScript->execute();
 } # doSequence
 
@@ -811,47 +816,48 @@
 #########################################################################
 # * step: allGaps [workhorse]
 sub doAllGaps {
   my $runDir = "$buildDir/trackData/allGaps";
   &HgAutomate::mustMkdir($runDir);
 
   my $whatItDoes = "construct 'all' gap track data";
   my $bossScript = newBash HgRemoteScript("$runDir/doAllGaps.bash",
                     $workhorse, $runDir, $whatItDoes);
 
   $bossScript->add(<<_EOF_
 export asmId=$asmId
 export buildDir=$buildDir
 
 if [ \$buildDir/\$asmId.2bit -nt \$asmId.allGaps.bb ]; then
-  findMotif -motif=gattaca -verbose=4 -strand=+ ../../\$asmId.2bit \\
-       > \$asmId.findMotif.txt 2>&1
-  grep "^#GAP " \$asmId.findMotif.txt | sed -e 's/^#GAP //;' \\
-      > \$asmId.allGaps.bed
+  twoBitInfo -nBed ../../\$asmId.2bit stdout | awk '{printf "%s\\t%d\\t%d\\t%d\\t%d\\t+\\n", \$1, \$2, \$3, NR, \$3-\$2}' > \$asmId.allGaps.bed
+  if [ -s ../assemblyGap/\$asmId.gap.bb ]; then
     bigBedToBed ../assemblyGap/\$asmId.gap.bb \$asmId.gap.bed
     # verify the 'all' gaps should include the gap track items
     bedIntersect -minCoverage=0.0000000014 \$asmId.allGaps.bed \$asmId.gap.bed \\
       \$asmId.verify.annotated.gap.bed
     gapTrackCoverage=`awk '{print \$3-\$2}' \$asmId.gap.bed \\
       | ave stdin | grep "^total" | sed -e 's/.000000//;'`
     intersectCoverage=`ave -col=5 \$asmId.verify.annotated.gap.bed \\
       | grep "^total" | sed -e 's/.000000//;'`
     if [ \$gapTrackCoverage -ne \$intersectCoverage ]; then
-    printf "ERROR: 'all' gaps does not include gap track coverage\n" 1>&2
-    printf "gapTrackCoverage: \$gapTrackCoverage != \$intersectCoverage intersection\n" 1>&2
+      printf "ERROR: 'all' gaps does not include gap track coverage\\n" 1>&2
+      printf "gapTrackCoverage: \$gapTrackCoverage != \$intersectCoverage intersection\\n" 1>&2
       exit 255
     fi
+  else
+    touch \$asmId.gap.bed
+  fi
   bedInvert.pl ../../\$asmId.chrom.sizes \$asmId.allGaps.bed \\
     > \$asmId.NOT.allGaps.bed
   # verify bedInvert worked correctly
   #   sum of both sizes should equal genome size
   both=`cat \$asmId.NOT.allGaps.bed \$asmId.allGaps.bed \\
     | awk '{print \$3-\$2}' | ave stdin | grep "^total" \\
     | sed -e 's/.000000//;'`
   genomeSize=`ave -col=2 ../../\$asmId.chrom.sizes | grep "^total" \\
     | sed -e 's/.000000//;'`
   if [ \$genomeSize -ne \$both ]; then
      printf "ERROR: bedInvert.pl did not function correctly on allGaps.bed\n" 1>&2
      printf "genomeSize: \$genomeSize != \$both both gaps data\n" 1>&2
      exit 255
   fi
   bedInvert.pl ../../\$asmId.chrom.sizes \$asmId.gap.bed \\
@@ -868,31 +874,31 @@
   bedIntersect -minCoverage=0.0000000014 \$asmId.allGaps.bed \\
      \$asmId.NOT.gap.bed \$asmId.notAnnotated.gap.bed
   # verify the intersect functioned correctly
   # sum of new gaps plus gap track should equal all gaps
   allGapCoverage=`awk '{print \$3-\$2}' \$asmId.allGaps.bed \\
      | ave stdin | grep "^total" | sed -e 's/.000000//;'`
   both=`cat \$asmId.notAnnotated.gap.bed \$asmId.gap.bed \\
     | awk '{print \$3-\$2}' | ave stdin | grep "^total" | sed -e 's/.000000//;'`
   if [ \$allGapCoverage -ne \$both ]; then
      printf "ERROR: bedIntersect to identify new gaps did not function correctly\n" 1>&2
      printf "allGaps: \$allGapCoverage != \$both (new + gap track)\n" 1>&2
   fi
   cut -f1-3 \$asmId.allGaps.bed | sort -k1,1 -k2,2n > toBbi.bed
   bedToBigBed -type=bed3 toBbi.bed ../../\$asmId.chrom.sizes \$asmId.allGaps.bb
   rm -f toBbi.bed
-  gzip *.bed *.txt
+  gzip *.bed
 else
   printf "# allgaps step previously completed\\n" 1>&2
   exit 0
 fi
 _EOF_
   );
   $bossScript->execute();
 } # allGaps
 
 #########################################################################
 # * step: idKeys [workhorse]
 sub doIdKeys {
   my $runDir = "$buildDir/trackData/idKeys";
   &HgAutomate::mustMkdir($runDir);
 
@@ -913,58 +919,72 @@
   );
   $bossScript->execute();
 } # idKeys
 
 #########################################################################
 # * step: addMask [workhorse]
 sub doAddMask {
   my $runDir = "$buildDir/trackData/addMask";
 
   my $goNoGo = 0;
   if ( ! -s "$buildDir/trackData/repeatMasker/$asmId.rmsk.2bit" ) {
       printf STDERR "ERROR: repeatMasker step not completed\n";
       printf STDERR "can not find: $buildDir/trackData/repeatMasker/$asmId.rmsk.2bit\n";
       $goNoGo = 1;
   }
+  if ( ! -s "$buildDir/trackData/windowMasker/$asmId.cleanWMSdust.2bit" ) {
+      printf STDERR "ERROR: windowMasker step not completed\n";
+      printf STDERR "can not find: $buildDir/trackData/windowMasker/$asmId.cleanWMSdust.2bit\n";
+      $goNoGo = 1;
+  }
   if ( ! -s "$buildDir/trackData/simpleRepeat/trfMask.bed.gz" ) {
       printf STDERR "ERROR: simpleRepeat step not completed\n";
       printf STDERR "can not find: $buildDir/trackData/simpleRepeat/trfMask.bed.gz\n";
       $goNoGo = 1;
   }
   if ($goNoGo) {
-      printf STDERR "ERROR: must complete both repeatMasker and simpleRepeat before addMask\n";
+      printf STDERR "ERROR: must complete repeatMasker, windowMasker and simpleRepeat before addMask\n";
       exit 255;
   }
   &HgAutomate::mustMkdir($runDir);
 
-  my $whatItDoes = "add together repeatMasker and trf/simpleRepeats to construct masked 2bit file";
+  my $whatItDoes = "add together (windowMasker or repeatMasker) and trf/simpleRepeats to construct masked 2bit file";
   my $bossScript = newBash HgRemoteScript("$runDir/doAddMask.bash",
                     $workhorse, $runDir, $whatItDoes);
 
+  my $wmMasked=`grep "masked total" $buildDir/trackData/windowMasker/faSize.$asmId.cleanWMSdust.txt | awk '{print \$1}' | sed -e 's/%//;'`;
+  my $rmMasked=`grep "masked total" $buildDir/trackData/repeatMasker/faSize.rmsk.txt | awk '{print \$1}' | sed -e 's/%//;'`;
+
+  my $src2BitToMask = "../repeatMasker/$asmId.rmsk.2bit";
+  if ($wmMasked > $rmMasked) {
+    $src2BitToMask = "../windowMasker/$asmId.cleanWMSdust.2bit";
+  }
+
   $bossScript->add(<<_EOF_
 export asmId=$asmId
 
-if [ ../simpleRepeat/trfMask.bed.gz -nt \$asmId.trfRM.faSize.txt ]; then
-  twoBitMask ../repeatMasker/\$asmId.rmsk.2bit -type=.bed \\
-     -add ../simpleRepeat/trfMask.bed.gz \$asmId.trfRM.2bit
-  twoBitToFa \$asmId.trfRM.2bit stdout | faSize stdin > \$asmId.trfRM.faSize.txt
+if [ ../simpleRepeat/trfMask.bed.gz -nt \$asmId.masked.faSize.txt ]; then
+  twoBitMask $src2BitToMask -type=.bed \\
+     -add ../simpleRepeat/trfMask.bed.gz \$asmId.masked.2bit
+  twoBitToFa \$asmId.masked.2bit stdout | faSize stdin > \$asmId.masked.faSize.txt
 else
   printf "# addMask step previously completed\\n" 1>&2
   exit 0
 fi
 _EOF_
   );
+
   $bossScript->execute();
 } # addMask
 
 #########################################################################
 # * step: windowMasker [workhorse]
 sub doWindowMasker {
   my $runDir = "$buildDir/trackData/windowMasker";
 
   &HgAutomate::mustMkdir($runDir);
 
   my $whatItDoes = "run windowMasker procedure";
   my $bossScript = newBash HgRemoteScript("$runDir/doWindowMasker.bash",
                     $workhorse, $runDir, $whatItDoes);
 
   $bossScript->add(<<_EOF_
@@ -1025,69 +1045,69 @@
 cd unmasked
 if [ ../../../\$asmId.unmasked.2bit -nt \$asmId.cpgIslandExtUnmasked.bb ]; then
   doCpgIslands.pl -stop=makeBed -buildDir=`pwd` -dbHost=$dbHost \\
     -smallClusterHub=$smallClusterHub -bigClusterHub=$bigClusterHub -tableName=cpgIslandExtUnmasked \\
     -workhorse=$workhorse -maskedSeq=$buildDir/\$asmId.unmasked.2bit \\
     -chromSizes=$buildDir/\$asmId.chrom.sizes \$asmId
   doCpgIslands.pl -continue=cleanup -stop=cleanup -buildDir=`pwd` \\
     -dbHost=$dbHost \\
     -smallClusterHub=$smallClusterHub -bigClusterHub=$bigClusterHub -tableName=cpgIslandExtUnmasked \\
     -workhorse=$workhorse -maskedSeq=$buildDir/\$asmId.unmasked.2bit \\
     -chromSizes=$buildDir/\$asmId.chrom.sizes \$asmId
 else
   printf "# cpgIslands unmasked previously completed\\n" 1>&2
 fi
 cd ../masked
-if [ ../../addMask/\$asmId.trfRM.2bit -nt \$asmId.cpgIslandExt.bb ]; then
+if [ ../../addMask/\$asmId.masked.2bit -nt \$asmId.cpgIslandExt.bb ]; then
   doCpgIslands.pl -stop=makeBed -buildDir=`pwd` -dbHost=$dbHost \\
     -smallClusterHub=$smallClusterHub -bigClusterHub=$bigClusterHub -workhorse=$workhorse \\
-    -maskedSeq=$buildDir/trackData/addMask/\$asmId.trfRM.2bit \\
+    -maskedSeq=$buildDir/trackData/addMask/\$asmId.masked.2bit \\
     -chromSizes=$buildDir/\$asmId.chrom.sizes \$asmId
   doCpgIslands.pl -continue=cleanup -stop=cleanup -buildDir=`pwd` \\
     -dbHost=$dbHost \\
     -smallClusterHub=$smallClusterHub -bigClusterHub=$bigClusterHub -workhorse=$workhorse \\
-    -maskedSeq=$buildDir/trackData/addMask/\$asmId.trfRM.2bit \\
+    -maskedSeq=$buildDir/trackData/addMask/\$asmId.masked.2bit \\
     -chromSizes=$buildDir/\$asmId.chrom.sizes \$asmId
 else
   printf "# cpgIslands masked previously completed\\n" 1>&2
   exit 0
 fi
 _EOF_
   );
   $bossScript->execute();
 } # sub doCpgIslands
 
 #########################################################################
 # * step: augustus [workhorse]
 sub doAugustus {
   my $runDir = "$buildDir/trackData/augustus";
 
   &HgAutomate::mustMkdir($runDir);
 
   my $whatItDoes = "run Augustus gene prediction procedures";
   my $bossScript = newBash HgRemoteScript("$runDir/doAugustus.bash",
                     $workhorse, $runDir, $whatItDoes);
 
   $bossScript->add(<<_EOF_
 export asmId=$asmId
 
 if [ $buildDir/\$asmId.2bit -nt \$asmId.augustus.bb ]; then
-  time (/cluster/home/hiram/kent/src/hg/utils/automation/doAugustus.pl -stop=makeGp -buildDir=`pwd` -dbHost=$dbHost \\
-    -bigClusterHub=$bigClusterHub -species=human -workhorse=$workhorse \\
+  time (~/kent/src/hg/utils/automation/doAugustus.pl -stop=makeGp -buildDir=`pwd` -dbHost=$dbHost \\
+    -bigClusterHub=$bigClusterHub -species=$augustusSpecies -workhorse=$workhorse \\
     -noDbGenePredCheck -maskedSeq=$buildDir/\$asmId.2bit \$asmId) > makeDb.log 2>&1
-  time (/cluster/home/hiram/kent/src/hg/utils/automation/doAugustus.pl -continue=cleanup -stop=cleanup -buildDir=`pwd` -dbHost=$dbHost \\
-    -bigClusterHub=$bigClusterHub -species=human -workhorse=$workhorse \\
+  time (~/kent/src/hg/utils/automation/doAugustus.pl -continue=cleanup -stop=cleanup -buildDir=`pwd` -dbHost=$dbHost \\
+    -bigClusterHub=$bigClusterHub -species=$augustusSpecies -workhorse=$workhorse \\
     -noDbGenePredCheck -maskedSeq=$buildDir/\$asmId.2bit \$asmId) > cleanup.log 2>&1
 else
   printf "# augustus genes previously completed\\n" 1>&2
 fi
 _EOF_
   );
   $bossScript->execute();
 } # windowMasker
 
 #########################################################################
 # * step: trackDb [workhorse]
 sub doTrackDb {
   my $runDir = "$buildDir";
   &HgAutomate::mustMkdir($runDir);
 
@@ -1132,40 +1152,43 @@
 $secondsStart = `date "+%s"`;
 chomp $secondsStart;
 
 # expected command line arguments after options are processed
 ($genbankRefseq, $subGroup, $species, $asmId) = @ARGV;
 
 # Force debug and verbose until this is looking pretty solid:
 # $opt_debug = 1;
 # $opt_verbose = 3 if ($opt_verbose < 3);
 
 # Establish what directory we will work in.
 $buildDir = $opt_buildDir ? $opt_buildDir :
   "$HgAutomate::clusterData/asmHubs/$genbankRefseq/$subGroup/$species/$asmId";
 
 $sourceDir = $opt_sourceDir ? $opt_sourceDir : $sourceDir;
+$augustusSpecies = $opt_augustusSpecies ? $opt_augustusSpecies : $augustusSpecies;
 $ucscNames = $opt_ucscNames ? 1 : $ucscNames;   # '1' == 'TRUE'
 $workhorse = $opt_workhorse ? $opt_workhorse : $workhorse;
 $bigClusterHub = $opt_bigClusterHub ? $opt_bigClusterHub : $bigClusterHub;
 $smallClusterHub = $opt_smallClusterHub ? $opt_smallClusterHub : $smallClusterHub;
 $fileServer = $opt_fileServer ? $opt_fileServer : $fileServer;
 
-$assemblySource = "$sourceDir/$genbankRefseq/$subGroup/$species/all_assembly_versions/$asmId";
+$assemblySource = $opt_sourceDir ? "$sourceDir" : "$sourceDir/$genbankRefseq/$subGroup/$species/all_assembly_versions/$asmId";
 
 die "can not find assembly source directory\n$assemblySource" if ( ! -d $assemblySource);
 printf STDERR "# buildDir: %s\n", $buildDir;
+printf STDERR "# sourceDir %s\n", $sourceDir;
+printf STDERR "# augustusSpecies %s\n", $augustusSpecies;
 printf STDERR "# assemblySource: %s\n", $assemblySource;
 
 # Do everything.
 $stepper->execute();
 
 # Tell the user anything they should know.
 my $stopStep = $stepper->getStopStep();
 my $upThrough = ($stopStep eq 'cleanup') ? "" :
   "  (through the '$stopStep' step)";
 
 $secondsEnd = `date "+%s"`;
 chomp $secondsEnd;
 my $elapsedSeconds = $secondsEnd - $secondsStart;
 my $elapsedMinutes = int($elapsedSeconds/60);
 $elapsedSeconds -= $elapsedMinutes * 60;