88603b198e1787fb26de0479d1782532fbb71b67
hiram
  Tue Jul 23 10:14:37 2019 -0700
added cytoBand navigation refs #23734

diff --git src/hg/utils/automation/doAssemblyHub.pl src/hg/utils/automation/doAssemblyHub.pl
index 939efcf..514dee5 100755
--- src/hg/utils/automation/doAssemblyHub.pl
+++ src/hg/utils/automation/doAssemblyHub.pl
@@ -24,30 +24,31 @@
 use vars @HgAutomate::commonOptionVars;
 use vars @HgStepManager::optionVars;
 use vars qw/
     $opt_buildDir
     $opt_sourceDir
     $opt_augustusSpecies
     $opt_ucscNames
     /;
 
 # Specify the steps supported with -continue / -stop:
 my $stepper = new HgStepManager(
     [ { name => 'download',   func => \&doDownload },
       { name => 'sequence',   func => \&doSequence },
       { name => 'assemblyGap',   func => \&doAssemblyGap },
       { name => 'gatewayPage',   func => \&doGatewayPage },
+      { name => 'cytoBand',   func => \&doCytoBand },
       { name => 'gc5Base',   func => \&doGc5Base },
       { name => 'repeatMasker',   func => \&doRepeatMasker },
       { name => 'simpleRepeat',   func => \&doSimpleRepeat },
       { name => 'allGaps',   func => \&doAllGaps },
       { name => 'idKeys',   func => \&doIdKeys },
       { name => 'windowMasker',   func => \&doWindowMasker },
       { name => 'addMask',   func => \&doAddMask },
       { name => 'gapOverlap',   func => \&doGapOverlap },
       { name => 'tandemDups',   func => \&doTandemDups },
       { name => 'cpgIslands',   func => \&doCpgIslands },
       { name => 'augustus',   func => \&doAugustus },
       { name => 'trackDb',   func => \&doTrackDb },
       { name => 'cleanup', func => \&doCleanup },
     ]
 				);
@@ -95,30 +96,31 @@
   ;
   print STDERR &HgAutomate::getCommonOptionHelp('dbHost' => $dbHost,
 						'workhorse' => $workhorse,
 						'fileServer' => $fileServer,
 						'bigClusterHub' => $bigClusterHub,
 						'smallClusterHub' => $smallClusterHub);
   print STDERR "
 Automates build of assembly hub.  Steps:
     download: sets up sym link working hierarchy from already mirrored
                 files from NCBI in:
                       $sourceDir/{genbank|refseq}/
     sequence: establish AGP and 2bit file from NCBI directory
     assemblyGap: create assembly and gap bigBed files and indexes
                  for assembly track names
     gatewayPage: create html/asmId.description.html contents
+    cytoBand: create cytoBand track and navigation ideogram
     gc5Base: create bigWig file for gc5Base track
     repeatMasker: run repeat masker cluster run and create bigBed files for
                   the composite track categories of repeats
     simpleRepeat: run trf cluster run and create bigBed file for simple repeats
     allGaps: calculate all actual real gaps due to N's in sequence, can be
                   more than were specified in the AGP file
     idKeys: calculate md5sum for each sequence in the assembly to be used to
             find identical sequences in similar assemblies
     windowMasker: run windowMasker cluster run, create windowMasker bigBed file
                   and compute intersection with repeatMasker results
     addMask: combine the higher masking of (windowMasker or repeatMasker) with
                   trf simpleRepeats into one 2bit file
     cpgIslands: run CpG islands cluster runs for both masked and unmasked
                 sequences and create bigBed files for this composite track
     trackDb: create trackDb.txt file for assembly hub to include all constructed
@@ -705,30 +707,63 @@
            > \$asmId.description.html 2> \$asmId.names.tab
   \$HOME/kent/src/hg/utils/automation/genbank/buildStats.pl \\
        ../\$asmId.chrom.sizes 2> \$asmId.build.stats.txt
   touch -r ../download/\${asmId}_assembly_report.txt \$asmId.description.html
   $photoLink
 else
   printf "# gatewayPage step previously completed\\n" 1>&2
   exit 0
 fi
 _EOF_
   );
   $bossScript->execute();
 } # gatewayPage
 
 #########################################################################
+# * step: cytoBand [workhorse]
+sub doCytoBand {
+  my $runDir = "$buildDir/trackData/cytoBand";
+  &HgAutomate::mustMkdir($runDir);
+
+  my $whatItDoes = "construct cytoBand track and navigation ideogram";
+  my $bossScript = newBash HgRemoteScript("$runDir/doCytoBand.bash",
+                    $workhorse, $runDir, $whatItDoes);
+
+  if ( ! -s "$buildDir/$asmId.chrom.sizes" ) {
+      printf STDERR "ERROR: sequence step not completed\n";
+      printf STDERR "can not find: $buildDir/$asmId.chrom.sizes\n";
+      exit 255;
+  }
+
+  $bossScript->add(<<_EOF_
+export asmId=$asmId
+
+if [ ../../\$asmId.chrom.sizes -nt \$asmId.cytoBand.bb ]; then
+  awk '{printf "%s\\t0\\t%d\\t\\tgneg\\n", \$1, \$2}' ../../\$asmId.chrom.sizes | sort -k1,1 -k2,2n > \$asmId.cytoBand.bed
+  bedToBigBed -type=bed4+1 -as=\$HOME/kent/src/hg/lib/cytoBand.as -tab \$asmId.cytoBand.bed ../../\$asmId.chrom.sizes \$asmId.cytoBand.bb
+
+  touch -r ../../\$asmId.chrom.sizes \$asmId.cytoBand.bb
+else
+  printf "# cytoBand step previously completed\\n" 1>&2
+  exit 0
+fi
+_EOF_
+  );
+  $bossScript->execute();
+} # cytoBand
+
+#########################################################################
 # * step: gc5Base [workhorse]
 sub doGc5Base {
   my $runDir = "$buildDir/trackData/gc5Base";
   &HgAutomate::mustMkdir($runDir);
 
   my $whatItDoes = "construct gc5Base bigWig track data";
   my $bossScript = newBash HgRemoteScript("$runDir/doGc5Base.bash",
                     $workhorse, $runDir, $whatItDoes);
 
   $bossScript->add(<<_EOF_
 export asmId=$asmId
 
 if [ ../../\$asmId.2bit -nt \$asmId.gc5Base.bw ]; then
   hgGcPercent -wigOut -doGaps -file=stdout -win=5 -verbose=0 test \\
     ../../\$asmId.2bit \\
@@ -981,31 +1016,31 @@
 #########################################################################
 # * step: windowMasker [workhorse]
 sub doWindowMasker {
   my $runDir = "$buildDir/trackData/windowMasker";
 
   &HgAutomate::mustMkdir($runDir);
 
   my $whatItDoes = "run windowMasker procedure";
   my $bossScript = newBash HgRemoteScript("$runDir/doWindowMasker.bash",
                     $workhorse, $runDir, $whatItDoes);
 
   $bossScript->add(<<_EOF_
 export asmId=$asmId
 
 ### if [ ../../\$asmId.unmasked.2bit -nt fb.\$asmId.rmsk.windowmaskerSdust.txt ]; then
-if [ ../../\$asmId.unmasked.2bit -nt faSize.\$asmId.wmsk.sdust.txt ]; then
+if [ ../../\$asmId.unmasked.2bit -nt faSize.\$asmId.cleanWMSdust.txt ]; then
   \$HOME/kent/src/hg/utils/automation/doWindowMasker.pl -stop=twobit -buildDir=`pwd` -dbHost=$dbHost \\
     -workhorse=$workhorse -unmaskedSeq=$buildDir/\$asmId.unmasked.2bit \$asmId
   bedInvert.pl ../../\$asmId.chrom.sizes ../allGaps/\$asmId.allGaps.bed.gz \\
     > not.gap.bed
   bedIntersect -minCoverage=0.0000000014 windowmasker.sdust.bed \\
      not.gap.bed stdout | sort -k1,1  -k2,2n > cleanWMask.bed
   twoBitMask $buildDir/\$asmId.unmasked.2bit cleanWMask.bed \\
     \$asmId.cleanWMSdust.2bit
   twoBitToFa \$asmId.cleanWMSdust.2bit stdout \\
     | faSize stdin > faSize.\$asmId.cleanWMSdust.txt
   zcat ../repeatMasker/\$asmId.sorted.fa.out.gz | sed -e 's/^  *//; /^\$/d;' \\
     | egrep -v "^SW|^score" | awk '{printf "%s\\t%d\\t%d\\n", \$5, \$6-1, \$7}' \\
       | bedSingleCover.pl stdin > rmsk.bed
   intersectRmskWM=`bedIntersect -minCoverage=0.0000000014 cleanWMask.bed \\
     rmsk.bed stdout | bedSingleCover.pl stdin | ave -col=4 stdin \\