4f09e8ddd0fc70577eb8727e38a87896bfd57e84
hiram
  Wed Sep 23 09:56:28 2020 -0700
adding chromAlias build refs #24396

diff --git src/hg/utils/automation/doAssemblyHub.pl src/hg/utils/automation/doAssemblyHub.pl
index 222bfa4..0e93e6c 100755
--- src/hg/utils/automation/doAssemblyHub.pl
+++ src/hg/utils/automation/doAssemblyHub.pl
@@ -28,30 +28,31 @@
     $opt_sourceDir
     $opt_species
     $opt_rmskSpecies
     $opt_ncbiRmsk
     $opt_augustusSpecies
     $opt_xenoRefSeq
     $opt_ucscNames
     $opt_asmHubName
     /;
 
 # Specify the steps supported with -continue / -stop:
 my $stepper = new HgStepManager(
     [ { name => 'download',   func => \&doDownload },
       { name => 'sequence',   func => \&doSequence },
       { name => 'assemblyGap',   func => \&doAssemblyGap },
+      { name => 'chromAlias',   func => \&doChromAlias },
       { name => 'gatewayPage',   func => \&doGatewayPage },
       { name => 'cytoBand',   func => \&doCytoBand },
       { name => 'gc5Base',   func => \&doGc5Base },
       { name => 'repeatMasker',   func => \&doRepeatMasker },
       { name => 'simpleRepeat',   func => \&doSimpleRepeat },
       { name => 'allGaps',   func => \&doAllGaps },
       { name => 'idKeys',   func => \&doIdKeys },
       { name => 'windowMasker',   func => \&doWindowMasker },
       { name => 'addMask',   func => \&doAddMask },
       { name => 'gapOverlap',   func => \&doGapOverlap },
       { name => 'tandemDups',   func => \&doTandemDups },
       { name => 'cpgIslands',   func => \&doCpgIslands },
       { name => 'ncbiGene',   func => \&doNcbiGene },
       { name => 'ncbiRefSeq',   func => \&doNcbiRefSeq },
       { name => 'xenoRefGene',   func => \&doXenoRefGene },
@@ -120,30 +121,31 @@
 _EOF_
   ;
   print STDERR &HgAutomate::getCommonOptionHelp('dbHost' => $dbHost,
 						'workhorse' => $workhorse,
 						'fileServer' => $fileServer,
 						'bigClusterHub' => $bigClusterHub,
 						'smallClusterHub' => $smallClusterHub);
   print STDERR "
 Automates build of assembly hub.  Steps:
     download: sets up sym link working hierarchy from already mirrored
                 files from NCBI in:
                       $sourceDir/GC[AF]/123/456/789/asmId
     sequence: establish AGP and 2bit file from NCBI directory
     assemblyGap: create assembly and gap bigBed files and indexes
                  for assembly track names
+    chromAlias:  construct asmId.chromAlias.txt for alias name recognition
     gatewayPage: create html/asmId.description.html contents (USE: asmHubName)
     cytoBand: create cytoBand track and navigation ideogram
     gc5Base: create bigWig file for gc5Base track
     repeatMasker: run repeat masker cluster run and create bigBed files for
                   the composite track categories of repeats
     simpleRepeat: run trf cluster run and create bigBed file for simple repeats
     allGaps: calculate all actual real gaps due to N's in sequence, can be
                   more than were specified in the AGP file
     idKeys: calculate md5sum for each sequence in the assembly to be used to
             find identical sequences in similar assemblies
     windowMasker: run windowMasker cluster run, create windowMasker bigBed file
                   and compute intersection with repeatMasker results
     addMask: combine the higher masking of (windowMasker or repeatMasker) with
                   trf simpleRepeats into one 2bit file
     gapOverlap: find duplicated sequence on each side of a gap
@@ -924,30 +926,61 @@
   touch -r ../../\$asmId.agp.gz \$asmId.gap.bb
 fi
 
 rm -f \$asmId.assembly.bed \$asmId.gap.bed \$asmId.assembly.ix.txt
 
 else
   printf "# assemblyGap step previously completed\\n" 1>&2
   exit 0
 fi
 _EOF_
   );
   $bossScript->execute();
 } # assemblyGap
 
 #########################################################################
+# * step: chromAlias [workhorse]
+# Creates $buildDir/trackData/chromAlias and hands doChromAlias.bash to
+# HgRemoteScript for $workhorse with that directory as its run directory.
+# The script runs asmHubChromAlias.pl for $asmId, sorts the output, and
+# requires one non-comment alias line per line of ../../$asmId.chrom.sizes;
+# it exits 255 when the counts differ.
+# The aliases are built in a scratch file and renamed to
+# $asmId.chromAlias.txt only after the count check passes: the trackDb step
+# accepts any non-empty $asmId.chromAlias.txt it finds in this directory,
+# so an unverified or stale file must never be left under that name.
+sub doChromAlias {
+  my $runDir = "$buildDir/trackData/chromAlias";
+  &HgAutomate::mustMkdir($runDir);
+
+  my $whatItDoes = "construct asmId.chromAlias.txt for alias name recognition";
+  my $bossScript = newBash HgRemoteScript("$runDir/doChromAlias.bash",
+                    $workhorse, $runDir, $whatItDoes);
+
+  $bossScript->add(<<_EOF_
+export buildDir=$buildDir
+export asmId=$asmId
+
+# remove results of any earlier run, then build into a scratch file so a
+# failed run can not leave a plausible looking \${asmId}.chromAlias.txt
+rm -f \${asmId}.chromAlias.txt \${asmId}.chromAlias.unverified.txt
+\$HOME/kent/src/hg/utils/automation/asmHubChromAlias.pl \\
+    \${asmId} | sort > \${asmId}.chromAlias.unverified.txt
+# verify each sequence name has an alias
+export sizeCount=`cat ../../\${asmId}.chrom.sizes | wc -l`
+export aliasCount=`grep -v "^#" \${asmId}.chromAlias.unverified.txt | wc -l`
+if [ "\${sizeCount}" -ne "\${aliasCount}" ]; then
+  printf "ERROR: chromAlias: incorrect number of aliases %d != %d\\n" "\${sizeCount}" "\${aliasCount}" 1>&2
+  exit 255
+fi
+
+# counts agree: publish the verified file under its final name
+mv \${asmId}.chromAlias.unverified.txt \${asmId}.chromAlias.txt
+exit 0
+
+_EOF_
+  );
+  $bossScript->execute();
+} # chromAlias
+
+#########################################################################
 # * step: gatewayPage [workhorse]
 sub doGatewayPage {
   if ($asmHubName eq "n/a") {
     printf STDERR "ERROR: step gatewayPage needs argument -asmHubName <name>\n";
     exit 255;
   }
   my $runDir = "$buildDir/html";
   &HgAutomate::mustMkdir($runDir);
 
   my $whatItDoes = "construct html/$asmId.description.html";
   my $bossScript = newBash HgRemoteScript("$runDir/doGatewayPage.bash",
                     $workhorse, $runDir, $whatItDoes);
 
   my $photoJpg = "noPhoto";
   my $photoCredit = "noPhoto";
@@ -1719,33 +1752,39 @@
 _EOF_
   );
   $bossScript->execute();
 } # doXenoRefGene
 
 #########################################################################
 # * step: trackDb [workhorse]
 sub doTrackDb {
   my $runDir = "$buildDir";
   &HgAutomate::mustMkdir($runDir);
 
   my $whatItDoes = "construct asmId.trackDb.txt file";
   my $bossScript = newBash HgRemoteScript("$runDir/doTrackDb.bash",
                     $workhorse, $runDir, $whatItDoes);
 
+  if (! -s "${buildDir}/trackData/chromAlias/${asmId}.chromAlias.txt" ) { # -s: alias file must exist and be non-empty
+    die "ERROR: can not find ${asmId}.chromAlias.txt in\n# ${buildDir}/trackData/chromAlias/\n"; # abort before the script below is added or executed
+  }
+  # script: replace any old alias symlink in its working directory, then save asmHubTrackDb.sh output as asmId.trackDb.txt
   $bossScript->add(<<_EOF_
 export asmId=$asmId
 
+rm -f \$asmId.chromAlias.txt
+ln -s trackData/chromAlias/\${asmId}.chromAlias.txt .
 \$HOME/kent/src/hg/utils/automation/asmHubTrackDb.sh \$asmId $runDir \\
    > \$asmId.trackDb.txt
 
 _EOF_
   );
   $bossScript->execute();
 } # trackDb
 
 #########################################################################
 # * step: cleanup [fileServer]
 sub doCleanup {
   my $runDir = "$buildDir";
   my $whatItDoes = "clean up or compresses intermediate files.";
   my $bossScript = newBash HgRemoteScript("$runDir/doCleanup.bash", $fileServer,
 				      $runDir, $whatItDoes);