4f09e8ddd0fc70577eb8727e38a87896bfd57e84 hiram Wed Sep 23 09:56:28 2020 -0700 adding chromAlias build refs #24396 diff --git src/hg/utils/automation/doAssemblyHub.pl src/hg/utils/automation/doAssemblyHub.pl index 222bfa4..0e93e6c 100755 --- src/hg/utils/automation/doAssemblyHub.pl +++ src/hg/utils/automation/doAssemblyHub.pl @@ -28,30 +28,31 @@ $opt_sourceDir $opt_species $opt_rmskSpecies $opt_ncbiRmsk $opt_augustusSpecies $opt_xenoRefSeq $opt_ucscNames $opt_asmHubName /; # Specify the steps supported with -continue / -stop: my $stepper = new HgStepManager( [ { name => 'download', func => \&doDownload }, { name => 'sequence', func => \&doSequence }, { name => 'assemblyGap', func => \&doAssemblyGap }, + { name => 'chromAlias', func => \&doChromAlias }, { name => 'gatewayPage', func => \&doGatewayPage }, { name => 'cytoBand', func => \&doCytoBand }, { name => 'gc5Base', func => \&doGc5Base }, { name => 'repeatMasker', func => \&doRepeatMasker }, { name => 'simpleRepeat', func => \&doSimpleRepeat }, { name => 'allGaps', func => \&doAllGaps }, { name => 'idKeys', func => \&doIdKeys }, { name => 'windowMasker', func => \&doWindowMasker }, { name => 'addMask', func => \&doAddMask }, { name => 'gapOverlap', func => \&doGapOverlap }, { name => 'tandemDups', func => \&doTandemDups }, { name => 'cpgIslands', func => \&doCpgIslands }, { name => 'ncbiGene', func => \&doNcbiGene }, { name => 'ncbiRefSeq', func => \&doNcbiRefSeq }, { name => 'xenoRefGene', func => \&doXenoRefGene }, @@ -120,30 +121,31 @@ _EOF_ ; print STDERR &HgAutomate::getCommonOptionHelp('dbHost' => $dbHost, 'workhorse' => $workhorse, 'fileServer' => $fileServer, 'bigClusterHub' => $bigClusterHub, 'smallClusterHub' => $smallClusterHub); print STDERR " Automates build of assembly hub. Steps: download: sets up sym link working hierarchy from already mirrored files from NCBI in: $sourceDir/GC[AF]/123/456/789/asmId sequence: establish AGP and 2bit file from NCBI directory assemblyGap: create assembly and gap bigBed files and indexes for assembly track names + chromAlias: construct asmId.chromAlias.txt for alias name recognition gatewayPage: create html/asmId.description.html contents (USE: asmHubName) cytoBand: create cytoBand track and navigation ideogram gc5Base: create bigWig file for gc5Base track repeatMasker: run repeat masker cluster run and create bigBed files for the composite track categories of repeats simpleRepeat: run trf cluster run and create bigBed file for simple repeats allGaps: calculate all actual real gaps due to N's in sequence, can be more than were specified in the AGP file idKeys: calculate md5sum for each sequence in the assembly to be used to find identical sequences in similar assemblies windowMasker: run windowMasker cluster run, create windowMasker bigBed file and compute intersection with repeatMasker results addMask: combine the higher masking of (windowMasker or repeatMasker) with trf simpleRepeats into one 2bit file gapOverlap: find duplicated sequence on each side of a gap @@ -924,30 +926,61 @@ touch -r ../../\$asmId.agp.gz \$asmId.gap.bb fi rm -f \$asmId.assembly.bed \$asmId.gap.bed \$asmId.assembly.ix.txt else printf "# assemblyGap step previously completed\\n" 1>&2 exit 0 fi _EOF_ ); $bossScript->execute(); } # assemblyGap ######################################################################### +# * step: chromAlias [workhorse] +sub doChromAlias { + my $runDir = "$buildDir/trackData/chromAlias"; + &HgAutomate::mustMkdir($runDir); + + my $whatItDoes = "construct asmId.chromAlias.txt for alias name recognition"; + my $bossScript = newBash HgRemoteScript("$runDir/doChromAlias.bash", + $workhorse, $runDir, $whatItDoes); + + $bossScript->add(<<_EOF_ +export buildDir=$buildDir +export asmId=$asmId + +\$HOME/kent/src/hg/utils/automation/asmHubChromAlias.pl \\ + \${asmId} | sort > \${asmId}.chromAlias.txt +# verify each sequence name has an alias +export sizeCount=`cat ../../\${asmId}.chrom.sizes | wc -l` +export aliasCount=`grep -v "^#" \${asmId}.chromAlias.txt | wc -l` +if [ "\${sizeCount}" -ne "\${aliasCount}" ]; then + printf "ERROR: chromAlias: incorrect number of aliases %d != %d\\n" "\${sizeCount}" "\${aliasCount}" 1>&2 + exit 255 +fi + +exit 0 + +_EOF_ + ); + $bossScript->execute(); +} # chromAlias + +######################################################################### # * step: gatewayPage [workhorse] sub doGatewayPage { if ($asmHubName eq "n/a") { printf STDERR "ERROR: step gatewayPage needs argument -asmHubName <name>\n"; exit 255; } my $runDir = "$buildDir/html"; &HgAutomate::mustMkdir($runDir); my $whatItDoes = "construct html/$asmId.description.html"; my $bossScript = newBash HgRemoteScript("$runDir/doGatewayPage.bash", $workhorse, $runDir, $whatItDoes); my $photoJpg = "noPhoto"; my $photoCredit = "noPhoto"; @@ -1719,33 +1752,39 @@ _EOF_ ); $bossScript->execute(); } # doXenoRefGene ######################################################################### # * step: trackDb [workhorse] sub doTrackDb { my $runDir = "$buildDir"; &HgAutomate::mustMkdir($runDir); my $whatItDoes = "construct asmId.trackDb.txt file"; my $bossScript = newBash HgRemoteScript("$runDir/doTrackDb.bash", $workhorse, $runDir, $whatItDoes); + if (! -s "${buildDir}/trackData/chromAlias/${asmId}.chromAlias.txt" ) { + die "ERROR: can not find ${asmId}.chromAlias.txt in\n# ${buildDir}/trackData/chromAlias/\n"; + } + $bossScript->add(<<_EOF_ export asmId=$asmId +rm -f \$asmId.chromAlias.txt +ln -s trackData/chromAlias/\${asmId}.chromAlias.txt . \$HOME/kent/src/hg/utils/automation/asmHubTrackDb.sh \$asmId $runDir \\ > \$asmId.trackDb.txt _EOF_ ); $bossScript->execute(); } # trackDb ######################################################################### # * step: cleanup [fileServer] sub doCleanup { my $runDir = "$buildDir"; my $whatItDoes = "clean up or compresses intermediate files."; my $bossScript = newBash HgRemoteScript("$runDir/doCleanup.bash", $fileServer, $runDir, $whatItDoes);