ad8d6746d5d68ff4e893262d2ff28d9b2a71ac26 hiram Mon Sep 12 11:45:58 2022 -0700 no longer any need for asmHubName argument to gateway page no redmine diff --git src/hg/utils/automation/doAssemblyHub.pl src/hg/utils/automation/doAssemblyHub.pl index 0d8dfb5..f709fac 100755 --- src/hg/utils/automation/doAssemblyHub.pl +++ src/hg/utils/automation/doAssemblyHub.pl @@ -23,31 +23,30 @@ # Option variable names, both common and peculiar to this script: use vars @HgAutomate::commonOptionVars; use vars @HgStepManager::optionVars; use vars qw/ $opt_buildDir $opt_sourceDir $opt_species $opt_rmskSpecies $opt_ncbiRmsk $opt_noRmsk $opt_augustusSpecies $opt_noAugustus $opt_xenoRefSeq $opt_noXenoRefSeq $opt_ucscNames - $opt_asmHubName /; # Specify the steps supported with -continue / -stop: my $stepper = new HgStepManager( [ { name => 'download', func => \&doDownload }, { name => 'sequence', func => \&doSequence }, { name => 'assemblyGap', func => \&doAssemblyGap }, { name => 'chromAlias', func => \&doChromAlias }, { name => 'gatewayPage', func => \&doGatewayPage }, { name => 'cytoBand', func => \&doCytoBand }, { name => 'gc5Base', func => \&doGc5Base }, { name => 'repeatMasker', func => \&doRepeatMasker }, { name => 'simpleRepeat', func => \&doSimpleRepeat }, { name => 'allGaps', func => \&doAllGaps }, { name => 'idKeys', func => \&doIdKeys }, @@ -67,31 +66,30 @@ # Option defaults: my $dbHost = 'hgwdev'; my $sourceDir = "/hive/data/outside/ncbi/genomes"; my $species = ""; # usually found in asmId_assembly_report.txt my $ftpDir = ""; # will be determined from given asmId my $rmskSpecies = ""; my $noRmsk = 0; # when RepeatMasker is not possible, such as bacteria my $ncbiRmsk = 0; # when =1 call doRepeatMasker.pl # with -ncbiRmsk=path.out.gz and -liftSpec=... my $augustusSpecies = "human"; my $xenoRefSeq = "/hive/data/genomes/asmHubs/xenoRefSeq"; my $noAugustus = 0; # bacteria do *not* create an augustus track my $noXenoRefSeq = 0; # bacteria do *not* create a xenoRefSeq track my $ucscNames = 0; # default 'FALSE' (== 0) -my $asmHubName = "n/a"; # directory name in: /gbdb/hubs/asmHubName my $workhorse = "hgwdev"; # default workhorse when none chosen my $fileServer = "hgwdev"; # default when none chosen my $bigClusterHub = "ku"; # default when none chosen my $smallClusterHub = "ku"; # default when none chosen my $base = $0; $base =~ s/^(.*\/)?//; # key is original accession name from the remove.dups.list, value is 1 my %dupAccessionList; sub usage { # Usage / help / self-documentation: my ($status, $detailed) = @_; # Basic help (for incorrect usage): @@ -99,31 +97,30 @@ usage: $base [options] asmId required arguments: asmId - assembly identifier at NCBI FTP site, examples: - GCF_000001405.32_GRCh38.p6 GCF_000001635.24_GRCm38.p4 etc.. options: "; print STDERR $stepper->getOptionHelp(); print STDERR <<_EOF_ -buildDir dir Construct assembly hub in dir instead of default $HgAutomate::clusterData/asmHubs/refseqBuild/GC[AF]/123/456/789/asmId/ -sourceDir dir Find assembly in dir instead of default: $sourceDir/GC[AF]/123/456/789/asmId -ucscNames Translate NCBI/INSDC/RefSeq names to UCSC names default is to use the given NCBI/INSDC/RefSeq names - -asmHubName <name> directory name in: /gbdb/hubs/asmHubName -species <name> use this species designation if there is no asmId_assembly_report.txt with an 'Organism name:' entry to obtain species -rmskSpecies <name> to override default 'species' name for repeat masker the default is found in the asmId_asssembly_report.txt e.g. -rmskSpecies=viruses -noRmsk when RepeatMasker is not possible, such as bacteria -ncbiRmsk use NCBI rm.out.gz file instead of local cluster run for repeat masking -augustusSpecies <human|chicken|zebrafish> default 'human' -noAugustus do *not* create the Augustus gene track -noXenoRefSeq do *not* create the Xeno RefSeq gene track -xenoRefSeq </path/to/xenoRefSeqMrna> - location of xenoRefMrna.fa.gz expanded directory of mrnas/ and xenoRefMrna.sizes, default $xenoRefSeq @@ -131,31 +128,31 @@ ; print STDERR &HgAutomate::getCommonOptionHelp('dbHost' => $dbHost, 'workhorse' => $workhorse, 'fileServer' => $fileServer, 'bigClusterHub' => $bigClusterHub, 'smallClusterHub' => $smallClusterHub); print STDERR " Automates build of assembly hub. Steps: download: sets up sym link working hierarchy from already mirrored files from NCBI in: $sourceDir/GC[AF]/123/456/789/asmId sequence: establish AGP and 2bit file from NCBI directory assemblyGap: create assembly and gap bigBed files and indexes for assembly track names chromAlias: construct asmId.chromAlias.txt for alias name recognition - gatewayPage: create html/asmId.description.html contents (USE: asmHubName) + gatewayPage: create html/asmId.description.html contents cytoBand: create cytoBand track and navigation ideogram gc5Base: create bigWig file for gc5Base track repeatMasker: run repeat masker cluster run and create bigBed files for the composite track categories of repeats simpleRepeat: run trf cluster run and create bigBed file for simple repeats allGaps: calculate all actual real gaps due to N's in sequence, can be more than were specified in the AGP file idKeys: calculate md5sum for each sequence in the assembly to be used to find identical sequences in similar assemblies windowMasker: run windowMasker cluster run, create windowMasker bigBed file and compute intersection with repeatMasker results addMask: combine the higher masking of (windowMasker or repeatMasker) with trf simpleRepeats into one 2bit file gapOverlap: find duplicated sequence on each side of a gap tandemDups: annotate all pairs of duplicated sequence with some gap between @@ -194,31 +191,30 @@ # Command line args: asmId my ( $asmId); # Other: my ($buildDir, $secondsStart, $secondsEnd, $assemblySource); sub checkOptions { # Make sure command line options are valid/supported. my $ok = GetOptions(@HgStepManager::optionSpec, 'buildDir=s', 'sourceDir=s', 'rmskSpecies=s', 'noRmsk', 'ncbiRmsk', 'augustusSpecies=s', 'xenoRefSeq=s', - 'asmHubName=s', 'noXenoRefSeq', 'noAugustus', 'ucscNames', @HgAutomate::commonOptionSpec, ); &usage(1) if (!$ok); &usage(0, 1) if ($opt_help); &HgAutomate::processCommonOptions(); my $err = $stepper->processOptions(); usage(1) if ($err); $dbHost = $opt_dbHost if ($opt_dbHost); } ######################################################################### ######################################################################### @@ -1051,59 +1047,55 @@ if [ "\${sizeCount}" -ne "\${testCount}" ]; then printf "ERROR: chromAlias: incorrect number of aliases chromSizes %d > %d testCount in bigBed file\\n" "\${sizeCount}" "\${testCount}" 1>&2 exit 255 fi exit 0 _EOF_ ); $bossScript->execute(); } # chromAlias ######################################################################### # * step: gatewayPage [workhorse] sub doGatewayPage { - if ($asmHubName eq "n/a") { - printf STDERR "ERROR: step gatewayPage needs argument -asmHubName <name>\n"; - exit 255; - } my $runDir = "$buildDir/html"; &HgAutomate::mustMkdir($runDir); my $whatItDoes = "construct html/$asmId.description.html"; my $bossScript = newBash HgRemoteScript("$runDir/doGatewayPage.bash", $workhorse, $runDir, $whatItDoes); my $photoJpg = "noPhoto"; my $photoCredit = "noPhoto"; my $photoLink = ""; my $speciesNoBlank = $species; $speciesNoBlank =~ s/ /_/g; if ( -s "$runDir/../photo/$speciesNoBlank.jpg" ) { $photoJpg = "../photo/${speciesNoBlank}.jpg"; $photoCredit = "../photo/photoCredits.txt"; $photoLink = "rm -f ${speciesNoBlank}.jpg; ln -s ../photo/${speciesNoBlank}.jpg ." } else { printf STDERR "# gatewayPage: warning: no photograph available\n"; } $bossScript->add(<<_EOF_ export asmId=$asmId \$HOME/kent/src/hg/utils/automation/asmHubGatewayPage.pl \\ - $asmHubName ../download/\${asmId}_assembly_report.txt \\ + ../download/\${asmId}_assembly_report.txt \\ ../\${asmId}.chrom.sizes \\ $photoJpg $photoCredit \\ > \$asmId.description.html 2> \$asmId.names.tab \$HOME/kent/src/hg/utils/automation/genbank/buildStats.pl \\ ../\$asmId.chrom.sizes 2> \$asmId.build.stats.txt $photoLink _EOF_ ); $bossScript->execute(); } # gatewayPage ######################################################################### # * step: cytoBand [workhorse] sub doCytoBand { my $runDir = "$buildDir/trackData/cytoBand"; @@ -2041,41 +2033,39 @@ if (length($species) < 1) { die "ERROR: no -species specified and can not find Organism name: in $asmReport"; } } $rmskSpecies = $opt_rmskSpecies ? $opt_rmskSpecies : $species; $augustusSpecies = $opt_augustusSpecies ? $opt_augustusSpecies : $augustusSpecies; $xenoRefSeq = $opt_xenoRefSeq ? $opt_xenoRefSeq : $xenoRefSeq; $ucscNames = $opt_ucscNames ? 1 : $ucscNames; # '1' == 'TRUE' $noAugustus = $opt_noAugustus ? 1 : $noAugustus; # '1' == 'TRUE' $noXenoRefSeq = $opt_noXenoRefSeq ? 1 : $noXenoRefSeq; # '1' == 'TRUE' $workhorse = $opt_workhorse ? $opt_workhorse : $workhorse; $bigClusterHub = $opt_bigClusterHub ? $opt_bigClusterHub : $bigClusterHub; $smallClusterHub = $opt_smallClusterHub ? $opt_smallClusterHub : $smallClusterHub; $fileServer = $opt_fileServer ? $opt_fileServer : $fileServer; -$asmHubName = $opt_asmHubName ? $opt_asmHubName : $asmHubName; $ncbiRmsk = $opt_ncbiRmsk ? 1 : 0; $noRmsk = $opt_noRmsk ? 1 : 0; die "can not find assembly source directory\n$assemblySource" if ( ! -d $assemblySource); printf STDERR "# buildDir: %s\n", $buildDir; printf STDERR "# sourceDir %s\n", $sourceDir; printf STDERR "# augustusSpecies %s\n", $augustusSpecies; printf STDERR "# xenoRefSeq %s\n", $xenoRefSeq; printf STDERR "# assemblySource: %s\n", $assemblySource; -printf STDERR "# asmHubName %s\n", $asmHubName; printf STDERR "# rmskSpecies %s\n", $rmskSpecies; printf STDERR "# augustusSpecies %s\n", $augustusSpecies; printf STDERR "# ncbiRmsk %s\n", $ncbiRmsk ? "TRUE" : "FALSE"; printf STDERR "# ucscNames %s\n", $ucscNames ? "TRUE" : "FALSE"; printf STDERR "# noAugustus %s\n", $noAugustus ? "TRUE" : "FALSE"; printf STDERR "# noXenoRefSeq %s\n", $noXenoRefSeq ? "TRUE" : "FALSE"; printf STDERR "# noRmsk %s\n", $noRmsk ? "TRUE" : "FALSE"; # Do everything. $stepper->execute(); # Tell the user anything they should know. my $stopStep = $stepper->getStopStep(); my $upThrough = ($stopStep eq 'cleanup') ? "" : " (through the '$stopStep' step)";