71afd9d91414afe788f7b4fc294dc9973b4baad3 hiram Mon Aug 31 14:57:08 2020 -0700 construct symLinks into goldenPath for GTF file and archive files refs #26415 diff --git src/hg/utils/automation/doEnsGeneUpdate.pl src/hg/utils/automation/doEnsGeneUpdate.pl index 0b30ffb..b10009f 100755 --- src/hg/utils/automation/doEnsGeneUpdate.pl +++ src/hg/utils/automation/doEnsGeneUpdate.pl @@ -21,30 +21,31 @@ use vars qw/ $opt_ensVersion $opt_vegaGene $opt_buildDir $opt_chromSizes $opt_species /; # Specify the steps supported with -continue / -stop: my $stepper = new HgStepManager( [ { name => 'download', func => \&doDownload }, { name => 'process', func => \&doProcess }, { name => 'load', func => \&doLoad }, { name => 'cleanup', func => \&doCleanup }, + { name => 'goldenPath', func => \&doGoldenPath }, { name => 'makeDoc', func => \&doMakeDoc }, ] ); # Option defaults: my $dbHost = 'hgwdev'; my $vegaSpecies = "human"; my $vegaPep = "Homo_sapiens.VEGA"; my $base = $0; $base =~ s/^(.*\/)?//; my (@versionList) = &EnsGeneAutomate::ensVersionList(); my $versionListString = join(", ", @versionList); my $versionString = ""; @@ -255,30 +256,32 @@ $prevSum eq $thisSum ? 
"==" : "!="; if (1 == $identicalToPrevious) { print STDERR "previous genes same as new genes"; } } else { $identicalToPrevious = 0; } # there are too many things to check to verify identical to previous $identicalToPrevious = 0; $bossScript->add(<<_EOF_ export db="$db" +genePredToGtf -utr file process/\$db.allGenes.gp.gz stdout | gzip -c > process/\$db.ensGene.v$ensVersion.gtf.gz + _EOF_ ); if ($dbExists && $identicalToPrevious ) { $bossScript->add(<<_EOF_ hgsql -e 'INSERT INTO trackVersion \\ (db, name, who, version, updateTime, comment, source, dateReference) \\ VALUES("\$db", "ensGene", "$ENV{'USER'}", "$ensVersion", now(), \\ "identical to previous version $previousEnsVersion", \\ "identical to previous version $previousEnsVersion", \\ "$ensVersionDateReference" );' hgFixed featureBits \$db ensGene > fb.\$db.ensGene.txt 2>&1 _EOF_ ); } else { @@ -588,31 +591,31 @@ genePredCheck -db=\$db not.vegaPseudo.gp.gz _EOF_ ); } if (-s "$chromSizes") { $bossScript->add(<<_EOF_ genePredCheck -chromSizes=$chromSizes \$db.allGenes.gp.gz _EOF_ ); } else { $bossScript->add(<<_EOF_ genePredCheck -db=\$db \$db.allGenes.gp.gz _EOF_ ); } - } + } # if (! defined $skipInvalid) $bossScript->execute() if (! $opt_debug); } # doProcess ######################################################################### # * step: download [dbHost] sub doDownload { my $runDir = "$buildDir/download"; # check if been already done if (-s "$runDir/$ensGtfFile" && -s "$runDir/$ensPepFile" ) { &HgAutomate::verbose(1, "# step download is already completed, continuing...\n"); return; } # If not already done, then it should be clean. if (-d "$runDir" && ! $opt_debug) { @@ -660,30 +663,62 @@ rm -f pseudo.name not.pseudo.name vegaGene.name vegaPepAll.$db.fa.tab vegaPep.name gzip vegaPep.$db.fa.tab _EOF_ ); } else { $bossScript->add(<<_EOF_ rm -f bed.tab ensPep.txt.gz ensPep.$db.fa.tab ensPep.name ensGene.name rm -f $db.ensGene.gp.bed _EOF_ ); } $bossScript->execute() if (! 
$opt_debug); } # doCleanup #########################################################################
# * step: goldenPath [dbHost]
#
# Publish this build's Ensembl gene files under goldenPath via symlinks.
#
# Uses the file-level settings $buildDir, $db, $ensVersion, $dbHost and
# $opt_debug.  Dies unless process/$db.ensGene.v$ensVersion.gtf.gz exists
# under $buildDir and is non-empty.  Otherwise it creates the two goldenPath
# directories with HgAutomate::mustMkdir, writes doGoldenPath.bash into
# $buildDir and, unless $opt_debug is set, executes it.
#
# Links created by the generated script:
#   <goldenPath>/archive/$db/ensGene/$db.ensGene.v<ver>.gtf.gz
#   <goldenPath>/archive/$db/ensGene/$db.ensGene.v<ver>.genePred.gz
#   <goldenPath>/$db/bigZips/genes/$db.ensGene.gtf.gz
sub doGoldenPath {
  my $runDir = "$buildDir";
  if (! -s "$runDir/process/$db.ensGene.v$ensVersion.gtf.gz" ) {
    die "ERROR: step goldenPath can not find process/$db.ensGene.v$ensVersion.gtf.gz\n" .
      "\tcheck if processing step has completed\n";
  }

  my $whatItDoes = "Create symlinks to make gtf files appear in goldenPath.";
  my $gpGeneDir = "$HgAutomate::goldenPath/$db/bigZips/genes";
  my $gpArchiveDir = "$HgAutomate::goldenPath/archive/$db/ensGene";
  my $bossScript = HgRemoteScript->newBash("$runDir/doGoldenPath.bash", $dbHost,
                                           $runDir, $whatItDoes);

  &HgAutomate::mustMkdir($gpGeneDir);
  &HgAutomate::mustMkdir($gpArchiveDir);

  # Each destination is removed before linking so the step can be re-run.
  # In the here-doc, \$db is expanded by bash (from the export line) while
  # $ensVersion and the two directory names are interpolated by Perl now.
  #
  # The genePred link points at \$db.allGenes.gp.gz, the file name that the
  # genePredCheck and genePredToGtf commands in this script operate on.
  # 'ln -s' does not verify that its target exists, so linking to any other
  # name would silently leave a dangling symlink in the archive directory.
  $bossScript->add(<<_EOF_
export db="$db"
rm -f $gpArchiveDir/\$db.ensGene.v$ensVersion.gtf.gz
rm -f $gpArchiveDir/\$db.ensGene.v$ensVersion.genePred.gz
ln -s `pwd`/process/\$db.ensGene.v$ensVersion.gtf.gz $gpArchiveDir/
ln -s `pwd`/process/\$db.allGenes.gp.gz $gpArchiveDir/\$db.ensGene.v$ensVersion.genePred.gz
rm -f $gpGeneDir/\$db.ensGene.gtf.gz
ln -s `pwd`/process/\$db.ensGene.v$ensVersion.gtf.gz $gpGeneDir/\$db.ensGene.gtf.gz
_EOF_
  );

  $bossScript->execute() if (! $opt_debug);
} # doGoldenPath

#########################################################################
# * step: makeDoc [dbHost] sub doMakeDoc { my $runDir = "$buildDir"; my $whatItDoes = "Display the make doc text to stdout."; if (! $dbExists) { &HgAutomate::verbose(1, "# step makeDoc is not run when not a database build\n"); return; } my $updateTime = `hgsql -N -e 'select updateTime from trackVersion where db = "$db" order by updateTime DESC limit 1;' hgFixed`; chomp $updateTime; $updateTime =~ s/ .*//; # removes time my $organism = `hgsql -N -e 'select organism from dbDb where name = "$db";' hgcentraltest`; chomp $organism;