71afd9d91414afe788f7b4fc294dc9973b4baad3
hiram
  Mon Aug 31 14:57:08 2020 -0700
construct symLinks into goldenPath for GTF file and archive files refs #26415

diff --git src/hg/utils/automation/doEnsGeneUpdate.pl src/hg/utils/automation/doEnsGeneUpdate.pl
index 0b30ffb..b10009f 100755
--- src/hg/utils/automation/doEnsGeneUpdate.pl
+++ src/hg/utils/automation/doEnsGeneUpdate.pl
@@ -21,30 +21,31 @@
 use vars qw/
     $opt_ensVersion
     $opt_vegaGene
     $opt_buildDir
     $opt_chromSizes
     $opt_species
     /;
 
 
 # Specify the steps supported with -continue / -stop:
 my $stepper = new HgStepManager(
     [ { name => 'download',   func => \&doDownload },
       { name => 'process',   func => \&doProcess },
       { name => 'load',   func => \&doLoad },
       { name => 'cleanup', func => \&doCleanup },
+      { name => 'goldenPath', func => \&doGoldenPath },
       { name => 'makeDoc', func => \&doMakeDoc },
     ]
 );
 
 # Option defaults:
 my $dbHost = 'hgwdev';
 my $vegaSpecies = "human";
 my $vegaPep = "Homo_sapiens.VEGA";
 
 my $base = $0;
 $base =~ s/^(.*\/)?//;
 my (@versionList) = &EnsGeneAutomate::ensVersionList();
 my $versionListString = join(", ", @versionList);
 my $versionString = "";
 
@@ -255,30 +256,32 @@
          $prevSum eq $thisSum ? "==" : "!=";
 
       if (1 == $identicalToPrevious) {
 	print STDERR "previous genes same as new genes";
       }
   } else {
     $identicalToPrevious = 0;
   }
 
 # there are too many things to check to verify identical to previous
 $identicalToPrevious = 0;
 
   $bossScript->add(<<_EOF_
 export db="$db"
 
+genePredToGtf -utr file process/\$db.allGenes.gp.gz stdout | gzip -c > process/\$db.ensGene.v$ensVersion.gtf.gz
+
 _EOF_
 	  );
 
   if ($dbExists && $identicalToPrevious ) {
       $bossScript->add(<<_EOF_
 hgsql -e 'INSERT INTO trackVersion \\
     (db, name, who, version, updateTime, comment, source, dateReference) \\
     VALUES("\$db", "ensGene", "$ENV{'USER'}", "$ensVersion", now(), \\
 	"identical to previous version $previousEnsVersion", \\
 	"identical to previous version $previousEnsVersion", \\
 	"$ensVersionDateReference" );' hgFixed
 featureBits \$db ensGene > fb.\$db.ensGene.txt 2>&1
 _EOF_
 	  );
   } else {
@@ -588,31 +591,31 @@
 genePredCheck -db=\$db not.vegaPseudo.gp.gz
 _EOF_
 	  );
       }
       if (-s "$chromSizes") {
       $bossScript->add(<<_EOF_
 genePredCheck -chromSizes=$chromSizes \$db.allGenes.gp.gz
 _EOF_
          );
       } else {
       $bossScript->add(<<_EOF_
 genePredCheck -db=\$db \$db.allGenes.gp.gz
 _EOF_
          );
       }
-  }
+  }	# if (! defined $skipInvalid)
   $bossScript->execute() if (! $opt_debug);
 } # doProcess
 
 #########################################################################
 # * step: download [dbHost]
 sub doDownload {
   my $runDir = "$buildDir/download";
   # check if been already done
   if (-s "$runDir/$ensGtfFile" && -s "$runDir/$ensPepFile" ) {
      &HgAutomate::verbose(1,
          "# step download is already completed, continuing...\n");
      return;
   }
   # If not already done, then it should be clean.
   if (-d "$runDir" && ! $opt_debug) {
@@ -660,30 +663,62 @@
 rm -f pseudo.name not.pseudo.name vegaGene.name vegaPepAll.$db.fa.tab vegaPep.name
 gzip vegaPep.$db.fa.tab
 _EOF_
     );
   } else {
     $bossScript->add(<<_EOF_
 rm -f bed.tab ensPep.txt.gz ensPep.$db.fa.tab ensPep.name ensGene.name
 rm -f $db.ensGene.gp.bed
 _EOF_
     );
   }
   $bossScript->execute() if (! $opt_debug);
 } # doCleanup
 
 #########################################################################
+# * step: goldenPath [dbHost] - symlink the GTF and genePred files into goldenPath
+sub doGoldenPath {
+  my $runDir = "$buildDir";
+  if (! -s "$runDir/process/$db.ensGene.v$ensVersion.gtf.gz" ) {
+    die "ERROR: step goldenPath can not find process/$db.ensGene.v$ensVersion.gtf.gz\n" .
+        "\tcheck if processing step has completed\n";
+  }
+  # Takes no arguments, returns nothing; dies above when the versioned GTF is absent.
+  my $whatItDoes = "Create symlinks to make gtf files appear in goldenPath.";
+  my $gpGeneDir = "$HgAutomate::goldenPath/$db/bigZips/genes";
+  my $gpArchiveDir = "$HgAutomate::goldenPath/archive/$db/ensGene";
+  my $bossScript = newBash HgRemoteScript("$runDir/doGoldenPath.bash", $dbHost,
+				      $runDir, $whatItDoes);
+  # Both link destination directories are created before the script is built.
+  &HgAutomate::mustMkdir($gpGeneDir);
+  &HgAutomate::mustMkdir($gpArchiveDir);
+  # genePred link source is process/$db.allGenes.gp.gz, the name the other steps use.
+  $bossScript->add(<<_EOF_
+export db="$db"
+rm -f $gpArchiveDir/\$db.ensGene.v$ensVersion.gtf.gz
+rm -f $gpArchiveDir/\$db.ensGene.v$ensVersion.genePred.gz
+ln -s `pwd`/process/\$db.ensGene.v$ensVersion.gtf.gz  $gpArchiveDir/
+ln -s `pwd`/process/\$db.allGenes.gp.gz  $gpArchiveDir/\$db.ensGene.v$ensVersion.genePred.gz
+rm -f $gpGeneDir/\$db.ensGene.gtf.gz
+ln -s `pwd`/process/\$db.ensGene.v$ensVersion.gtf.gz  $gpGeneDir/\$db.ensGene.gtf.gz
+_EOF_
+	  );
+  # The script is not executed when -debug is set.
+  $bossScript->execute() if (! $opt_debug);
+} # doGoldenPath
+
+#########################################################################
 # * step: makeDoc [dbHost]
 sub doMakeDoc {
   my $runDir = "$buildDir";
   my $whatItDoes = "Display the make doc text to stdout.";
 
   if (! $dbExists) {
     &HgAutomate::verbose(1,
          "# step makeDoc is not run when not a database build\n");
     return;
   }
   my $updateTime = `hgsql -N -e 'select updateTime from trackVersion where db = "$db" order by updateTime DESC limit 1;' hgFixed`;
   chomp $updateTime;
   $updateTime =~ s/ .*//;	#	removes time
   my $organism = `hgsql -N -e 'select organism from dbDb where name = "$db";' hgcentraltest`;
   chomp $organism;