a87c21c0843e05386bc8539b11305e8aa0439a44
hiram
  Thu Jan 30 17:21:01 2020 -0800
verify all required input files are available refs #23891

diff --git src/hg/utils/automation/doNcbiRefSeq.pl src/hg/utils/automation/doNcbiRefSeq.pl
index c23ed70..a8ce0d3f 100755
--- src/hg/utils/automation/doNcbiRefSeq.pl
+++ src/hg/utils/automation/doNcbiRefSeq.pl
@@ -140,30 +140,45 @@
   &usage(1) if (!$ok);
   &usage(0, 1) if ($opt_help);
   &HgAutomate::processCommonOptions();
   my $err = $stepper->processOptions();
   usage(1) if ($err);
   $dbHost = $opt_dbHost if ($opt_dbHost);
   $workhorse = $opt_workhorse if ($opt_workhorse);
   $bigClusterHub = $opt_bigClusterHub if ($opt_bigClusterHub);
   $smallClusterHub = $opt_smallClusterHub if ($opt_smallClusterHub);
   $fileServer = $opt_fileServer if ($opt_fileServer);
 }
 
 #########################################################################
 # * step: download [workhorse]
 sub doDownload {
+  my $filesFound = 0;
+ my @requiredFiles = qw( genomic.gff.gz rna.fna.gz rna.gbff.gz protein.faa.gz );
+  my $filesExpected = scalar(@requiredFiles);
+  foreach my $expectFile (@requiredFiles) {
+    if ( -s "/hive/data/outside/ncbi/${asmId}_${expectFile}" ) {
+      ++$filesFound;
+    } else {
+      printf STDERR "# doNcbiRefSeq.pl: missing required file /hive/data/outside/ncbi/${asmId}_${expectFile}\n";
+    }
+  }
+
+  if ($filesFound < $filesExpected) {
+    printf STDERR "# doNcbiRefSeq.pl download: can not find all files required\n";
+    exit 0;
+  }
   my $runDir = "$buildDir/download";
   &HgAutomate::mustMkdir($runDir);
 
   my $whatItDoes = "download required set of files from NCBI.";
   my $bossScript = newBash HgRemoteScript("$runDir/doDownload.bash", $workhorse,
 				      $runDir, $whatItDoes);
   my $outsideCopy = "/hive/data/outside/ncbi/$ftpDir";
   my $localData = "/hive/data/inside/ncbi/$ftpDir";
   $localData =~ s/all_assembly_versions/latest_assembly_versions/;
   my $local2Bit = "$localData/$asmId.ncbi.2bit";
 
   # establish variables
   $bossScript->add(<<_EOF_
 # establish all potential variables to use here, not all may be used