a87c21c0843e05386bc8539b11305e8aa0439a44 hiram Thu Jan 30 17:21:01 2020 -0800 verify all required input files are available refs #23891 diff --git src/hg/utils/automation/doNcbiRefSeq.pl src/hg/utils/automation/doNcbiRefSeq.pl index c23ed70..a8ce0d3f 100755 --- src/hg/utils/automation/doNcbiRefSeq.pl +++ src/hg/utils/automation/doNcbiRefSeq.pl @@ -140,30 +140,45 @@ &usage(1) if (!$ok); &usage(0, 1) if ($opt_help); &HgAutomate::processCommonOptions(); my $err = $stepper->processOptions(); usage(1) if ($err); $dbHost = $opt_dbHost if ($opt_dbHost); $workhorse = $opt_workhorse if ($opt_workhorse); $bigClusterHub = $opt_bigClusterHub if ($opt_bigClusterHub); $smallClusterHub = $opt_smallClusterHub if ($opt_smallClusterHub); $fileServer = $opt_fileServer if ($opt_fileServer); } ######################################################################### # * step: download [workhorse] sub doDownload { + my $filesFound = 0; + my @requiredFiles = qw( genomic.gff.gz rna.fna.gz rna.gbff.gz protein.faa.gz ); + my $filesExpected = scalar(@requiredFiles); + foreach my $expectFile (@requiredFiles) { + if ( -s "/hive/data/outside/ncbi/${asmId}_${expectFile}" ) { + ++$filesFound; + } else { + printf STDERR "# doNcbiRefSeq.pl: missing required file /hive/data/outside/ncbi/${asmId}_${expectFile}\n"; + } + } + + if ($filesFound < $filesExpected) { + printf STDERR "# doNcbiRefSeq.pl download: can not find all files required\n"; + exit 0; + } my $runDir = "$buildDir/download"; &HgAutomate::mustMkdir($runDir); my $whatItDoes = "download required set of files from NCBI."; my $bossScript = newBash HgRemoteScript("$runDir/doDownload.bash", $workhorse, $runDir, $whatItDoes); my $outsideCopy = "/hive/data/outside/ncbi/$ftpDir"; my $localData = "/hive/data/inside/ncbi/$ftpDir"; $localData =~ s/all_assembly_versions/latest_assembly_versions/; my $local2Bit = "$localData/$asmId.ncbi.2bit"; # establish variables $bossScript->add(<<_EOF_ # establish all potential variables to use here, not all may be used