d7d97045f093f991875b4bf0efd35d4b928e2967
hiram
  Mon Jul 29 18:10:16 2019 -0700
scripts used to build the VGP assembly hubs refs #23734

diff --git src/hg/makeDb/doc/VGP/runOneJob src/hg/makeDb/doc/VGP/runOneJob
new file mode 100755
index 0000000..006a10a
--- /dev/null
+++ src/hg/makeDb/doc/VGP/runOneJob
@@ -0,0 +1,139 @@
+#!/usr/bin/env perl
+
+# runOneJob - write and run the doAssemblyHub.pl step script for one VGP
+# assembly.
+#
+# usage: ./runOneJob <asmIdName>
+#
+# The source directory for <asmIdName> is looked up in ./toDo.list (lines
+# starting with the name, tab-separated, directory in column 2).  The
+# doAssemblyHub.pl arguments are taken from components of that path, the
+# command is written to steps.<start>-<stop>.sh in the build directory, and
+# that script is run.
+#
+# Exit status: 255 on a usage error, 0 when a non-empty step script already
+# exists (nothing is re-run), 128 + signal number when the step script was
+# killed by a signal, otherwise the exit status of the step script.
+
+use strict;
+use warnings;
+use File::Path qw(make_path);
+
+my $argc = scalar(@ARGV);
+
+if ($argc != 1) {
+  print STDERR "usage: ./runOneJob <asmIdName>\n";
+  print STDERR "where <asmIdName> is asmAcc_asmName, e.g:\n";
+  print STDERR " ./runOneJob GCF_004115215.1_mOrnAna1.p.v1\n";
+  exit 255;
+}
+
+my $asmIdName = shift;
+# GCF_004115215.1_mOrnAna1.p.v1 splits into GCF, 004115215.1, mOrnAna1.p.v1
+my @nameParts = split('_', $asmIdName);
+my $asmName = defined($nameParts[2]) ? $nameParts[2] : "";
+
+# Look the name up without a shell.  It is compared as a literal line prefix
+# so a '.' in the accession can not match some other character; a line whose
+# first column is exactly the name wins over longer names sharing the prefix.
+my $toDoList = "toDo.list";
+my %exactDirs;
+my %prefixDirs;
+open (my $listFh, "<", $toDoList) or die "can not read $toDoList: $!";
+while (my $line = <$listFh>) {
+  chomp $line;
+  next if (index($line, $asmIdName) != 0);
+  my ($listName, $listDir) = split(/\t/, $line);
+  next if (! defined($listDir) || $listDir eq "");
+  if ($listName eq $asmIdName) {
+    $exactDirs{$listDir} = 1;
+  } else {
+    $prefixDirs{$listDir} = 1;
+  }
+}
+close ($listFh);
+my @sourceDirs = sort keys %exactDirs;
+@sourceDirs = sort keys %prefixDirs if (scalar(@sourceDirs) < 1);
+if (scalar(@sourceDirs) != 1) {
+  die sprintf("expected one source directory for %s in %s, found %d",
+      $asmIdName, $toDoList, scalar(@sourceDirs));
+}
+my $sourceDir = $sourceDirs[0];
+
+# e.g. /hive/data/outside/ncbi/genomes/refseq/vertebrate_mammalian/<species>/...
+# an absolute path has an empty element 0, so refseq is element 6
+my @dirParts = split('/', $sourceDir);
+if (scalar(@dirParts) < 9) {
+  die "source directory '$sourceDir' is too short for $asmIdName";
+}
+my $genbankRefseq = $dirParts[6];
+my $cladeDir = $dirParts[7];
+my $sciName = $dirParts[8];
+my $buildDir="/hive/data/genomes/asmHubs/VGP/ucscNames/${asmIdName}";
+my $stepStart = "gatewayPage";
+my $stepEnd = "gatewayPage";
+my $hubSpecs = "-bigClusterHub=ku -smallClusterHub=hgwdev -ucscNames";
+my $stepScript = "${buildDir}/steps.$stepStart-$stepEnd.sh";
+
+# The -augustusSpecies value follows the clade directory; within
+# vertebrate_other it is chosen from the first letter of the assembly name.
+my $augustusSpecies = "notFound";
+if ($cladeDir =~ m/vertebrate_mammalian/) {
+  $augustusSpecies = "human";
+} elsif ($cladeDir =~ m/vertebrate_other/) {
+   if ($asmName =~ m/^f/) {	# fish
+     $augustusSpecies = "zebrafish";
+   } elsif ($asmName =~ m/^[ba]/) {	# bird or amphibian
+     $augustusSpecies = "chicken";
+   }
+}
+if ($augustusSpecies eq "notFound") {
+  die "can not determine augustus species for $asmIdName";
+}
+
+make_path($buildDir);
+die "can not create directory $buildDir" if (! -d $buildDir);
+if ( -s $stepScript ) {
+  print STDERR "# NOTE already run $stepScript\n";
+  exit 0;
+}
+
+# print, not printf: the interpolated values must not be read as a format
+open (my $stepFh, ">", $stepScript) or die "can not write to $stepScript: $!";
+print $stepFh "#!/bin/bash\n\n";
+print $stepFh "cd $buildDir\n";
+print $stepFh "time (~/kent/src/hg/utils/automation/doAssemblyHub.pl $genbankRefseq \\\n";
+print $stepFh "   $cladeDir ${sciName} $asmIdName -verbose=2 \\\n";
+print $stepFh "     -continue=$stepStart -stop=$stepEnd ${hubSpecs} -fileServer=hgwdev \\\n";
+print $stepFh "         -augustusSpecies=${augustusSpecies} -buildDir=${buildDir} \\\n";
+print $stepFh "           -sourceDir=${sourceDir}) \\\n";
+print $stepFh "     >> ${buildDir}/${asmIdName}.${stepStart}-${stepEnd}.log 2>&1\n";
+close ($stepFh) or die "can not close $stepScript: $!";
+chmod(0775, $stepScript) or die "can not chmod $stepScript: $!";
+
+# system() returns the raw wait status: -1 when the script could not be
+# started, signal number in the low 7 bits, exit code in the high byte.
+my $ret = system { $stepScript } $stepScript;
+if ($ret == -1) {
+  die "can not run $stepScript: $!";
+}
+if ($ret & 127) {
+  printf STDERR "# ERROR %s killed by signal %d\n", $stepScript, $ret & 127;
+  exit (128 + ($ret & 127));
+}
+
+exit ($ret >> 8);
+
+__END__
+# /hive/data/outside/ncbi/genomes/refseq/vertebrate_mammalian/Ornithorhynchus_anatinus/all_assembly_versions/GCF_004115215.1_mOrnAna1.p.v1
+
+
+  time (~/kent/src/hg/utils/automation/doAssemblyHub.pl genbankRefseq \
+    vertebrate_other "${sciName}" "${asmIdName}" -verbose=2 \
+     -continue=$stepStart -stop=$stepEnd ${hubSpecs} -fileServer=hgwdev \
+      -augustusSpecies=${augustusSpecies} -buildDir="${buildDir}" \
+        -sourceDir="${sourceDir}") \
+          >> ${buildDir}/${asmIdName}.${stepStart}-${stepEnd}.log 2>&1 &
+
+  echo ${buildDir}/${asmIdName}.${stepStart}-${stepEnd}.log
+  printf "###########################################################\n" >> ${buildDir}/${asmIdName}.${stepStart}-${stepEnd}.log