12a73dd0da50c7ff8569cd798d2a2b1266407bbf
hiram
  Mon Apr 20 15:34:52 2020 -0700
build procedure for ncbiRefSeq genes refs #24547

diff --git src/hg/makeDb/doc/ncbiRefSeq/updateOne src/hg/makeDb/doc/ncbiRefSeq/updateOne
new file mode 100755
index 0000000..8904aa1
--- /dev/null
+++ src/hg/makeDb/doc/ncbiRefSeq/updateOne
@@ -0,0 +1,144 @@
+#!/usr/bin/env perl
+
+# updateOne - rebuild the ncbiRefSeq track for one UCSC database when the
+#   NCBI genomic gff file is newer than the existing ncbiRefSeq table.
+#
+# usage: updateOne <ucscDb> <asmId>
+#
+# Prints a '<ucscDb>.ncbiRefSeq done|needUpdate <gffFileDate>' status line
+#   to stdout, prefixed with the table update time when that is known.
+# When an update is needed, writes and runs run.sh in
+#   /hive/data/genomes/<ucscDb>/bed/ncbiRefSeq.<today> which calls
+#   doNcbiRefSeq.pl, logging to do.log in that directory.
+# Exit code 0: table is current, build already started today, or build OK.
+# Exit code 255: any error.
+
+use strict;
+use warnings;
+use POSIX qw(mktime ctime strftime);
+use File::stat;
+
+# report an error on STDERR in printf style and exit with failure code 255
+sub fail {
+  my ($format, @args) = @_;
+  printf STDERR "ERROR: $format\n", @args;
+  exit 255;
+}
+
+my $argc = scalar(@ARGV);
+
+if ($argc != 2) {
+  printf STDERR "usage: updateOne <ucscDb> <asmId>\n";
+  printf STDERR "e.g.:  ./updateOne susScr11 GCF_000003025.6_Sscrofa11.1\n";
+  exit 255;
+}
+
+my $db = shift;
+my $asmId = shift;
+# e.g.: GCF_000003025.6_Sscrofa11.1
+# GCF_000003025.6_Sscrofa11.1_genomic.gff.gz
+
+# The source directory is the accession prefix followed by its nine digits
+# in groups of three, e.g.: GCF/000/003/025/GCF_000003025.6_Sscrofa11.1
+# Check that shape first, a short or malformed asmId can not name a
+# valid directory.
+my @asmParts = ($asmId =~ m/\A(GC[AF])_(\d{3})(\d{3})(\d{3})\./);
+if (scalar(@asmParts) != 4) {
+  fail("asmId '%s' is not of the form GCx_nnnnnnnnn.v_name", $asmId);
+}
+my $srcDir = join("/", @asmParts, $asmId);
+my $gffFile = "/hive/data/outside/ncbi/genomes/$srcDir/${asmId}_genomic.gff.gz";
+if (! -s $gffFile) {
+  fail("can not find gff file:\n# %s", $gffFile);
+}
+my $fileStat = stat($gffFile);
+if (! defined($fileStat)) {
+  fail("can not stat gff file %s: %s", $gffFile, $!);
+}
+my $fileMtime = $fileStat->mtime;
+my $fileDate = strftime("%Y-%m-%d %H:%M:%S", localtime($fileMtime));
+
+# Field 13 of 'show table status' is the table Update_time.  hgsql is run
+# without a shell so that $db is passed through as one literal argument.
+my $tableStatus = "";
+my @hgsqlCmd = ("hgsql", "-N", "-e", 'show table status like "ncbiRefSeq";', $db);
+if (open(my $sql, '-|', @hgsqlCmd)) {
+  my @rows = <$sql>;
+  close($sql);  # an hgsql failure leaves no rows, same as a missing table
+  if (scalar(@rows) > 0) {
+    chomp $rows[0];
+    my @fields = split(/\t/, $rows[0], -1);
+    $tableStatus = $fields[12] if (defined($fields[12]));
+  }
+} else {
+  printf STDERR "ERROR: can not run hgsql: %s\n", $!;
+}
+
+my $status = "done $fileDate";
+my $tableTimeStamp = 0;
+if (0 == length($tableStatus)) {
+  printf STDERR "ERROR: ncbiRefSeq table missing in '%s'\n", $db;
+  $status = "needUpdate $fileDate";
+} elsif ($tableStatus =~ m/\A(\d+)-(\d+)-(\d+)\s+(\d+):(\d+):(\d+)\s*\z/) {
+  # e.g.: 2018-02-09 14:41:04
+  my ($y, $m, $d, $h, $min, $s) = ($1, $2, $3, $4, $5, $6);
+  my $epoch = POSIX::mktime($s, $min, $h, $d, $m-1, $y-1900);
+  $tableTimeStamp = $epoch if (defined($epoch));
+  $status = "needUpdate $fileDate" if ($tableTimeStamp < $fileMtime);
+  printf "%04d-%02d-%02d %02d:%02d:%02d %s.ncbiRefSeq %s\n", $y, $m, $d, $h, $min, $s, $db, $status;
+} else {
+  # e.g. NULL instead of a date, the table age is unknown so rebuild
+  printf STDERR "ERROR: can not parse ncbiRefSeq update time '%s' in '%s'\n", $tableStatus, $db;
+  $status = "needUpdate $fileDate";
+}
+if ($tableTimeStamp < 1) {
+  printf "%s.ncbiRefSeq %s\n", $db, $status;
+}
+if ($tableTimeStamp >= $fileMtime) {
+  exit 0;
+}
+
+my $bedDir = "/hive/data/genomes/$db/bed";
+if (! -d $bedDir) {
+  fail("can not find bed directory:\n# %s", $bedDir);
+}
+my $today = strftime("%Y-%m-%d", localtime());
+my $buildDir = "$bedDir/ncbiRefSeq.$today";
+if (-d $buildDir) {
+  printf STDERR "# already run today %s\n", $buildDir;
+  exit 0;
+}
+
+# $bedDir exists, only the last path component needs to be created
+if (! mkdir($buildDir)) {
+  fail("can not create %s: %s", $buildDir, $!);
+}
+my $runPath = "$buildDir/run.sh";
+my $runScript = <<"_END_";
+#!/bin/bash
+
+set -beEu -o pipefail
+cd "$buildDir"
+time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=\`pwd\` -toGpWarnOnly -smallClusterHub=hgwdev \\
+  "${asmId}" "${db}") >> do.log 2>&1
+_END_
+
+open(my $fh, '>', $runPath) or fail("can not write to %s: %s", $runPath, $!);
+print $fh $runScript, "\n";
+close($fh) or fail("can not finish writing %s: %s", $runPath, $!);
+if (system("chmod", "+x", $runPath) != 0) {
+  fail("can not make %s executable", $runPath);
+}
+
+my $sysReturn = system($runPath);
+if ($sysReturn == -1) {
+  fail("failed to execute %s: %s", $runPath, $!);
+}
+if ($sysReturn & 127) {
+  fail("run.sh died with signal %d", $sysReturn & 127);
+}
+my $exitCode = $sysReturn >> 8;
+if ($exitCode != 0) {
+  fail("%s script returned %d", $runPath, $exitCode);
+}
+exit 0;