45e061bdd61a8fdf439157af29822df1589f6c43
hiram
  Thu Apr 7 13:25:31 2022 -0700
add a hubDateName function and allow newdev to be used as a cluster refs #29203

diff --git src/hg/utils/automation/HgAutomate.pm src/hg/utils/automation/HgAutomate.pm
index 3bba2a2..8df1468 100755
--- src/hg/utils/automation/HgAutomate.pm
+++ src/hg/utils/automation/HgAutomate.pm
@@ -20,33 +20,33 @@
 # This is a listing of the public methods and variables (which should be
 # treated as constants) exported by this module:
 @EXPORT_OK = (
     # Support for common command line options:
     qw( getCommonOptionHelp processCommonOptions
 	@commonOptionVars @commonOptionSpec
       ),
     # Some basic smarts about our compute infrastructure:
     qw( choosePermanentStorage
 	chooseWorkhorse chooseFileServer
 	chooseClusterByBandwidth chooseSmallClusterByBandwidth
 	chooseFilesystemsForCluster checkClusterPath
       ),
     # General-purpose utility routines:
     qw( checkCleanSlate checkExistsUnlessDebug closeStdin
-	getAssemblyInfo getSpecies gensub2 machineHasFile databaseExists
-	makeGsub mustMkdir asmHubBuildDir asmHubDownloadDir mustOpen
-	nfsNoodge paraRun run verbose
+	getAssemblyInfo getSpecies hubDateName gensub2 machineHasFile
+	databaseExists makeGsub mustMkdir asmHubBuildDir asmHubDownloadDir
+	mustOpen nfsNoodge paraRun run verbose
       ),
     # Hardcoded paths/commands/constants:
     qw( $centralDbSql $git
 	$clusterData $trackBuild $goldenPath $images $gbdb
 	$splitThreshold $runSSH $setMachtype
       ),
 );
 
 #########################################################################
 # A simple model of our local compute environment with some subroutines
 # for checking the validity of path+machine combos and for suggesting
 # appropriate storage and machines.
 
 use vars qw( %cluster %clusterFilesystem $defaultDbHost );
 
@@ -54,30 +54,33 @@
 sub readMainCluster(); # forward declaration to keep code order
 
 # the name of the cluster is in a separate text file, so it's easier to
 # use from bash scripts
 
 %cluster =
     ( readMainCluster() =>
         { 'enabled' => 1, 'gigaHz' => 1.4, 'ram' => 8,
 	  'hostCount' => 992, },
       'hgwdev-101' =>
         { 'enabled' => 1, 'gigaHz' => 2.1, 'ram' => 1,
 	  'hostCount' => 32, },
       'hgwdev' =>
         { 'enabled' => 1, 'gigaHz' => 2.1, 'ram' => 1,
 	  'hostCount' => 32, },
+      'newdev' =>
+        { 'enabled' => 1, 'gigaHz' => 2.1, 'ram' => 1,
+	  'hostCount' => 32, },
     );
 
 my %obsoleteCluster =
     ( 'swarm' => ,
         { 'enabled' => 1, 'gigaHz' => 2.33, 'ram' => 8,
 	  'hostCount' => 1024, },
       'memk' =>
         { 'enabled' => 1, 'gigaHz' => 1.0, 'ram' => 32,
 	  'hostCount' => 32, },
       'encodek' =>
         { 'enabled' => 1, 'gigaHz' => 2.0, 'ram' => 16,
 	  'hostCount' => 48, },
     );
 
 my @allClusters = (keys %cluster);
@@ -724,30 +727,78 @@
   return ($scientificName);
 } # getSpecies
 
 sub getOrganism {
   # fetch organism from dbDb
   my ($dbHost, $db) = @_;
   confess "Must have exactly 2 arguments" if (scalar(@_) != 2);
   my $query = "select organism from dbDb " .
               "where name = \"$db\";";
   my $line = `echo '$query' | $HgAutomate::runSSH $dbHost $centralDbSql`;
   chomp $line;
   my ($organism) = split("\t", $line);
   return ($organism);
 } # getOrganism
 
+# Find the date and assembly name for a hub given just its identifier.
+# Returns ($date, $asmName), with "some date" and "" standing in for
+# whatever can not be determined.  A name not starting with GC is a
+# database name: the date is the second value from getAssemblyInfo().  A GenArk
+# accession is looked up in the NCBI source hierarchy: $asmName is the rest
+# of the directory name ("_" prefixed), the date is from the assembly report.
+sub hubDateName($) {
+  my ($accession) = @_;
+  my ($returnDate, $returnAsmName) = ("some date", "");
+
+  if ($accession !~ m/^GC/) {	# not a GenArk hub, database assembly
+    my (undef, $asmDate) = getAssemblyInfo("hgwdev", $accession);
+    $returnDate = $asmDate if (defined($asmDate) && length($asmDate));
+    return ($returnDate, $accession);
+  }
+  # GCx_123456789.N is in directory GCx/123/456/789/, give up without digits
+  my ($gcX, $d0, $d1, $d2) = $accession =~ m/^(GC[AF])_(\d{3})(\d{3})(\d{3})/
+    or return ($returnDate, $returnAsmName);
+  # a couple assemblies unfortunately have two different assembly names, can't
+  # work with those with just an accession: special case the CHM13 assembly
+  my $betterId = $accession;
+  $betterId = "GCA_009914755.4_CHM13_T2T_v2.0"
+    if ($accession =~ m/^GCA_009914755\.4(?:_|\z)/);
+  # scan the directory in perl: no shell, so no quoting or ls error noise
+  my $genomes = "/hive/data/outside/ncbi/genomes/$gcX/$d0/$d1/$d2";
+  opendir(my $dh, $genomes) or return ($returnDate, $returnAsmName);
+  my @asmIds = grep { index($_, $betterId) == 0 } readdir($dh);
+  closedir($dh);
+  # must be exactly one candidate, and it must have a non-empty report
+  return ($returnDate, $returnAsmName) if (1 != scalar(@asmIds));
+  my $asmId = $asmIds[0];
+  my $asmRpt = "$genomes/$asmId/${asmId}_assembly_report.txt";
+  return ($returnDate, $returnAsmName) if (! -s $asmRpt);
+  (undef, undef, $returnAsmName) = split('_', $asmId, 3);
+  $returnAsmName = defined($returnAsmName) ? "_$returnAsmName" : "";
+  # only the first "# Date:" line counts, stripped of label and line ending
+  open(my $fh, "<", $asmRpt) or return ($returnDate, $returnAsmName);
+  while (my $line = <$fh>) {
+    next if ($line !~ m/^#\s*Date:/i);
+    $line =~ s/.*ate: +//;
+    $line =~ tr/\r\n//d;
+    $returnDate = $line if (length($line));
+    last;
+  }
+  close($fh);
+  return ($returnDate, $returnAsmName);
+}	#	sub hubDateName($)
+
 sub machineHasFile {
   # Return a positive integer if $mach appears to have $file or 0 if it
   # does not.
   my ($mach, $file) = @_;
   confess "Must have exactly 2 arguments" if (scalar(@_) != 2);
   confess "undef input" if (! defined $mach || ! defined $file);
   my $count = `$HgAutomate::runSSH $mach ls -1 $file 2>>/dev/null | wc -l`;
   chomp $count;
   return $count + 0;
 }
 
 sub databaseExists {
   my ($dbHost, $db) = @_;
   return 0 if ($dbHost =~ m/nohost/i);
   confess "Must have exactly 2 arguments" if (scalar(@_) != 2);