984d172a4eb5f434baa386930496e6af3289c4c8 hiram Sat Jan 24 08:17:32 2026 -0800 better detection of busy/idle nodes diff --git src/hg/utils/automation/HgAutomate.pm src/hg/utils/automation/HgAutomate.pm index e9c2b43c730..2da2d89d880 100755 --- src/hg/utils/automation/HgAutomate.pm +++ src/hg/utils/automation/HgAutomate.pm @@ -242,53 +242,59 @@ verbose(4, "about to run '$cmd'\n"); my $load = `$cmd`; if ($load =~ s/.*load average: (\d+\.\d+).*/$1/) { return $load; } return 1000; } sub getWorkhorseLoads { #*** Would be nice to parameterize instead of hardcoding hostnames... # Return a hash of workhorses (all idle small cluster machines), # associated with their load factors. # a valid workhorse needs to have access to hive. confess "Too many arguments" if (scalar(@_) != 0); my %horses = (); - foreach my $machLine ('ku', 'hgwdev', 'hgwdev-new') { + my @testList; + push (@testList, 'hgwdev'); + for (my $i = 0; $i < 18; ++$i) { + my $n = sprintf("%02d", $i); + push (@testList, "hgcompute-$n"); + } + foreach my $machLine (@testList) { my $mach = $machLine; $mach =~ s/[\. ].*//; chomp $mach; $horses{$mach} = &getLoadFactor($mach) if (! exists $horses{$mach}); } return %horses; } sub chooseWorkhorse { # Choose a suitable "workhorse" machine. If -workhorse was given, use that. # Otherwise, randomly pick a fast machine with low load factor, or wait if # none are available. This can wait indefinitely, so if it's broken or if # all workhorses are down, it's up to the engineer to halt the script. confess "Too many arguments" if (shift); if ($main::opt_workhorse) { return $main::opt_workhorse; } &verbose(2, "chooseWorkhorse: polling load factors of " . "idle small cluster machines. This may take a minute...\n"); while (1) { my %horses = &getWorkhorseLoads(); - foreach my $maxLoad (0.1, 0.5, 1.0, 2.0) { + foreach my $maxLoad (0.1, 0.5, 1.0, 2.0, 3.0, 6.0, 10.0) { my @fastHorses = (); foreach my $horse (keys %horses) { push @fastHorses, $horse if ($horses{$horse} <= $maxLoad); } if (scalar(@fastHorses) > 0) { my $randomFastEnough = $fastHorses[int(rand(scalar(@fastHorses)))]; &verbose(2, "chooseWorkhorse: $randomFastEnough meets load " . "threshold of $maxLoad.\n"); return $randomFastEnough; } } my $delay = 120; &HgAutomate::verbose(1, "chooseWorkhorse: all machines have high load." . " waiting $delay seconds...\n"); sleep($delay);