# Source blob: 91727a8a352c4924ee6e261e822fb2431ca78840
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import itertools
from datetime import datetime
from twisted.python import log
from twisted.internet import reactor
class FloatingSet(object):
    """Holds the names of the primary and floating slaves for a builder.

    The two groups are stored as independent sets, so adding a name that is
    already present in a group has no further effect.
    """

    def __init__(self):
        self._primary = set()
        self._floating = set()

    def AddPrimary(self, *s):
        """Adds every name passed in 's' to the primary group."""
        for name in s:
            self._primary.add(name)

    def AddFloating(self, *s):
        """Adds every name passed in 's' to the floating group."""
        for name in s:
            self._floating.add(name)

    def NextSlaveFunc(self, grace_period):
        """Returns a NextSlaveFunc that uses the contents of this set."""
        return _FloatingNextSlaveFunc(self, grace_period)

    def Get(self):
        """Returns a (primary, floating) tuple of sorted name lists."""
        primary_names = sorted(self._primary)
        floating_names = sorted(self._floating)
        return (primary_names, floating_names)

    def __str__(self):
        # Rendered as "<primary names> > <floating names>", each group being
        # a sorted, comma-separated list.
        primary_text = ', '.join(sorted(self._primary))
        floating_text = ', '.join(sorted(self._floating))
        return ' > '.join((primary_text, floating_text))
class PokeBuilderTimer(object):
    """Owns at most one pending reactor call that "pokes" a single builder.

    When the pending call fires, the botmaster is asked to try starting builds
    for the named builder.
    """

    def __init__(self, botmaster, buildername):
        self.botmaster = botmaster
        self.buildername = buildername
        # Handle of the pending delayed call, or None if nothing is scheduled.
        self.delayed_call = None

    def cancel(self):
        """Cancels the pending poke, if one is scheduled."""
        pending = self.delayed_call
        if pending is None:
            return
        pending.cancel()
        self.delayed_call = None

    def reset(self, delta):
        """Makes sure a poke is scheduled no later than 'delta' from now.

        Args:
          delta: (timedelta) The desired delay before the poke fires.
        """
        pending = self.delayed_call
        if pending is None:
            # Nothing is scheduled yet, so schedule a new call.
            self.delayed_call = reactor.callLater(
                delta.total_seconds(), self._poke)
            return

        # A call is already pending. It is only ever moved earlier: if it is
        # already due sooner than 'delta', it is left untouched.
        fire_time = datetime.fromtimestamp(pending.getTime())
        remaining = fire_time - _get_now()
        if delta < remaining:
            pending.reset(delta.total_seconds())

    def _poke(self):
        """Timer callback: forgets the fired call and pokes the builder."""
        self.delayed_call = None
        builder_name = self.buildername
        log.msg('Poking builds for builder [%s]' % (builder_name,))
        self.botmaster.maybeStartBuildsForBuilder(builder_name)
class _FloatingNextSlaveFunc(object):
    """
    This object, when used as a Builder's 'nextSlave' function, allows a
    strata-based preferential treatment to be assigned to a Builder's Slaves.

    The 'nextSlave' function is called on a scheduled build when an associated
    slave becomes available, either coming online or finishing an existing
    build. These events are used as stimulus to enable the primary builder(s)
    to pick up builds when appropriate.

    1) If a Primary is available, the build will be assigned to them.
    2) If a Primary builder is busy or is still within its grace period for
       unavailability, no slave will be assigned in anticipation of the
       'nextSlave' being re-invoked once the builder returns (1). If the grace
       period expires, we "poke" the master to call 'nextSlave', at which
       point the build will fall through to a lower strata.
    3) If a Primary slave is offline past its grace period, the build will be
       assigned to a Floating slave.

    Args:
      fs (FloatingSet): The set of available primary/floating slaves.
      grace_period: (timedelta) The amount of time that a slave can be offline
          before builds fall through to a lower strata.
    """

    def __init__(self, fs, grace_period):
        # The slave names are read from 'fs' once, here; 'fs' itself is only
        # kept for '__repr__'.
        self._primary, self._floating = fs.Get()
        self._fs = fs
        self._grace_period = grace_period
        # Maps builder name -> PokeBuilderTimer.
        self._poke_builder_timers = {}
        # When True, '_debug' messages are written to the log.
        self.verbose = False

        # Every known slave starts out as "seen" at construction time, so the
        # grace period is measured from startup for slaves never seen online.
        started = _get_now()
        self._slave_seen_times = dict((s, started) for s in itertools.chain(
            self._primary, self._floating))

    def __repr__(self):
        return '%s(%s)' % (type(self).__name__, self._fs)

    def __call__(self, builder, slave_builders):
        """Main 'nextSlave' invocation point.

        When this is called, we are given the following information:
        - The Builder
        - A set of 'SlaveBuilder' instances that are available and ready for
          assignment (slave_builders).
        - The total set of ONLINE 'SlaveBuilder' instances associated with
          'builder' (builder.slaves)
        - The set of all slaves configured for Builder (via
          '_get_all_slave_status')

        We compile that into a stateful awareness and use it as a decision
        point. Based on the slave availability and grace period, we will
        either:
        (1) Return a slave immediately to claim this build. We do this if:
          (1a) There was a "primary" build slave available, or
          (1b) We are outside of all of the grace periods for the primary
               slaves, and there is a floating builder available.
        (2) Return 'None' (delaying the build) in anticipation of
            primary/floating availability.

        If we go with (2), we will schedule a 'poke' timer to stimulate a
        future 'nextSlave' call, since BuildBot only checks for builds on
        explicit slave availability edges. This covers the case where floating
        builders are available, but aren't enlisted because we're within the
        grace period. In this case, we need to re-evaluate slaves after the
        grace period expires, but actual slave state won't have changed, so no
        new slave availability edge will have occurred.

        Args:
          builder: The Builder that the build is scheduled on.
          slave_builders: The 'SlaveBuilder' instances proposed for the build.

        Returns: The chosen 'SlaveBuilder' (one of 'slave_builders'), or None
            if the build should not be assigned yet.
        """
        self._debug("Calling [%s] with builder=[%s], slaves=[%s]",
                    self, builder, slave_builders)
        # Any previously scheduled poke is superseded by this evaluation.
        self._cancel_builder_timer(builder)

        # Get the set of all 'SlaveStatus' assigned to this Builder (idle,
        # busy, and offline).
        slave_status_map = dict(
            (slave_status.name, slave_status)
            for slave_status in self._get_all_slave_status(builder)
        )

        # Record the names of the slaves that were proposed.
        proposed_slave_builder_map = {}
        for slave_builder in slave_builders:
            slave_name = slave_builder.slave.slavename
            proposed_slave_builder_map[slave_name] = slave_builder

        # Calculate the oldest a slave can be before we assume something's
        # wrong.
        now = _get_now()
        grace_threshold = (now - self._grace_period)

        # Record the last time we've seen any of these slaves online.
        online_slave_builders = set()
        for slave_builder in builder.slaves:
            build_slave = slave_builder.slave
            if build_slave is None:
                continue
            self._record_slave_seen_time(build_slave, now)
            online_slave_builders.add(build_slave.slavename)

        self._debug('Online proposed slaves: [%s]',
                    slave_builders)

        # Are there any primary slaves that are proposed? If so, use it
        within_grace_period = []
        some_primary_were_busy = False
        wait_delta = None
        for slave_name in self._primary:
            self._debug('Considering primary slave [%s]', slave_name)

            # Was this slave proposed to 'nextSlave'?
            slave_builder = proposed_slave_builder_map.get(slave_name)
            if slave_builder is not None:
                # Yes. Use it!
                self._debug('Slave [%s] is available', slave_name)
                return slave_builder

            # Is this slave online? If so, we won't consider floating
            # candidates.
            if slave_name in online_slave_builders:
                # The slave is online, but is not proposed (BUSY); add it to
                # the desired slaves list.
                self._debug('Slave [%s] is online but BUSY.', slave_name)
                within_grace_period.append(slave_name)
                some_primary_were_busy = True
                continue

            # The slave is offline. Is this slave within the grace period?
            slave_status = slave_status_map.get(slave_name)
            last_seen = self._get_latest_seen_time(slave_name, slave_status)
            if last_seen < grace_threshold:
                # No, the slave is older than our grace period.
                self._debug(
                    'Slave [%s] is OFFLINE and outside grace period '
                    '(%s < %s).',
                    slave_name, last_seen, grace_threshold)
                continue

            # This slave is within its grace threshold. Add it to the list of
            # desired slaves from this set and update our wait delta in case
            # we have to poke.
            #
            # We track the longest grace period delta, since after this point
            # if no slaves have taken the build we would otherwise hang.
            self._debug(
                'Slave %r is OFFLINE but within grace period '
                '(%s >= %s).',
                slave_name, last_seen, grace_threshold)
            within_grace_period.append(slave_name)
            slave_wait_delta = (self._grace_period - (now - last_seen))
            if (wait_delta is None) or (slave_wait_delta > wait_delta):
                wait_delta = slave_wait_delta

        # We've looped through all primary slaves, and none of them were
        # available. Were some within the grace period?
        if not within_grace_period:
            # We're outside of our grace period. Are there floating slaves
            # that we can use?
            for slave_name in self._floating:
                slave_builder = proposed_slave_builder_map.get(slave_name)
                if slave_builder is not None:
                    # Yes. Use it!
                    self._debug('Slave [%s] is available', slave_name)
                    return slave_builder

            self._debug('No slaves are available; returning None')
            return None

        # We're going to return 'None' to wait for a primary slave. If all of
        # the slaves that we're anticipating are offline, schedule a 'poke'
        # after the last candidate has exceeded its grace period to allow the
        # build to go to lower strata.
        log.msg('Returning None in anticipation of unavailable primary slaves. '
                'Please disregard the following BuildBot `nextSlave` '
                'error: %s' % (within_grace_period,))

        if (not some_primary_were_busy) and (wait_delta is not None):
            self._debug('Scheduling ping for [%s] in [%s]',
                        builder.name, wait_delta)
            self._schedule_builder_timer(builder, wait_delta)
        return None

    def _debug(self, fmt, *args):
        """Logs 'fmt % args', but only when 'self.verbose' is set."""
        if not self.verbose:
            return
        log.msg(fmt % args)

    @staticmethod
    def _get_all_slave_status(builder):
        """Returns the slave status objects known for 'builder'.

        Uses the builder's BuilderStatus when present; otherwise falls back to
        the status objects of the SlaveBuilders currently in 'builder.slaves'.
        """
        # Try using the builder's BuilderStatus object to get a list of all
        # slaves
        if builder.builder_status is not None:
            return builder.builder_status.getSlaves()

        # Satisfy with the list of currently-connected slaves. A SlaveBuilder
        # whose 'slave' is None has no status to report, so skip it (the same
        # condition '__call__' checks) rather than raising AttributeError.
        return [slave_builder.slave.slave_status
                for slave_builder in builder.slaves
                if slave_builder.slave is not None]

    def _get_latest_seen_time(self, slave_name, slave_status):
        """Returns (datetime): The most recent time the slave was seen.

        Considers the time recorded by this object and, when 'slave_status' is
        supplied, its connect times and its last-message time.
        """
        times = [self._slave_seen_times[slave_name]]

        if slave_status:
            # Add all of the registered connect times
            times += [datetime.fromtimestamp(connect_time)
                      for connect_time in slave_status.connect_times]

            # Add the time of the slave's last message
            times.append(
                datetime.fromtimestamp(slave_status.lastMessageReceived()))

        return max(times)

    def _record_slave_seen_time(self, build_slave, now):
        """Records 'now' as the last time 'build_slave' was seen online."""
        self._slave_seen_times[build_slave.slavename] = now

    def _schedule_builder_timer(self, builder, delta):
        """Schedules a poke of 'builder' in 'delta', creating its timer on
        first use."""
        poke_builder_timer = self._poke_builder_timers.get(builder.name)
        if poke_builder_timer is None:
            poke_builder_timer = PokeBuilderTimer(
                builder.botmaster,
                builder.name,
            )
            self._poke_builder_timers[builder.name] = poke_builder_timer
        poke_builder_timer.reset(delta)

    def _cancel_builder_timer(self, builder):
        """Cancels the pending poke for 'builder', if a timer exists for it."""
        poke_builder_timer = self._poke_builder_timers.get(builder.name)
        if poke_builder_timer is None:
            return
        poke_builder_timer.cancel()
def _get_now():
    """Returns (datetime.datetime): The current local time, timezone-naive.

    Callers use this rather than calling 'datetime.now()' directly so that
    unit tests can override it with a mock.
    """
    current_time = datetime.now()
    return current_time