blob: 08192189236c08e72aba5fd9b1c29e1a236be689 [file] [log] [blame]
# Copyright 2014 The LUCI Authors. All rights reserved.
# Use of this source code is governed under the Apache License, Version 2.0
# that can be found in the LICENSE file.
"""Swarming bot code. Includes bootstrap and swarming_bot.zip.
It includes everything that is AppEngine specific. The non-GAE code is in
bot_archive.py.
"""
import ast
import collections
import hashlib
import logging
import os.path
from six.moves import urllib
from google.appengine.ext import ndb
from components import auth
from components import config
from components import utils
from server import bot_archive
# Directory two levels above the absolute path of this module.
ROOT_DIR = os.path.dirname(
    os.path.dirname(
        os.path.abspath(__file__)))


### Models.


# Immutable record describing a script served to bots: its content plus the
# `who`, `when` and `version` metadata that accompany it.
File = collections.namedtuple('File', ['content', 'who', 'when', 'version'])
class BotArchiveInfo(ndb.Model):
    """Describes one bot archive: digest, chunks and bot_config.py revision.

    Entities of this kind are produced by the Go code. Only the subset of
    fields that the python code actually reads is declared here.
    """
    # Do not keep these entities in the ndb in-context cache or in memcache.
    _use_cache = False
    _use_memcache = False

    # SHA256 digest of the bot archive, also known as "bot archive version".
    digest = ndb.StringProperty(name='Digest', indexed=False)
    # IDs of the BotArchiveChunk entities that hold the archive content.
    chunks = ndb.StringProperty(name='Chunks', indexed=False, repeated=True)
    # Revision of the bot_config.py script used.
    bot_config_rev = ndb.StringProperty(name='BotConfigRev', indexed=False)

    def fetch_archive(self):
        """Fetches all chunks listed in `chunks` and joins them into a blob.

        Returns:
          The `data` of every chunk, concatenated in the order of `chunks`.
        """
        keys = [bot_archive_chunk_key(name) for name in self.chunks]
        entities = ndb.get_multi(keys)
        return ''.join(entity.data for entity in entities)
class ConfigBundleRev(ndb.Model):
    """Lists the bot code archives currently available.

    Entities of this kind are produced by the Go code. Only the subset of
    fields that the python code actually reads is declared here.
    """
    # Do not keep this entity in the ndb in-context cache or in memcache.
    _use_cache = False
    _use_memcache = False

    # Archive description for the stable release channel.
    stable_bot = ndb.StructuredProperty(
        BotArchiveInfo, name='StableBot', indexed=False)
    # Archive description for the canary release channel.
    canary_bot = ndb.StructuredProperty(
        BotArchiveInfo, name='CanaryBot', indexed=False)
class BotArchiveChunk(ndb.Model):
    """One chunk of a bot archive.

    Entities of this kind are produced by the Go code. A chunk is never
    modified once written, so the python side can safely cache it.
    """
    # Raw bytes of this chunk.
    data = ndb.BlobProperty(name='Data', indexed=False)
### Public APIs.
# Release channel name returned by get_bot_channel for stable bots; also one of
# the two values accepted by get_bot_version.
STABLE_BOT = 'stable'
# Release channel name returned by get_bot_channel for canary bots; also one of
# the two values accepted by get_bot_version.
CANARY_BOT = 'canary'
def config_bundle_rev_key():
    """Returns the ndb.Key of the ConfigBundleRev singleton entity."""
    # The singleton lives under the ('ConfigBundle', 1) ancestor.
    bundle_key = ndb.Key('ConfigBundle', 1)
    return ndb.Key('ConfigBundleRev', 1, parent=bundle_key)
def bot_archive_chunk_key(chunk):
    """Returns the ndb.Key of the BotArchiveChunk entity with the given ID.

    Args:
      chunk: ID of the chunk entity.

    Returns:
      ndb.Key of the chunk under the ('BotArchiverState', 1) ancestor.
    """
    state_key = ndb.Key('BotArchiverState', 1)
    return ndb.Key('BotArchiveChunk', chunk, parent=state_key)
def get_bot_channel(bot_id, settings):
    """Picks the release channel a bot should be using.

    Args:
      bot_id: bot ID as was reported by the bot.
      settings: config_pb2.SettingsCfg message with canary percent.

    Returns:
      Either STABLE_BOT or CANARY_BOT.
    """
    # Without a `bot_deployment` section no bot is ever put on canary.
    if settings.HasField('bot_deployment'):
        threshold = settings.bot_deployment.canary_percent
    else:
        threshold = 0
    # The bot ID is mapped to a number in [0; 100) and compared against the
    # configured percentage.
    on_canary = _quasi_random_100(bot_id) < threshold
    return CANARY_BOT if on_canary else STABLE_BOT
def get_bot_version(channel):
    """Resolves a release channel into a concrete bot archive version.

    Args:
      channel: either CANARY_BOT or STABLE_BOT.

    Returns:
      (A bot archive digest, revision of bot_config.py embedded inside).
    """
    assert channel in (STABLE_BOT, CANARY_BOT), channel
    bundle = config_bundle_rev_key().get()
    if channel == STABLE_BOT:
        archive = bundle.stable_bot
    else:
        archive = bundle.canary_bot
    return (archive.digest, archive.bot_config_rev)
def get_bootstrap(host_url, bootstrap_token=None):
    """Returns the mangled version of the utility script bootstrap.py.

    A header assigning `host_url` and `bootstrap_token` is injected at the
    top of the script. The script body is looked up in the following order:
      - the file imported from luci-config;
      - the default version embedded in the tree.

    Args:
      host_url: value written into the header; a falsy value is written as
          an empty string.
      bootstrap_token: optional token written into the header; when given, it
          must be unchanged by URL quoting.

    Returns:
      File instance.
    """
    if bootstrap_token:
        # The token must survive URL quoting unchanged.
        url_safe = urllib.parse.quote_plus(bootstrap_token)
        assert bootstrap_token == url_safe, bootstrap_token

    # Header to inject at the top of the file.
    template = (
        '#!/usr/bin/env python\n'
        '# coding: utf-8\n'
        'host_url = %r\n'
        'bootstrap_token = %r\n')
    header = template % (host_url or '', bootstrap_token or '')

    rev, cfg = config.get_self_config(
        'scripts/bootstrap.py', store_last_good=True)
    if not cfg:
        # Nothing usable in luci-config: use the copy embedded in the tree.
        path = os.path.join(
            ROOT_DIR, 'swarming_bot', 'config', 'bootstrap.py')
        with open(path, 'rb') as f:
            return File(header + f.read(), None, None, None)
    return File(header + cfg, config.config_service_hostname(), None, rev)
def get_bot_config():
    """Returns the current version of bot_config.py together with its revision.

    The content is looked up in the following order:
      - the file imported from luci-config;
      - the default version embedded in the tree.

    Returns:
      (File instance, revision as returned by config.get_self_config).
    """
    rev, cfg = config.get_self_config(
        'scripts/bot_config.py', store_last_good=True)
    if not cfg:
        # Nothing usable in luci-config: use the copy embedded in the tree.
        path = os.path.join(
            ROOT_DIR, 'swarming_bot', 'config', 'bot_config.py')
        with open(path, 'rb') as f:
            return File(f.read(), None, None, None), rev
    return File(cfg, config.config_service_hostname(), None, rev), rev
def bootstrap_for_dev_server(host):
    """Called in the local smoke test to bootstrap the bot archive.

    Builds the bot zip, stores it as BotArchiveChunk entities and points both
    the stable and the canary channel of the ConfigBundleRev singleton at it.

    Args:
      host: passed through to bot_archive.get_swarming_bot_zip.

    Returns:
      The stored ConfigBundleRev entity.
    """
    assert utils.is_local_dev_server()
    bot_config, bot_config_rev = get_bot_config()
    content, version = bot_archive.get_swarming_bot_zip(
        os.path.join(ROOT_DIR, 'swarming_bot'),
        host,
        utils.get_app_version(),
        {'config/bot_config.py': bot_config.content},
        None,
    )

    # Split the archive into chunks of at most this many bytes; each chunk is
    # keyed by the archive version and its starting offset.
    max_chunk = 500 * 1000
    chunks = [
        BotArchiveChunk(
            key=bot_archive_chunk_key('%s:%d' % (version, start)),
            data=content[start:start + max_chunk],
        )
        for start in range(0, len(content), max_chunk)
    ]
    ndb.put_multi(chunks)

    def make_info():
        # Both channels describe the very same freshly built archive.
        return BotArchiveInfo(
            digest=version,
            chunks=[ent.key.id() for ent in chunks],
            bot_config_rev=bot_config_rev,
        )

    rev = ConfigBundleRev(
        key=config_bundle_rev_key(),
        stable_bot=make_info(),
        canary_bot=make_info(),
    )
    rev.put()
    return rev
### Bootstrap token.
class BootstrapToken(auth.TokenKind):
    """Token kind used for the tokens that authenticate bot bootstrap calls."""
    # NOTE(review): presumably the token lifetime in seconds (one hour);
    # confirm against auth.TokenKind.
    expiration_sec = 3600
    # Named secret used with these tokens.
    secret_key = auth.SecretKey('bot_bootstrap_token')
    # NOTE(review): presumably the token format version; confirm against
    # auth.TokenKind.
    version = 1
def generate_bootstrap_token():
    """Returns a token that authenticates calls to bot bootstrap endpoints.

    The authenticated bootstrap workflow looks like this:
      1. An admin visits the Swarming server root page and copy-pastes the URL
         to bootstrap.py that has a '?tok=...' parameter with the bootstrap
         token generated by this function.
      2. /bootstrap verifies the token and serves bootstrap.py, with the same
         token embedded into it.
      3. The modified bootstrap.py is executed on the bot. It fetches the bot
         code from /bot_code, passing it the bootstrap token again.
      4. /bot_code verifies the token and serves the bot code zip archive.

    This function assumes the caller is already authorized.
    """
    # The embedded payload is mostly FYI. The important expiration time is
    # added by BootstrapToken already.
    payload = {'for': auth.get_current_identity().to_bytes()}
    return BootstrapToken.generate(message=None, embedded=payload)
def validate_bootstrap_token(tok):
    """Returns the token payload if the token is valid or None if it is not.

    The token is valid if its HMAC signature is correct and it hasn't expired
    yet. Doesn't recheck ACLs. Validation failures are logged as warnings.

    Args:
      tok: the bootstrap token to check.
    """
    try:
        payload = BootstrapToken.validate(tok, message=None)
    except auth.InvalidTokenError as exc:
        logging.warning('Failed to validate bootstrap token: %s', exc)
        return None
    return payload
### Private code
def _quasi_random_100(s):
    """Given a string, returns a quasi-random integer in range [0; 100).

    The value depends only on the input, so the same string always maps to
    the same number.

    Args:
      s: a text or byte string. Text is encoded as UTF-8 before hashing.

    Returns:
      An integer in range [0; 100).
    """
    if not isinstance(s, bytes):
        s = s.encode('utf-8')
    # Use some seed to avoid being in sync with a similar generator in rbe.py.
    # bytearray yields integers on indexing on both Python 2 and Python 3.
    digest = bytearray(hashlib.sha256(b'bot-channel:' + s).digest())
    # Interpret the first two digest bytes as a little-endian 16 bit number.
    num = float(digest[0] + digest[1] * 256)
    return int(num * 99.9 / (256.0 + 256.0 * 256.0))
def _validate_python(content):
    """Returns True if content is a valid python script.

    Args:
      content: source code to check.

    Returns:
      True if `content` can be parsed by ast.parse, False otherwise.
    """
    try:
        ast.parse(content)
    # ValueError is raised by some Python 3 versions for source containing
    # null bytes; treat it as invalid content as well.
    except (SyntaxError, TypeError, ValueError):
        return False
    return True
## Config validators
@config.validation.self_rule('regex:scripts/.+\\.py')
def _validate_scripts(content, ctx):
    """Config validator: reports scripts that are not parsable as python.

    Args:
      content: content of the config file being validated.
      ctx: validation context; errors are reported through ctx.error and the
          file path is read from ctx.path.
    """
    try:
        ast.parse(content)
    # ValueError is raised by some Python 3 versions for source containing
    # null bytes; report it as a validation error instead of crashing.
    except (SyntaxError, TypeError, ValueError) as e:
        ctx.error('invalid %s: %s' % (ctx.path, e))