# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Updates the Git Cache zip files."""
import re
from recipe_engine import post_process
from PB.recipe_engine import result as result_pb
from PB.go.chromium.org.luci.buildbucket.proto import common as bb_common_pb
from PB.recipes.infra import git_cache_updater as git_cache_updater_pb
PYTHON_VERSION_COMPATIBILITY = "PY2+3"
DEPS = [
'recipe_engine/buildbucket',
'recipe_engine/context',
'recipe_engine/file',
'recipe_engine/raw_io',
'recipe_engine/futures',
'recipe_engine/path',
'recipe_engine/properties',
'recipe_engine/runtime',
'recipe_engine/step',
'recipe_engine/url',
'depot_tools/depot_tools',
'depot_tools/git',
]

PROPERTIES = git_cache_updater_pb.Inputs
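
# Per-repo result codes returned by _do_update_bootstrap.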
OK, EMPTY = range(2)
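
# How many repos are processed at once, enforced by the bounded semaphore in
# RunSteps.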
CONCURRENT_STEPS = 1


def _list_host_repos(api, host_url):
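  """Lists the repos hosted at `host_url`.

  Fetches the host's `/?format=TEXT` index and returns the repo names it
  contains, one entry per repo (see TEST_REPOS for the expected shape).
  """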
host_url = host_url.rstrip('/')
with api.depot_tools.on_path():
output = api.url.get_text('%s/?format=TEXT' % host_url,
default_test_data=TEST_REPOS).output
return output.strip().splitlines()


def _repos_to_urls(host_url, repos):
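  """Maps bare repo names onto full URLs under `host_url`."""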
host_url = host_url.rstrip('/')
  return ['%s/%s' % (host_url, repo) for repo in repos]


class _InvalidInput(Exception):
  """Raised when the recipe's input properties are invalid."""


def _get_repo_urls(api, inputs):
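  """Returns the list of repo URLs whose caches should be updated.

  Uses `git_host` when set: lists every repo on the host and drops the ones
  matching an `exclude_repos` regexp. Otherwise falls back to the explicit
  `repo_urls`. Raises _InvalidInput when neither is provided or an exclude
  regexp does not compile.
  """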
if inputs.git_host.host:
assert not inputs.repo_urls, 'only 1 of (git_host, repo_urls) allowed'
repos = _list_host_repos(api, 'https://' + inputs.git_host.host)
if inputs.git_host.exclude_repos:
exclude_regexps = []
for i, r in enumerate(inputs.git_host.exclude_repos):
try:
exclude_regexps.append(re.compile('^' + r + '$', re.IGNORECASE))
except Exception as e:
raise _InvalidInput(
'invalid regular expression[%d] %r: %s' % (i, r, e))
repos = [repo for repo in repos
if all(not r.match(repo) for r in exclude_regexps)]
return _repos_to_urls('https://' + inputs.git_host.host, repos)
if inputs.repo_urls:
return list(inputs.repo_urls)
raise _InvalidInput('repo_urls or git_host.host must be provided')


def _do_update_bootstrap(api, url, work_dir, gc_aggressive):
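  """Refreshes the cached bootstrap archive for a single repo URL.

  Populates the local git cache for `url` under `work_dir`, measures the
  repo's size, and then runs `git_cache.py update-bootstrap` with a memory
  cost proportional to that size. Returns OK on success, or EMPTY for repos
  that contain no objects (those are skipped).
  """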
opts = [
'--cache-dir', work_dir,
'--verbose',
url,
]
with api.step.nest(url) as summary:
api.step(
name='populate',
cmd=[
'git_cache.py',
'populate',
'--reset-fetch-config',
# By default, "refs/heads/*" are checked out by
# git_cache. However, for heavy branching repos,
# 'refs/branch-heads/*' is also very useful (crbug/942169).
# This is a noop for repos without refs/branch-heads.
'--ref',
'refs/branch-heads/*',
# By default, any tags that point to objects we fetch
# from remote are also fetched. This ensures ALL tags are
# downloaded from remote.
# This is needed by chromeos builders.
'--ref',
'refs/tags/*',
'--break-locks',
] + opts,
cost=api.step.ResourceCost(disk=20))
repo_path = api.path.abs_to_path(
api.step(
name='lookup repo_path',
cmd=['git_cache.py', 'exists'] + opts,
stdout=api.raw_io.output(),
        step_test_data=lambda: api.raw_io.test_api.stream_output(
            str(api.path.join(work_dir, url[len('https://'):])) + '\n'),
).stdout.decode('utf-8').strip())
with api.context(cwd=repo_path):
stats = api.git.count_objects(
raise_on_failure=True,
# TODO(iannucci): ugh, the test mock for this is horrendous.
# 1) it should default to something automatically
# 2) test_api.count_objects_output should return a TestData, not
# a string.
step_test_data=lambda: api.raw_io.test_api.stream_output(
api.git.test_api.count_objects_output(10)))
      # Scale the memory cost of this update by the square of the repo's size
      # (loose objects plus packfiles). This is an arbitrary scaling factor,
      # but it lets multiple small repos run in parallel while large repos
      # (e.g. chromium) effectively get exclusive use of the machine's memory.
mem_cost = 4 * int((stats['size'] + stats['size-pack']) ** 2)
if mem_cost == 0:
# some repos can be empty (e.g. they're an "ACL-only" repo), and
# update-bootstrap doesn't like that, so skip them.
api.step('repo is empty; skipping update', cmd=None)
summary.step_text = "[empty]"
summary.status = api.step.FAILURE # TODO(iannucci): warning
return EMPTY
gc_aggressive_opt = []
if gc_aggressive:
gc_aggressive_opt = ['--gc-aggressive']
api.step(
name='update bootstrap',
cmd=[
'git_cache.py', 'update-bootstrap',
'--skip-populate', '--prune',
] + opts + gc_aggressive_opt,
cost=api.step.ResourceCost(
cpu=api.step.CPU_CORE*2,
memory=mem_cost,
net=10,
))
summary.step_text = "[ok]"
return OK


def RunSteps(api, inputs):
try:
repo_urls = _get_repo_urls(api, inputs)
except _InvalidInput as e:
return result_pb.RawResult(
status=bb_common_pb.FAILURE, summary_markdown=str(e))
  work_dir = api.path.cache_dir / 'builder' / 'w'
api.file.ensure_directory('ensure work_dir', work_dir)
env = {
# Turn off the low speed limit, since checkout will be long.
'GIT_HTTP_LOW_SPEED_LIMIT': '0',
'GIT_HTTP_LOW_SPEED_TIME': '0',
# Ensure git-number tool can be used.
'CHROME_HEADLESS': '1',
}
if api.runtime.is_experimental:
assert inputs.override_bucket, 'override_bucket required for experiments'
if inputs.override_bucket:
env['OVERRIDE_BOOTSTRAP_BUCKET'] = inputs.override_bucket
work = []
sem = api.futures.make_bounded_semaphore(CONCURRENT_STEPS)
def fn(sem, api, url, work_dir, gc_aggressive):
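    # Hold the bounded semaphore so at most CONCURRENT_STEPS repos are
    # updated at any one time.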
with sem:
return _do_update_bootstrap(api, url, work_dir, gc_aggressive)
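
  # Spawn one future per repo URL; each future updates a single cache entry
  # via _do_update_bootstrap.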
with api.context(env=env), api.depot_tools.on_path():
for url in sorted(repo_urls):
work.append(
api.futures.spawn_immediate(
fn, sem, api, url, work_dir, inputs.gc_aggressive, __name=url))
total = len(work)
success = warning = 0
failed_repos = []
empties = 0
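  # Tally per-repo results as the futures complete. A failed repo marks the
  # build as FAILURE but does not stop the remaining updates.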
for future in api.futures.iwait(work):
try:
status = future.result()
except Exception: # pylint: disable=broad-except
failed_repos.append(future.name)
continue
if status == OK:
success += 1
elif status == EMPTY:
empties += 1
warning += 1
else:
assert False, 'unknown status %r' % (status,) # pragma: no cover
status = bb_common_pb.FAILURE if failed_repos else bb_common_pb.SUCCESS
summary = 'Updated cache for %d/%d repos.' % (success, total)
if warning:
summary += '\n\nEncountered warnings for %d repos:\n' % (warning,)
if empties:
summary += '\n * empty (repo has no objects): %d' % (empties,)
if failed_repos:
summary += '\n\nEncountered failures for %d repos:\n' % (len(failed_repos),)
for repo_name in failed_repos:
summary += '\n * ' + repo_name
return result_pb.RawResult(status=status, summary_markdown=summary)
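

# Default test data for the `/?format=TEXT` host listing used by
# _list_host_repos.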
TEST_REPOS = """
All-Projects
All-Users
apps
chromium/src
foo/bar
"""
def GenTests(api):
yield (
api.test('needs input')
+ api.expect_status('FAILURE')
+ api.post_process(post_process.StatusFailure)
+ api.post_process(post_process.DropExpectation)
)
yield (api.test('one-repo-experiment-aggressive') +
api.runtime(is_experimental=True) + api.properties(
git_cache_updater_pb.Inputs(
override_bucket='experimental-gs-bucket',
repo_urls=['https://chromium.googlesource.com/v8/v8'],
gc_aggressive=True,
)))
yield (api.test('one-repo-empty') + api.runtime(is_experimental=True) +
api.properties(
git_cache_updater_pb.Inputs(
override_bucket='experimental-gs-bucket',
repo_urls=['https://chromium.googlesource.com/empty'],
gc_aggressive=True,
)) + api.override_step_data(
'https://chromium.googlesource.com/empty.git count-objects',
api.raw_io.stream_output(api.git.count_objects_output(0)),
))
  yield (api.test('one-repo-fail') + api.runtime(is_experimental=True) +
         api.properties(
             git_cache_updater_pb.Inputs(
                 override_bucket='experimental-gs-bucket',
                 repo_urls=['https://chromium.googlesource.com/fail'],
                 gc_aggressive=True,
             )) + api.override_step_data(
                 'https://chromium.googlesource.com/fail.populate',
                 retcode=1,
             ) + api.expect_status('FAILURE'))
yield (
api.test('host-with-exclusions')
+ api.properties(git_cache_updater_pb.Inputs(
git_host=git_cache_updater_pb.Inputs.GitHost(
host='chromium.googlesource.com',
exclude_repos=[
'foo/.+',
'all-projects',
'all-users',
],
),
))
)
yield (
api.test('host-with-incorrect-regexp-exclude')
+ api.properties(git_cache_updater_pb.Inputs(
git_host=git_cache_updater_pb.Inputs.GitHost(
host='chromium.googlesource.com',
exclude_repos=[
'?.\\',
],
),
))
+ api.expect_status('FAILURE')
+ api.post_process(post_process.StatusFailure)
+ api.post_process(post_process.DropExpectation)
)