| # Copyright 2014 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Updates the Git Cache zip files.""" |
| |
| import re |
| |
| from recipe_engine import post_process |
| from PB.recipe_engine import result as result_pb |
| from PB.go.chromium.org.luci.buildbucket.proto import common as bb_common_pb |
| |
| from PB.recipes.infra import git_cache_updater as git_cache_updater_pb |
| |
| |
# Python compatibility marker for this recipe; the value "PY2+3" presumably
# tells the recipe engine it may run under either major version -- confirm
# against the recipe engine documentation.
PYTHON_VERSION_COMPATIBILITY = "PY2+3"

# Recipe modules this recipe depends on. Each one is reached in the code below
# through the `api` object (e.g. `api.url`, `api.step`, `api.depot_tools`).
DEPS = [
  'recipe_engine/buildbucket',
  'recipe_engine/context',
  'recipe_engine/file',
  'recipe_engine/raw_io',
  'recipe_engine/futures',
  'recipe_engine/path',
  'recipe_engine/properties',
  'recipe_engine/runtime',
  'recipe_engine/step',
  'recipe_engine/url',

  'depot_tools/depot_tools',
  'depot_tools/git',
]

# The recipe's input properties are described by the `Inputs` proto message;
# RunSteps receives them as its `inputs` argument.
PROPERTIES = git_cache_updater_pb.Inputs
| |
# Per-repo outcomes returned by _do_update_bootstrap and tallied in RunSteps.
OK = 0     # The bootstrap update ran.
EMPTY = 1  # The repo had no objects, so the update was skipped.

# Size of the bounded semaphore that limits how many repo updates may run
# at the same time.
CONCURRENT_STEPS = 1
| |
| |
def _list_host_repos(api, host_url):
  """Returns the repo names listed by a git host.

  Fetches `<host_url>/?format=TEXT` (after dropping any trailing slashes from
  `host_url`) and treats each line of the whitespace-trimmed response body as
  one repo name.

  Args:
    api: The recipe API object.
    host_url: Base URL of the git host, with or without a trailing slash.

  Returns:
    A list of strings, one per line of the response.
  """
  listing_url = '%s/?format=TEXT' % host_url.rstrip('/')
  with api.depot_tools.on_path():
    # In simulation the request is answered with the canned TEST_REPOS text.
    response = api.url.get_text(listing_url, default_test_data=TEST_REPOS)
  return response.output.strip().splitlines()
| |
| |
| def _repos_to_urls(host_url, repos): |
| host_url = host_url.rstrip('/') |
| return ['%s/%s' % (host_url, repo) for repo in repos] |
| |
| |
| class _InvalidInput(Exception): |
| pass |
| |
| |
| def _get_repo_urls(api, inputs): |
| if inputs.git_host.host: |
| assert not inputs.repo_urls, 'only 1 of (git_host, repo_urls) allowed' |
| repos = _list_host_repos(api, 'https://' + inputs.git_host.host) |
| if inputs.git_host.exclude_repos: |
| exclude_regexps = [] |
| for i, r in enumerate(inputs.git_host.exclude_repos): |
| try: |
| exclude_regexps.append(re.compile('^' + r + '$', re.IGNORECASE)) |
| except Exception as e: |
| raise _InvalidInput( |
| 'invalid regular expression[%d] %r: %s' % (i, r, e)) |
| repos = [repo for repo in repos |
| if all(not r.match(repo) for r in exclude_regexps)] |
| return _repos_to_urls('https://' + inputs.git_host.host, repos) |
| |
| if inputs.repo_urls: |
| return list(inputs.repo_urls) |
| |
| raise _InvalidInput('repo_urls or git_host.host must be provided') |
| |
| |
def _do_update_bootstrap(api, url, work_dir, gc_aggressive):
  """Populates the git cache for one repo and runs its bootstrap update.

  All steps are nested under a step named after `url`:
    1. `git_cache.py populate` for the repo.
    2. `git_cache.py exists` to look up the repo's directory in the cache.
    3. A git object count inside that directory, used to size the memory cost.
    4. `git_cache.py update-bootstrap`, unless the repo turned out to be empty.

  Args:
    api: The recipe API object.
    url: URL of the repo to update.
    work_dir: Directory passed to git_cache.py as `--cache-dir`.
    gc_aggressive: If truthy, `--gc-aggressive` is passed to update-bootstrap.

  Returns:
    OK if update-bootstrap ran, EMPTY if the repo has no objects and the
    update was skipped.
  """
  opts = [
    '--cache-dir', work_dir,
    '--verbose',
    url,
  ]

  # Path of the repo relative to work_dir, used only for simulated output of
  # the 'lookup repo_path' step. Remove just the leading scheme:
  # str.strip('https://') would treat the argument as a set of characters and
  # also eat trailing 'h', 't', 'p', 's', ':' and '/' characters from the URL
  # (e.g. '.../apps' would become '.../a').
  scheme = 'https://'
  repo_subpath = url[len(scheme):] if url.startswith(scheme) else url

  with api.step.nest(url) as summary:
    api.step(
        name='populate',
        cmd=[
          'git_cache.py',
          'populate',
          '--reset-fetch-config',

          # By default, "refs/heads/*" are checked out by
          # git_cache. However, for heavy branching repos,
          # 'refs/branch-heads/*' is also very useful (crbug/942169).
          # This is a noop for repos without refs/branch-heads.
          '--ref',
          'refs/branch-heads/*',

          # By default, any tags that point to objects we fetch
          # from remote are also fetched. This ensures ALL tags are
          # downloaded from remote.
          # This is needed by chromeos builders.
          '--ref',
          'refs/tags/*',
          '--break-locks',
        ] + opts,
        cost=api.step.ResourceCost(disk=20))

    repo_path = api.path.abs_to_path(
        api.step(
            name='lookup repo_path',
            cmd=['git_cache.py', 'exists'] + opts,
            stdout=api.raw_io.output(),
            step_test_data=lambda: api.raw_io.test_api.stream_output(
                api.path.join(work_dir, repo_subpath) + '\n',),
        ).stdout.decode('utf-8').strip())

    with api.context(cwd=repo_path):
      stats = api.git.count_objects(
          raise_on_failure=True,
          # TODO(iannucci): ugh, the test mock for this is horrendous.
          #  1) it should default to something automatically
          #  2) test_api.count_objects_output should return a TestData, not
          #     a string.
          step_test_data=lambda: api.raw_io.test_api.stream_output(
              api.git.test_api.count_objects_output(10)))

    # Scale the memory cost of this update by the square of the repo's total
    # object size (the 'size' plus 'size-pack' stats). This is an arbitrary
    # scaling factor, but it allows multiple small repos to run in parallel
    # but allows large repos (e.g. chromium) to exclusively use all the memory
    # on the system.
    mem_cost = 4 * int((stats['size'] + stats['size-pack']) ** 2)
    if mem_cost == 0:
      # some repos can be empty (e.g. they're an "ACL-only" repo), and
      # update-bootstrap doesn't like that, so skip them.
      api.step('repo is empty; skipping update', cmd=None)
      summary.step_text = "[empty]"
      summary.status = api.step.FAILURE  # TODO(iannucci): warning
      return EMPTY

    gc_aggressive_opt = []
    if gc_aggressive:
      gc_aggressive_opt = ['--gc-aggressive']

    api.step(
        name='update bootstrap',
        cmd=[
          'git_cache.py', 'update-bootstrap',
          '--skip-populate', '--prune',
        ] + opts + gc_aggressive_opt,
        cost=api.step.ResourceCost(
            cpu=api.step.CPU_CORE*2,
            memory=mem_cost,
            net=10,
        ))

    summary.step_text = "[ok]"
    return OK
| |
| |
def RunSteps(api, inputs):
  """Updates the git cache for every requested repo and summarizes the result.

  Each repo is handled by _do_update_bootstrap in its own future; a bounded
  semaphore of size CONCURRENT_STEPS limits how many run at once.

  Args:
    api: The recipe API object.
    inputs: The recipe's input properties.

  Returns:
    A RawResult whose status is FAILURE if the inputs were invalid or any
    repo update raised, SUCCESS otherwise, with a markdown summary of the
    per-repo outcomes.
  """
  try:
    repo_urls = _get_repo_urls(api, inputs)
  except _InvalidInput as err:
    return result_pb.RawResult(
        status=bb_common_pb.FAILURE, summary_markdown=str(err))

  work_dir = api.path.cache_dir.joinpath('builder', 'w')
  api.file.ensure_directory('ensure work_dir', work_dir)

  if api.runtime.is_experimental:
    assert inputs.override_bucket, 'override_bucket required for experiments'

  env = {
    # Turn off the low speed limit, since checkout will be long.
    'GIT_HTTP_LOW_SPEED_LIMIT': '0',
    'GIT_HTTP_LOW_SPEED_TIME': '0',
    # Ensure git-number tool can be used.
    'CHROME_HEADLESS': '1',
  }
  if inputs.override_bucket:
    env['OVERRIDE_BOOTSTRAP_BUCKET'] = inputs.override_bucket

  limiter = api.futures.make_bounded_semaphore(CONCURRENT_STEPS)

  def update_one(limiter, api, url, work_dir, gc_aggressive):
    # Hold the semaphore for the whole update of this repo.
    with limiter:
      return _do_update_bootstrap(api, url, work_dir, gc_aggressive)

  with api.context(env=env), api.depot_tools.on_path():
    pending = [
        api.futures.spawn_immediate(
            update_one, limiter, api, url, work_dir, inputs.gc_aggressive,
            __name=url)
        for url in sorted(repo_urls)
    ]

  updated = 0
  empty = 0
  failed = []
  for done in api.futures.iwait(pending):
    try:
      outcome = done.result()
    except Exception:  # pylint: disable=broad-except
      # A failure in one repo must not stop the others from being reported.
      failed.append(done.name)
      continue

    if outcome == OK:
      updated += 1
    elif outcome == EMPTY:
      empty += 1
    else:
      assert False, 'unknown status %r' % (outcome,)  # pragma: no cover

  # Empty repos are the only kind of warning, so one counter serves for both
  # the warning total and the per-kind line.
  parts = ['Updated cache for %d/%d repos.' % (updated, len(pending))]
  if empty:
    parts.append('\n\nEncountered warnings for %d repos:\n' % (empty,))
    parts.append('\n * empty (repo has no objects): %d' % (empty,))
  if failed:
    parts.append('\n\nEncountered failures for %d repos:\n' % (len(failed),))
    parts.extend('\n * ' + name for name in failed)

  return result_pb.RawResult(
      status=bb_common_pb.FAILURE if failed else bb_common_pb.SUCCESS,
      summary_markdown=''.join(parts))
| |
| |
# Canned response body for the host listing request made by
# _list_host_repos (passed there as `default_test_data`): one repo name per
# line. The names are also what the exclusion patterns in GenTests match
# against.
TEST_REPOS = """
All-Projects
All-Users
apps
chromium/src
foo/bar
"""
| |
| |
def GenTests(api):
  """Yields the simulation test cases for this recipe."""
  inputs_pb = git_cache_updater_pb.Inputs

  def experimental_single_repo(name, repo_url):
    # Shared setup: an experimental run over one explicit repo URL with an
    # override bucket and aggressive gc enabled.
    return (
        api.test(name)
        + api.runtime(is_experimental=True)
        + api.properties(inputs_pb(
            override_bucket='experimental-gs-bucket',
            repo_urls=[repo_url],
            gc_aggressive=True,
        ))
    )

  # No properties at all: the recipe must fail with an input error.
  yield (
      api.test('needs input')
      + api.expect_status('FAILURE')
      + api.post_process(post_process.StatusFailure)
      + api.post_process(post_process.DropExpectation)
  )

  yield experimental_single_repo(
      'one-repo-experiment-aggressive',
      'https://chromium.googlesource.com/v8/v8')

  # The object count reports an empty repo, so the update is skipped.
  yield (
      experimental_single_repo(
          'one-repo-empty',
          'https://chromium.googlesource.com/empty')
      + api.override_step_data(
          'https://chromium.googlesource.com/empty.git count-objects',
          api.raw_io.stream_output(api.git.count_objects_output(0)),
      )
  )

  # The populate step exits non-zero, so the repo is reported as failed.
  yield (
      experimental_single_repo(
          'one-repo-fail',
          'https://chromium.googlesource.com/fail')
      + api.override_step_data(
          'https://chromium.googlesource.com/fail.populate',
          retcode=1,
      )
      + api.expect_status('FAILURE')
  )

  yield (
      api.test('host-with-exclusions')
      + api.properties(inputs_pb(
          git_host=inputs_pb.GitHost(
              host='chromium.googlesource.com',
              exclude_repos=[
                'foo/.+',
                'all-projects',
                'all-users',
              ],
          ),
      ))
  )

  # The exclusion pattern is not a valid regular expression.
  yield (
      api.test('host-with-incorrect-regexp-exclude')
      + api.properties(inputs_pb(
          git_host=inputs_pb.GitHost(
              host='chromium.googlesource.com',
              exclude_repos=[
                '?.\\',
              ],
          ),
      ))
      + api.expect_status('FAILURE')
      + api.post_process(post_process.StatusFailure)
      + api.post_process(post_process.DropExpectation)
  )