blob: aa17649179a693dbbe28d6c1c49702151b5745c0 [file] [log] [blame]
# Copyright (c) 2013 The Native Client Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import argparse
import collections
import datetime
import email.mime.text
import functools
import getpass
import os
import re
import smtplib
import subprocess
import sys
import tempfile
# python-dateutil is a third-party dependency; it is needed because the
# standard datetime parsing used elsewhere in this file must handle timezones.
try:
    from dateutil import parser as dateutilparser
except ImportError:
    # Only show the install hint when the import actually fails, then let the
    # original ImportError propagate so the failure is never hidden.
    sys.stdout.write("Please `apt-get install python-dateutil`: "
                     "Python's datetime packages don't handle timezones.")
    raise
# Directory that contains this script.
BUILD_DIR = os.path.dirname(__file__)
# Parent directory of BUILD_DIR.
NACL_DIR = os.path.dirname(BUILD_DIR)
# Directory holding the per-package '<package>.json' revision files (see
# GetRevisionPackageFiles).
TOOLCHAIN_REV_DIR = os.path.join(NACL_DIR, 'toolchain_revisions')
# Path handed to the Python interpreter by the *Revision* helpers below.
# NOTE(review): the final component is an empty string, so this resolves to
# the 'package_version' directory itself (with a trailing separator) rather
# than to a script inside it -- confirm that this is intended.
PKG_VER = os.path.join(BUILD_DIR, 'package_version', '')
def ParseArgs(args):
    """Build the command-line parser and parse the process arguments.

    Args:
      args: Accepted for interface compatibility but not consulted:
        ``parse_args()`` is called without arguments and therefore reads
        ``sys.argv[1:]``.
        NOTE(review): callers may be passing the full ``sys.argv``; confirm
        before switching to ``parse_args(args)``.

    Returns:
      An ``argparse.Namespace`` with the attributes ``email``, ``hash``,
      ``dry_run``, ``ignore_branch``, ``saigo`` and ``filter_out_predicates``.
    """
    parser = argparse.ArgumentParser(
        # The description contains hand-drawn diagrams; keep argparse from
        # re-wrapping them.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""Update pnacl_newlib.json PNaCl version.
LLVM and other projects are checked-in to the NaCl repository, but their head
isn't necessarily the one that we currently use in PNaCl. The pnacl_newlib.json
and pnacl_translator.json files point at git revisions to use for tools such as
LLVM. Our build process then downloads pre-built tool tarballs from the
toolchain build waterfall.
git repository before running this script:
| |
v |
...----A------B------C------D------ NaCl HEAD
^ ^ ^ ^
| | | |__ Latest pnacl_{newlib,translator}.json update.
| | |
| | |__ A newer LLVM change (LLVM repository HEAD).
| |
| |__ Oldest LLVM change since this PNaCl version.
|__ pnacl_{newlib,translator}.json points at an older LLVM change.
git repository after running this script:
| |
v |
...----A------B------C------D------E------ NaCl HEAD
Note that there could be any number of non-PNaCl changes between each of these
changelists, and that the user can also decide to update the pointer to B
instead of C.
There is further complication when toolchain builds are merged.
""")
    parser.add_argument('--email', metavar='ADDRESS', type=str,
                        help="Email address to send errors to.")
    parser.add_argument('--hash', metavar='HASH',
                        help="Update to a specific git hash instead of the most "
                        "recent git hash with a PNaCl change. This value must "
                        "be more recent than the one in the current "
                        "pnacl_newlib.json. This option is useful when multiple "
                        "changelists' toolchain builds were merged, or when "
                        "too many PNaCl changes would be pulled in at the "
                        "same time.")
    parser.add_argument('-n', '--dry-run', default=False, action='store_true',
                        help="Print the changelist that would be sent, but "
                        "don't actually send anything to review.")
    parser.add_argument('--ignore-branch', default=False, action='store_true',
                        help='Allow script to run from branches other than '
                        'master or main')
    parser.add_argument('--saigo', default=False, action='store_true',
                        help='Update the Saigo toolchain instead of PNaCl.')
    # TODO(jfb) The following options come from another script (its name is
    #           not visible here) and should be shared in some way.
    parser.add_argument('--filter_out_predicates', default=[],
                        help="Toolchains to filter out.")
    return parser.parse_args()
def ExecCommand(command):
    """Run a command and return its combined stdout/stderr as text.

    Args:
      command: Argument list handed to ``subprocess.check_output``.

    Returns:
      The decoded output of the command (stderr is merged into stdout).

    Raises:
      subprocess.CalledProcessError: If the command exits with a non-zero
        status. A diagnostic naming the command, its return code and its
        output is written to stderr before the exception propagates.
    """
    try:
        return subprocess.check_output(command, stderr=subprocess.STDOUT,
                                       text=True)
    except subprocess.CalledProcessError as e:
        sys.stderr.write('\nRunning `%s` returned %i, got:\n%s\n' %
                         (' '.join(e.cmd), e.returncode, e.output))
        # Re-raise: callers immediately call str methods on the result, so
        # falling through and returning None would only mask the real error.
        raise
def GetCurrentRevision(package):
    """Return what PKG_VER prints for `package`, stripped of whitespace.

    PKG_VER is run under the current interpreter with
    ``--revision-package <package>``.
    """
    query = [sys.executable, PKG_VER, '--revision-package', package]
    reported = ExecCommand(query)
    return reported.strip()
def SetCurrentRevision(hash, package):
    """Run PKG_VER with ``--revision-set <package> --revision <hash>``."""
    update = [sys.executable, PKG_VER]
    update += ['--revision-set', package]
    update += ['--revision', hash]
    ExecCommand(update)
def GetRevisionPackageFiles(package):
    """Return the revision-file paths for the names PKG_VER lists.

    PKG_VER is run with ``--revision-set <package>``; each line of its output
    is treated as a name and mapped to ``<TOOLCHAIN_REV_DIR>/<name>.json``.
    """
    listing = ExecCommand([sys.executable, PKG_VER,
                           '--revision-set', package])
    rev_files = []
    for name in listing.strip().split('\n'):
        rev_files.append(
            os.path.join(TOOLCHAIN_REV_DIR, '%s.json' % name.strip()))
    return rev_files
def GitCurrentBranch():
    """Return the short symbolic name that HEAD refers to."""
    command = ['git', 'symbolic-ref', 'HEAD', '--short']
    branch_name = ExecCommand(command)
    return branch_name.strip()
def GitRevParse(rev):
    """Return the stripped output of ``git rev-parse <rev>``."""
    resolved = ExecCommand(['git', 'rev-parse', rev])
    return resolved.strip()
def GitStatus():
    """List of statuses, one per path, of paths in the current git branch.

    Ignores untracked paths.

    Returns:
      A list of stripped ``git status --porcelain`` lines, excluding the
      entries that start with ``'?? '`` (untracked). Empty when git reports
      nothing.
    """
    out = ExecCommand(['git', 'status', '--porcelain']).strip()
    if not out:
        return []
    entries = (line.strip() for line in out.split('\n'))
    # A plain prefix test replaces the former regex, whose non-raw literal
    # contained the invalid escape sequence '\?'.
    return [entry for entry in entries if not entry.startswith('?? ')]
def SyncSources():
    """Run ``gclient sync``."""
    sync_command = ['gclient', 'sync']
    ExecCommand(sync_command)
def GitCommitInfo(info='', obj=None, num=None, extra=None):
    """Commit information, where info is one of the shorthands in git_formats.

    Args:
      info: Key into git_formats selecting what ``git log`` prints per commit;
        the empty string selects an empty format.
      obj: Optional path or hash, appended as the last argument.
      num: Optional number of results to return (passed as ``-n<num>``).
      extra: Optional list of extra arguments inserted after the format.

    Returns:
      The stripped output of ``git log``.

    Raises:
      KeyError: If info is not one of the known shorthands.
    """
    # Shorthands for git's pretty formats.
    # See PRETTY FORMATS format:<string> in `git help log`.
    git_formats = {
        '': '',
        'hash': '%H',
        'date': '%cI',
        'author email': '%aE',
        'subject': '%s',
        'body': '%b',
    }
    cmd = ['git', 'log', '--format=format:%s' % git_formats[info]]
    # None stands in for "no extra arguments" to avoid a mutable default.
    if extra:
        cmd += list(extra)
    if num:
        cmd += ['-n' + str(num)]
    if obj:
        cmd += [obj]
    return ExecCommand(cmd).strip()
def GitCommitsSince(date):
    """Return the hashes ``git log`` reports since `date`, one per element.

    The order is whatever ``git log`` emits (newest first by default).
    """
    since_flag = '--since="%s"' % date
    hashes = GitCommitInfo(info='hash', extra=[since_flag])
    return hashes.split('\n')
def GitFilesChanged(commit_hash):
    """List of files changed in a commit.

    Args:
      commit_hash: The commit to inspect.

    Returns:
      The lines printed by ``git log -n1 --name-only`` for that commit. The
      format string is empty, so only the file names are expected.
    """
    # The original expression was cut off after `num=1,`; `--name-only` plus a
    # newline split is the completion that matches the docstring and the way
    # GitChangesPath iterates over the result.
    return GitCommitInfo(obj=commit_hash, num=1,
                         extra=['--name-only']).split('\n')
def GitChangesPath(commit_hash, path):
    """Returns True if the commit changes a file under the given path.

    Args:
      commit_hash: The commit to inspect.
      path: Prefix to look for. It is used as a regular expression anchored
        at the start of each changed file name, so regex metacharacters in it
        are significant.

    Returns:
      True if at least one changed file name starts with a match for path.
    """
    # The original expression was truncated; re.search over the commit's
    # changed files is the completion consistent with the visible operands.
    return any(re.search('^' + path, f.strip())
               for f in GitFilesChanged(commit_hash))
def GitBranchExists(name):
    """Return True when ``git branch --list <name>`` prints anything."""
    listing = ExecCommand(['git', 'branch', '--list', name])
    return bool(listing.strip())
def GitCheckout(branch, force=False):
    """Checkout an existing branch.

    Args:
      branch: Name of the branch to check out.
      force: When true, pass ``--force``, which throws away local changes.
    """
    # The original argument list was cut off after the optional flag; the
    # branch name is the only operand left to supply.
    ExecCommand(['git', 'checkout'] +
                (['--force'] if force else []) +
                [branch])
def GitCheckoutNewBranch(branch):
    """Create `branch` starting at origin/main and switch to it."""
    start_point = 'origin/main'
    ExecCommand(['git', 'checkout', '-b', branch, start_point])
def GitDeleteBranch(branch, force=False):
    """Delete a branch, passing ``-D`` when force is true and ``-d`` otherwise."""
    delete_flag = '-d'
    if force:
        delete_flag = '-D'
    ExecCommand(['git', 'branch', delete_flag, branch])
def GitAdd(file):
    """Stage `file` with ``git add``."""
    add_command = ['git', 'add', file]
    ExecCommand(add_command)
def GitCommit(message):
    """Commit using `message` as the commit message.

    The message is passed to git through a temporary file (``--file=``) so
    that multi-line text does not need to be quoted on the command line.

    Args:
      message: The full commit message, as a str.
    """
    # Text mode is required: NamedTemporaryFile defaults to binary, which
    # rejects str under Python 3.
    with tempfile.NamedTemporaryFile(mode='w', encoding='utf-8') as tmp:
        tmp.write(message)
        # Make sure git sees the content before the file is closed (and
        # thereby deleted) at the end of the with-block.
        tmp.flush()
        ExecCommand(['git', 'commit', '--file=%s' % tmp.name])
def UploadChanges():
    """Run ``git cl upload --send-mail -f`` and return its output.

    The ``-f`` flag is what keeps the upload from prompting.
    """
    # TODO(jfb) Using the commit queue and avoiding git try + manual commit
    #           would be much nicer. See '--use-commit-queue'
    upload_command = ['git', 'cl', 'upload', '--send-mail', '-f']
    return ExecCommand(upload_command)
def GitTry():
    """Run ``git cl try`` and return its output."""
    try_command = ['git', 'cl', 'try']
    return ExecCommand(try_command)
def CommitMessageToCleanDict(commit_message):
    """Extract and clean commit message fields that follow the NaCl commit
    message convention.

    Each recognized ``KEY=value`` line is removed from the message and its
    value is returned under a descriptive key, so the fields are not repeated
    as-is when the message is quoted elsewhere.

    Args:
      commit_message: The raw commit message body.

    Returns:
      A dict with the keys 'reviewers tbr', 'reviewers', 'review url', 'bug',
      'test' and 'body'. A field that is absent maps to '' for the reviewer
      keys and to '<none>' for the others. 'body' is what remains of the
      message after the matched fields were removed, stripped.
    """
    res = {}
    # (key, pattern, value used when the field is absent). TBR= must be
    # handled before R= because the R= pattern would also match inside it.
    fields = [
        ['reviewers tbr', r'\s*TBR=([^\n]+)', ''],
        ['reviewers', r'\s*R=([^\n]+)', ''],
        ['review url', r'\s*Review URL: *([^\n]+)', '<none>'],
        ['bug', r'\s*BUG=([^\n]+)', '<none>'],
        ['test', r'\s*TEST=([^\n]+)', '<none>'],
    ]
    for key, regex, none in fields:
        found = re.search(regex, commit_message)
        if found:
            # Drop the whole matched field (including leading whitespace).
            commit_message = commit_message.replace(found.group(0), '')
            res[key] = found.group(1).strip()
        else:
            res[key] = none
    res['body'] = commit_message.strip()
    return res
def SendEmail(user_email, out):
    """Mail a failure report to user_email through the local SMTP server.

    Args:
      user_email: Destination address. When empty or None nothing is sent
        and a note is written to stderr instead.
      out: Plain-text body of the message.
    """
    if not user_email:
        sys.stderr.write('\nNo email address specified.')
        return
    sys.stderr.write('\nSending email to %s.\n' % user_email)
    msg = email.mime.text.MIMEText(out)
    msg['Subject'] = '[PNaCl revision updater] failure!'
    # NOTE(review): the sender address is empty in the source -- confirm the
    # intended From address.
    msg['From'] = ''
    msg['To'] = user_email
    # The context manager closes the SMTP session even if sendmail raises.
    with smtplib.SMTP('localhost') as s:
        s.sendmail(msg['From'], [msg['To']], msg.as_string())
def DryRun(out):
    """Write `out` to stdout, prefixed with 'DRY RUN: ' and ending in a newline."""
    line = ''.join(("DRY RUN: ", out, "\n"))
    sys.stdout.write(line)
def Done(out):
    """Write the final report to stdout and end the script successfully.

    Args:
      out: Text to print before exiting.

    Raises:
      SystemExit: Always, with exit status 0. Main treats SystemExit as the
        normal way for the script to finish.
    """
    # NOTE(review): the body was missing from the source; write-then-exit is
    # the behavior implied by how Main calls Done -- confirm.
    sys.stdout.write(out)
    sys.exit(0)
class CLInfo:
    """Changelist information: sorted dictionary of NaCl-standard fields.

    Only the fixed set of keys created in __init__ may be assigned, and every
    assigned value is stored as a string.
    """

    def __init__(self, desc):
        """Create an empty changelist record.

        Args:
          desc: Free-form description of what this record represents.
        """
        self._desc = desc
        self._vals = collections.OrderedDict([
            ('hash', None),
            ('author email', None),
            ('date', None),
            ('subject', None),
            ('commits since', None),
            ('bug', None),
            ('test', None),
            ('review url', None),
            ('reviewers tbr', None),
            ('reviewers', None),
            ('body', None),
        ])

    def __getitem__(self, key):
        """Return the stored value for key (None until it has been set)."""
        return self._vals[key]

    def __setitem__(self, key, val):
        """Store str(val) under key, which must be one of the known fields."""
        assert key in self._vals
        self._vals[key] = str(val)

    def __str__(self):
        """Changelist to string.

        A short description of the change, e.g.:
          1c0ffee: (author@example.com) Subject of the change.

        If the change is itself pulling in other changes from
        sub-repositories then take its relevant description and append it to
        the string. These sub-directory updates are also script-generated
        and therefore have a predictable format. e.g.:
          1c0ff33: (author@example.com) Subject of the change.
            | dead123: (other@example.com) Other change in another repository.
            | beef456: (third@example.com) Yet another cross-repository change.
        """
        desc = (' ' + self._vals['hash'][:7] + ': (' +
                self._vals['author email'] + ') ' +
                self._vals['subject'])
        if GitChangesPath(self._vals['hash'], 'pnacl/COMPONENT_REVISIONS'):
            git_hash_abbrev = '[0-9a-fA-F]{7}'
            email_re = r'[^@)]+@[^)]+\.[^)]+'
            # Matches the "<hash>: (<email>) <subject>" lines that an earlier
            # script-generated update left in the commit body.
            sub_change = re.compile(
                r'^ *%s: \(%s\) .*$' % (git_hash_abbrev, email_re))
            desc = '\n'.join([desc] + [
                ' | ' + line for line in self._vals['body'].split('\n')
                if sub_change.match(line)])
        return desc
def FmtOut(tr_points_at, pnacl_changes, new_git_hash,
           err=None, msg=None, saigo=False):
    """Format the changelist description, optionally preceded by a report.

    Args:
      tr_points_at: Mapping whose 'hash' entry is the revision currently
        pointed at.
      pnacl_changes: Changes being pulled in. Each must support ``str()`` and
        lookup of 'bug', 'author email', 'reviewers tbr' and 'reviewers'.
      new_git_hash: The revision being updated to.
      err: Optional list of error strings; when non-empty the output starts
        with an '*** ERROR ***' banner.
      msg: Optional list of informational strings.
      saigo: Name the toolchain 'Saigo' instead of 'PNaCl'.

    Returns:
      The formatted text.
    """
    # NOTE(review): the parameter list after `tr_points_at` was missing from
    # the source; it is rebuilt from the names the body uses and from the
    # visible call sites -- confirm the keyword defaults.
    err = [] if err is None else err
    msg = [] if msg is None else msg
    assert isinstance(err, list)
    assert isinstance(msg, list)
    old_git_hash = tr_points_at['hash']
    changes = '\n'.join(str(cl) for cl in pnacl_changes)
    # One de-duplicated, sorted BUG= line per change that carries a bug.
    bugs = '\n'.join(sorted(
        {'BUG= ' + cl['bug'].strip() for cl in pnacl_changes if cl['bug']}))
    # Everyone involved in any of the changes: authors plus both reviewer
    # fields, split on commas, de-duplicated and sorted.
    involved = ','.join(
        cl['author email'] + ',' +
        cl['reviewers tbr'] + ',' +
        cl['reviewers']
        for cl in pnacl_changes)
    # NOTE(review): the source appends an empty suffix to names lacking '@',
    # which is a no-op; the intended default domain is not visible -- confirm.
    reviewers = ', '.join(sorted(
        {r.strip() for r in involved.split(',')} - {''}))
    toolchain_name = 'Saigo' if saigo else 'PNaCl'
    # The standalone '%s\n' line carries `bugs`: the source listed seven
    # values for only six placeholders.
    return (('*** ERROR ***\n' if err else '') +
            '\n\n'.join(err) +
            '\n\n'.join(msg) +
            ('\n\n' if err or msg else '') +
            ('Update revision for %s\n\n'
             'Update %s -> %s\n\n'
             'Pull the following %s changes into NaCl:\n%s\n\n'
             '%s\n'
             'R= %s\n'
             'TEST=git cl try\n'
             '(Please LGTM this change and tick the "commit" box)\n' %
             (toolchain_name, old_git_hash, new_git_hash, toolchain_name,
              changes, bugs, reviewers)))
# Script entry point: works out which revision the toolchain pointer should
# move to, collects the changes in between, and either reports what would be
# done (dry run) or updates, uploads and tries the change.
# NOTE(review): this listing has lost its indentation and appears to be missing
# several statements (for example, no `try:` is visible ahead of the `except`
# clauses near the end, and no `else:` follows the dry-run branch). The notes
# below describe only what is visible -- confirm against the real file.
def Main(args):
args = ParseArgs(args)
# Package whose revision is read and updated; --saigo switches it.
package = 'pnacl_newlib'
if args.saigo:
package = 'saigo_newlib'
# A hash given with --hash is resolved through `git rev-parse`.
new_pnacl_revision = args.hash
user_provided_hash = args.hash is not None
if user_provided_hash:
new_pnacl_revision = GitRevParse(new_pnacl_revision)
tr_points_at = CLInfo('revision update points at PNaCl version')
pnacl_changes = []
msg = []
# The branch restriction is enforced only for real (non-dry) runs and can be
# lifted with --ignore-branch.
orig_branch = GitCurrentBranch()
if not args.dry_run and not args.ignore_branch:
if orig_branch != 'master' and orig_branch != 'main':
raise Exception('Must be on branch master or main, currently on %s'
% orig_branch)
# Real runs also require that GitStatus() reports nothing.
if not args.dry_run:
status = GitStatus()
if len(status) != 0:
raise Exception("Repository isn't clean:\n %s" % '\n '.join(status))
# NOTE(review): no statement is visible under the following `if`; presumably a
# call such as SyncSources() belongs here -- confirm.
if not args.dry_run:
# The current revision file points at a specific PNaCl LLVM version. LLVM is
# checked-in to the NaCl repository, but its head isn't necessarily the one
# that we currently use in PNaCl.
tr_points_at['hash'] = GetCurrentRevision(package)
tr_points_at['date'] = GitCommitInfo(
info='date', obj=tr_points_at['hash'], num=1)
recent_commits = GitCommitsSince(tr_points_at['date'])
tr_points_at['commits since'] = len(recent_commits)
assert len(recent_commits) > 1
if not user_provided_hash:
# No update hash specified, take the latest commit.
new_pnacl_revision = recent_commits[0]
new_pnacl_revision_date = GitCommitInfo(
info='date', obj=new_pnacl_revision, num=1)
# Dates are parsed with dateutil before being compared.
new_date = dateutilparser.parse(new_pnacl_revision_date)
old_date = dateutilparser.parse(tr_points_at['date'])
# NOTE(review): the Done(FmtOut(...)) call below is not closed in this listing.
if new_date <= old_date:
Done(FmtOut(tr_points_at, pnacl_changes, new_pnacl_revision,
err=["Can't update to git hash %s committed on %s: "
"the current revision's hash %s "
"committed on %s is more recent." %
(new_pnacl_revision, new_pnacl_revision_date,
tr_points_at['hash'], tr_points_at['date'])],
# Find the commits changing PNaCl files that follow the previous PNaCl
# revision pointer.
# The last element of recent_commits is left out of the search, and the set()
# removes commits that match more than one path.
pnacl_pathes = ['pnacl/', 'toolchain_build/']
pnacl_hashes = list(set(functools.reduce(
lambda acc, lst: acc + lst,
[[cl for cl in recent_commits[:-1] if
GitChangesPath(cl, path)] for
path in pnacl_pathes])))
# Build one CLInfo per matching commit from its log fields and cleaned body.
# NOTE(review): no visible statement adds `cl` to pnacl_changes -- confirm.
for hash in pnacl_hashes:
cl = CLInfo('PNaCl change ' + hash)
cl['hash'] = hash
for i in ['author email', 'date', 'subject']:
cl[i] = GitCommitInfo(info=i, obj=hash, num=1)
for k,v in CommitMessageToCleanDict(
GitCommitInfo(info='body', obj=hash, num=1)).items():
cl[k] = v
# Hashes aren't ordered chronologically, make sure the changes are.
pnacl_changes.sort(key=lambda x: dateutilparser.parse(x['date']))
# Remove commits later than the current commit or the user-provided one.
cutoff_date = dateutilparser.parse(GitCommitInfo(
info='date', obj=new_pnacl_revision, num=1))
pnacl_changes = [cl for cl in pnacl_changes if
dateutilparser.parse(cl['date']) <= cutoff_date]
# NOTE(review): this Done(FmtOut(...)) call is also not closed in this listing.
if len(pnacl_changes) == 0:
Done(FmtOut(tr_points_at, pnacl_changes, new_pnacl_revision,
msg=['No change since %s on %s.' %
(tr_points_at['hash'], tr_points_at['date'])],
if not user_provided_hash:
# Take the latest commit that touched PNaCl.
new_pnacl_revision = pnacl_changes[-1]['hash']
# The update branch is named after the target revision; an existing branch of
# that name is treated as an update that has not landed yet.
new_branch_name = 'pnacl-revision-update-to-%s' % new_pnacl_revision
if args.saigo:
new_branch_name = 'saigo-revision-update-to-%s' % new_pnacl_revision
if GitBranchExists(new_branch_name):
# TODO(jfb) Figure out if tryjobs succeeded, checkout the branch and land.
raise Exception("Branch %s already exists, the change hasn't "
"landed yet.\nPlease check trybots and land it "
"manually." % new_branch_name)
# NOTE(review): the statements after the two DryRun calls presumably belong to
# a missing `else:` branch, and the body of the `for f in ...` loop is not
# visible -- confirm.
if args.dry_run:
DryRun("Would check out branch: " + new_branch_name)
DryRun("Would update toolchain revision to: %s" % new_pnacl_revision)
SetCurrentRevision(new_pnacl_revision, package)
for f in GetRevisionPackageFiles(package):
upload_res = UploadChanges()
msg += ['Upload result:\n%s' % upload_res]
try_res = GitTry()
msg += ['Try result:\n%s' % try_res]
# NOTE(review): both a non-forced and a forced checkout of the original branch
# are visible back to back; presumably they sit on different paths (success vs.
# failure) in the real file -- confirm.
GitCheckout(orig_branch, force=False)
GitCheckout(orig_branch, force=True)
except SystemExit as e:
# Normal exit.
except (BaseException, Exception) as e:
# Leave the branch around, if any was created: it'll prevent next
# runs of the cronjob from succeeding until the failure is fixed.
# NOTE(review): the first value of the 'Failed at %s: %s' tuple and the first
# argument of SendEmail are missing in this listing.
out = FmtOut(tr_points_at, pnacl_changes, new_pnacl_revision, msg=msg,
err=['Failed at %s: %s' % (, e)])
sys.stderr.write('%s\n' % e)
if not args.dry_run:
SendEmail(, out)
return 0
if __name__ == '__main__':