blob: a8ac4b0733a884a0611aa2462ababa64460f72e3 [file] [log] [blame] [edit]
#!/usr/bin/env python3
# Copyright 2021 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Creates an server to offload non-critical-path GN targets."""
from __future__ import annotations
import argparse
import json
import os
import queue
import shutil
import socket
import subprocess
import sys
import threading
from typing import Callable, Dict, List, Optional, Tuple
sys.path.append(os.path.join(os.path.dirname(__file__), 'gyp'))
from util import server_utils
def log(msg: str, *, end: str = ''):
  """Prints a one-line status message that overwrites the previous one.

  The message is prefixed with the aggregate stats from TaskStats.prefix() and
  shortened to fit the terminal width so that it always occupies one line.

  Args:
    msg: Text to display.
    end: Line terminator handed to print(). The default of '' leaves the cursor
      on the same line so the next call overwrites this message; pass a newline
      to keep the message on screen.
  """
  # Shrink the message (leaving a 2-char prefix and use the rest of the room
  # for the suffix) according to terminal size so it is always one line.
  width = shutil.get_terminal_size().columns
  prefix = f'[{TaskStats.prefix()}] '
  max_msg_width = width - len(prefix)
  if len(msg) > max_msg_width:
    length_to_show = max_msg_width - 5  # Account for ellipsis and header.
    if length_to_show > 0:
      msg = f'{msg[:2]}...{msg[-length_to_show:]}'
    else:
      # msg[-0:] is the whole message and a negative count slices from the
      # front, so neither would shorten anything. On terminals this narrow just
      # cut the message down to whatever room is left (possibly none).
      msg = msg[:max(max_msg_width, 0)]
  # \r to return the carriage to the beginning of line.
  # \033[K to replace the normal \n to erase until the end of the line.
  # Avoid the default line ending so the next \r overwrites the same line just
  # like ninja's output.
  print(f'\r{prefix}{msg}\033[K', end=end, flush=True)
class TaskStats:
  """Aggregate counters describing all tasks, shared across threads.

  All state lives in class attributes and every method is a classmethod, so
  the class is used directly and never instantiated.
  """
  # Number of task subprocesses currently counted as running.
  _num_processes = 0
  # Number of tasks reported as complete via complete_task().
  _completed_tasks = 0
  # Number of tasks registered via add_task().
  _total_tasks = 0
  # Guards the counters that are updated through _add_locked().
  _lock = threading.Lock()

  @classmethod
  def _add_locked(cls, counter: str, delta: int):
    """Adds |delta| to the class attribute named |counter| under _lock."""
    with cls._lock:
      setattr(cls, counter, getattr(cls, counter) + delta)

  @classmethod
  def no_running_processes(cls):
    """Returns True when the running-process counter is zero."""
    return not cls._num_processes

  @classmethod
  def add_task(cls):
    """Records that one more task has been queued."""
    # Only the main thread calls this, so there is no need for locking.
    cls._total_tasks = cls._total_tasks + 1

  @classmethod
  def add_process(cls):
    """Records that one more subprocess is running."""
    cls._add_locked('_num_processes', 1)

  @classmethod
  def remove_process(cls):
    """Records that one subprocess is no longer running."""
    cls._add_locked('_num_processes', -1)

  @classmethod
  def complete_task(cls):
    """Records that one more task is complete."""
    cls._add_locked('_completed_tasks', 1)

  @classmethod
  def prefix(cls):
    """Returns the stats summary shown at the start of each status line.

    The format is '<running> process(es), <completed>/<total>'.
    """
    # Ninja's prefix is: [205 processes, 6/734 @ 6.5/s : 0.922s ]
    # Time taken and task completion rate are not important for the build
    # server since it is always running in the background and uses idle
    # priority for its tasks.
    with cls._lock:
      running = cls._num_processes
      completed = cls._completed_tasks
      total = cls._total_tasks
    noun = 'process' if running == 1 else 'processes'
    return f'{running} {noun}, {completed}/{total}'
class TaskManager:
  """Class to encapsulate a threadsafe queue and handle deactivating it.

  Tasks are queued with add_task() and popped off the queue whenever
  _maybe_start_tasks() decides that the machine has spare capacity. Once
  deactivate() has been called no further tasks are started.
  """

  def __init__(self):
    self._queue: queue.SimpleQueue[Task] = queue.SimpleQueue()
    self._deactivated = False

  def add_task(self, task: Task):
    """Queues |task| and then tries to start queued tasks.

    Must not be called after deactivate().
    """
    assert not self._deactivated
    TaskStats.add_task()
    self._queue.put(task)
    log(f'QUEUED {task.name}')
    self._maybe_start_tasks()

  def deactivate(self):
    """Stops starting tasks and terminates every task still in the queue."""
    self._deactivated = True
    while not self._queue.empty():
      try:
        task = self._queue.get_nowait()
      except queue.Empty:
        # Another thread drained the queue between empty() and get_nowait().
        return
      task.terminate()

  @staticmethod
  def _num_running_processes():
    """Returns the procs_running value reported by /proc/stat."""
    with open('/proc/stat') as f:
      for line in f:
        if line.startswith('procs_running'):
          return int(line.rstrip().split()[1])
    assert False, 'Could not read /proc/stat'
    return 0

  def _maybe_start_tasks(self):
    """Starts up to two queued tasks if the current load allows it.

    Also passed to each started task as its completion callback, so it runs
    both on the thread calling add_task() and on task watcher threads.
    """
    if self._deactivated:
      return
    # Include load avg so that a small dip in the number of currently running
    # processes will not cause new tasks to be started while the overall load is
    # heavy.
    cur_load = max(self._num_running_processes(), os.getloadavg()[0])
    # os.cpu_count() returns None when the count cannot be determined, which
    # would make the comparison below raise TypeError. Fall back to a single
    # CPU so that tasks are then only started when nothing else is running.
    num_cpus = os.cpu_count() or 1
    num_started = 0
    # Always start a task if we don't have any running, so that all tasks are
    # eventually finished. Try starting up tasks when the overall load is light.
    # Limit to at most 2 new tasks to prevent ramping up too fast. There is a
    # chance where multiple threads call _maybe_start_tasks and each gets to
    # spawn up to 2 new tasks, but since the only downside is some build tasks
    # get worked on earlier rather than later, it is not worth mitigating.
    while num_started < 2 and (TaskStats.no_running_processes()
                               or num_started + cur_load < num_cpus):
      try:
        next_task = self._queue.get_nowait()
      except queue.Empty:
        return
      # start() returns 0 for tasks that were terminated while still queued, so
      # those do not count towards the limit of 2.
      num_started += next_task.start(self._maybe_start_tasks)
# TODO(wnwen): Break this into Request (encapsulating what ninja sends) and Task
# when a Request starts to be run. This would eliminate ambiguity
# about when and whether _proc/_thread are initialized.
class Task:
  """Class to represent one task and operations on it.

  A task wraps one command to be run in a subprocess at the lowest priority.
  When the command fails, prints any output, or is terminated without being
  replaced, the task's stamp file is deleted so that ninja considers the
  target dirty.
  """

  def __init__(self, name: str, cwd: str, cmd: List[str], stamp_file: str):
    """Stores the request; nothing runs until start() is called.

    Args:
      name: Name of the task, used in status messages and in key.
      cwd: Directory the command is run from.
      cmd: Command line to run, as an argument list.
      stamp_file: Path of the task's stamp file, joined onto |cwd|.
    """
    self.name = name
    self.cwd = cwd
    self.cmd = cmd
    self.stamp_file = stamp_file
    self._terminated = False
    self._replaced = False
    # Guards _terminated/_replaced and the creation of _proc/_thread.
    self._lock = threading.Lock()
    self._proc: Optional[subprocess.Popen] = None
    self._thread: Optional[threading.Thread] = None
    self._return_code: Optional[int] = None

  @property
  def key(self):
    """Identity of the task: the (cwd, name) pair."""
    return (self.cwd, self.name)

  def start(self, on_complete_callback: Callable[[], None]) -> int:
    """Starts the task if it has not already been terminated.

    Returns the number of processes that have been started. This is called at
    most once when the task is popped off the task queue.

    Args:
      on_complete_callback: Called on the watcher thread after the process has
        exited and the task has been completed.

    Raises:
      Whatever subprocess.Popen raises if the process cannot be spawned.
    """
    # The environment variable forces the script to actually run in order to
    # avoid infinite recursion.
    env = os.environ.copy()
    env[server_utils.BUILD_SERVER_ENV_VARIABLE] = '1'

    with self._lock:
      if self._terminated:
        return 0
      # Use os.nice(19) to ensure the lowest priority (idle) for these analysis
      # tasks since we want to avoid slowing down the actual build.
      # TODO(wnwen): Use ionice to reduce resource consumption.
      TaskStats.add_process()
      log(f'STARTING {self.name}')
      try:
        # This use of preexec_fn is sufficiently simple, just one os.nice call.
        # pylint: disable=subprocess-popen-preexec-fn
        self._proc = subprocess.Popen(
            self.cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            cwd=self.cwd,
            env=env,
            text=True,
            # Undecodable bytes in the output must not raise in the watcher
            # thread, otherwise the task would never be completed.
            errors='replace',
            preexec_fn=lambda: os.nice(19),
        )
      except Exception:
        # Nothing was spawned, so undo the bookkeeping before propagating.
        TaskStats.remove_process()
        raise
      self._thread = threading.Thread(
          target=self._complete_when_process_finishes,
          args=(on_complete_callback, ))
      self._thread.start()
      return 1

  def terminate(self, replaced=False):
    """Can be called multiple times to cancel and ignore the task's output.

    Args:
      replaced: True when a newer task with the same key supersedes this one,
        in which case the stamp file is left alone.
    """
    with self._lock:
      if self._terminated:
        return
      self._terminated = True
      self._replaced = replaced

    # It is safe to access _proc and _thread outside of _lock since they are
    # only changed by self.start holding _lock when self._terminated is false.
    # Since we have just set self._terminated to true inside of _lock, we know
    # that neither _proc nor _thread will be changed from this point onwards.
    if self._proc:
      self._proc.terminate()
      self._proc.wait()
    # Ensure that self._complete is called either by the thread or by us.
    if self._thread:
      self._thread.join()
    else:
      self._complete()

  def _complete_when_process_finishes(self,
                                      on_complete_callback: Callable[[], None]):
    """Thread target: waits for the process, then completes the task."""
    assert self._proc
    # We know Popen.communicate will return a str and not a byte since it is
    # constructed with text=True.
    stdout: str = self._proc.communicate()[0]
    self._return_code = self._proc.returncode
    TaskStats.remove_process()
    self._complete(stdout)
    on_complete_callback()

  def _complete(self, stdout: str = ''):
    """Update the user and ninja after the task has run or been terminated.

    This method should only be run once per task. Avoid modifying the task so
    that this method does not need locking.

    Args:
      stdout: Combined stdout/stderr of the process, empty if it never ran.
    """
    TaskStats.complete_task()
    delete_stamp = False
    if self._terminated:
      log(f'TERMINATED {self.name}')
      # When tasks are replaced, avoid deleting the stamp file, context:
      # https://issuetracker.google.com/301961827.
      if not self._replaced:
        delete_stamp = True
    else:
      log(f'FINISHED {self.name}')
      # Any output at all counts as a failure, even with a zero return code.
      if stdout or self._return_code != 0:
        delete_stamp = True
        # An extra new line is needed since we want to preserve the previous
        # _log line. Use a single print so that it is threadsafe.
        # TODO(wnwen): Improve stdout display by parsing over it and moving the
        #              actual error to the bottom. Otherwise long command lines
        #              in the Traceback section obscure the actual error(s).
        print('\n' + '\n'.join([
            f'FAILED: {self.name}',
            f'Return code: {self._return_code}',
            ' '.join(self.cmd),
            stdout,
        ]))

    # When the stamp is kept nothing needs to be done: ninja will rebuild
    # targets when their inputs change even if their stamp file has a later
    # modified time. Thus we do not need to worry about the script being run by
    # the build server updating the mtime incorrectly.
    if delete_stamp:
      # Force ninja to consider failed targets as dirty.
      try:
        os.unlink(os.path.join(self.cwd, self.stamp_file))
      except FileNotFoundError:
        pass
def _listen_for_request_data(sock: socket.socket):
while True:
conn = sock.accept()[0]
received = []
with conn:
while True:
data = conn.recv(4096)
if not data:
break
received.append(data)
if received:
yield json.loads(b''.join(received))
def _process_requests(sock: socket.socket):
  """Queues a Task for every request received on |sock| until interrupted.

  A request whose (cwd, name) key matches an earlier task replaces it: the
  earlier task is terminated with replaced=True before the new one is queued.
  On KeyboardInterrupt the queue is deactivated and all known tasks are
  terminated.
  """
  # Latest task seen for each key. Since dicts in python can contain anything,
  # the explicit type helps make static type checking more useful.
  tasks: Dict[Tuple[str, str], Task] = {}
  task_manager = TaskManager()
  try:
    log('READY... Remember to set android_static_analysis="build_server" in '
        'args.gn files')
    for request in _listen_for_request_data(sock):
      new_task = Task(name=request['name'],
                      cwd=request['cwd'],
                      cmd=request['cmd'],
                      stamp_file=request['stamp_file'])
      key = new_task.key
      previous = tasks.get(key)
      if previous is not None:
        previous.terminate(replaced=True)
      tasks[key] = new_task
      task_manager.add_task(new_task)
  except KeyboardInterrupt:
    log('STOPPING SERVER...', end='\n')
    # Gracefully shut down the task manager, terminating all queued tasks.
    task_manager.deactivate()
    # Terminate all currently running tasks.
    for known_task in tasks.values():
      known_task.terminate()
    log('STOPPED', end='\n')
def main():
  """Runs the build server, or only checks whether one is running.

  With --fail-if-not-running nothing is served: a connection to the server
  socket is attempted and instructions are printed if it fails.

  Returns:
    1 if --fail-if-not-running was given and the connection failed, else 0.
  """
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument(
      '--fail-if-not-running',
      action='store_true',
      help='Used by GN to fail fast if the build server is not running.')
  options = parser.parse_args()

  if not options.fail_if_not_running:
    # Normal mode: serve requests until _process_requests returns.
    with socket.socket(socket.AF_UNIX) as server_sock:
      server_sock.bind(server_utils.SOCKET_ADDRESS)
      server_sock.listen()
      _process_requests(server_sock)
    return 0

  # Probe mode: being able to connect means a server is listening.
  with socket.socket(socket.AF_UNIX) as probe_sock:
    try:
      probe_sock.connect(server_utils.SOCKET_ADDRESS)
    except OSError:
      print('Build server is not running and '
            'android_static_analysis="build_server" is set.\nPlease run '
            'this command in a separate terminal:\n\n'
            '$ build/android/fast_local_dev_server.py\n')
      return 1
  return 0
# When executed as a script, use main()'s return value as the exit status.
if __name__ == '__main__':
  raise SystemExit(main())