blob: 2c4f293229290287940eb8cd0a3124e1929d1f76 [file] [log] [blame]
#!/usr/bin/python3
'''
Bundle files for uploading to ClusterFuzz.
Usage:
bundle_clusterfuzz.py OUTPUT_FILE.tgz [--build-dir=BUILD_DIR]
The output file will be a .tgz file.
If a build directory is provided, we will look under there to find bin/wasm-opt
and lib/libbinaryen.so. A useful place to get builds from is the Emscripten SDK,
as you can do
./emsdk install tot
after which ./upstream/ (from the emsdk dir) will contain builds of wasm-opt and
libbinaryen.so (that are designed to run on as many systems as possible, by not
depending on newer libc symbols, etc., as opposed to a normal local build).
Thus, the full workflow could be
cd emsdk
./emsdk install tot
cd ../binaryen
python3 scripts/bundle_clusterfuzz.py binaryen_wasm_fuzzer.tgz --build-dir=../emsdk/upstream
When using --build-dir in this way, you are responsible for ensuring that the
wasm-opt in the build dir is compatible with the scripts in the current dir
(e.g., if run.py here passes a flag that is only in a newer/older version of
wasm-opt, a problem can happen).
Before uploading to ClusterFuzz, it is worth doing the following:
1. Run the local fuzzer (scripts/fuzz_opt.py). That includes a ClusterFuzz
testcase handler, which simulates what ClusterFuzz does.
2. Run the unit tests, which include smoke tests for our ClusterFuzz support:
python -m unittest test/unit/test_cluster_fuzz.py
Look at the logs, which will contain statistics on the wasm files the
fuzzer emits, and see that they look reasonable.
You should run the unit tests on the bundle you are about to upload, by
setting the proper env var like this (using the same filename as above):
BINARYEN_CLUSTER_FUZZ_BUNDLE=`pwd`/binaryen_wasm_fuzzer.tgz python -m unittest test/unit/test_cluster_fuzz.py
Note that you must pass an absolute filename (e.g. using pwd as shown).
The unittest logs should reflect that that bundle is being used at the
very start ("Using existing bundle: ..." rather than "Making a new
bundle"). Note that some of the unittests also create their own bundles, to
test the bundling script itself, so later down you will see logging of
bundle creation even if you provide a bundle.
After uploading to ClusterFuzz, you can wait a while for it to run, and then:
1. Inspect the log to see that we generate all the testcases properly, and
their sizes look reasonably random, etc.
2. Inspect the sample testcase and run it locally, to see that
d8 --wasm-staging testcase.js
properly runs the testcase, emitting logging etc.
3. Check the stats and crashes page (known crashes should at least be showing
up). Note that these may take longer to show up than 1 and 2.
'''
import glob
import os
import shlex
import subprocess
import sys
import tarfile
# Read the filenames first, as importing |shared| changes the directory, so
# any relative path must be made absolute before that import happens.
if len(sys.argv) < 2:
    # Fail with a usage message rather than an IndexError traceback.
    sys.exit(f'Usage: {os.path.basename(sys.argv[0])} OUTPUT_FILE.tgz [--build-dir=BUILD_DIR]')

output_file = os.path.abspath(sys.argv[1])
print(f'Bundling to: {output_file}')
# Validate explicitly (not with assert, which is stripped under -O).
if not output_file.endswith('.tgz'):
    sys.exit('Can only generate a .tgz')

# Optional second argument: --build-dir=BUILD_DIR. None means "use the
# default build locations".
build_dir = None
if len(sys.argv) >= 3:
    # Split on the first '=' only, so that a directory whose path itself
    # contains '=' is kept whole instead of being truncated.
    flag, separator, value = sys.argv[2].partition('=')
    if flag != '--build-dir' or not separator:
        sys.exit(f'Unexpected argument {sys.argv[2]!r}, expected --build-dir=BUILD_DIR')
    build_dir = os.path.abspath(value)
    # Delete the argument, as importing |shared| scans it. Remove exactly the
    # argument we consumed rather than whatever happens to be last.
    del sys.argv[2]
from test import fuzzing # noqa
from test import shared # noqa
from test import support # noqa
# Pick where to get the builds: without an explicit build dir we use the
# locations from shared.options; otherwise we use the bin/ and lib/
# subdirectories of the given build dir.
if not build_dir:
    binaryen_bin = shared.options.binaryen_bin
    binaryen_lib = shared.options.binaryen_lib
else:
    binaryen_bin, binaryen_lib = (
        os.path.join(build_dir, subdir) for subdir in ('bin', 'lib')
    )
# ClusterFuzz's run.py uses these features. Keep this in sync with that, so that
# we only bundle initial content that makes sense for it.
#
# The list begins with '-all' and is followed by the individual
# '--disable-*' flags. It is appended to the wasm-opt command line used to
# validate initial content.
features = ['-all']
features.extend((
    '--disable-shared-everything',
    '--disable-fp16',
    '--disable-strings',
    '--disable-stack-switching',
))
# Create the gzip-compressed bundle. Members written, in order:
#   run.py, scripts/fuzz_shell.js, bin/wasm-opt, any shared libraries found
#   under lib/, the initial testcases initial/<N>.wasm, and initial/num.txt.
with tarfile.open(output_file, "w:gz") as tar:
    # run.py
    run = os.path.join(shared.options.binaryen_root, 'scripts', 'clusterfuzz', 'run.py')
    print(f' .. run: {run}')
    tar.add(run, arcname='run.py')

    # fuzz_shell.js
    fuzz_shell = os.path.join(shared.options.binaryen_root, 'scripts', 'fuzz_shell.js')
    print(f' .. fuzz_shell: {fuzz_shell}')
    tar.add(fuzz_shell, arcname='scripts/fuzz_shell.js')

    # wasm-opt binary
    wasm_opt = os.path.join(binaryen_bin, 'wasm-opt')
    print(f' .. wasm-opt: {wasm_opt}')
    tar.add(wasm_opt, arcname='bin/wasm-opt')

    # For a dynamic build we also need libbinaryen.so and possibly other files.
    # Try both .so and .dylib suffixes for more OS coverage.
    for suffix in ['.so', '.dylib']:
        libbinaryen = os.path.join(binaryen_lib, f'libbinaryen{suffix}')
        if os.path.exists(libbinaryen):
            print(f' .. libbinaryen: {libbinaryen}')
            tar.add(libbinaryen, arcname=f'lib/libbinaryen{suffix}')

        # The emsdk build also includes some more necessary files.
        for lib in ['libc++', 'libmimalloc']:
            # Include the main name plus any NAME.2.0 and such.
            # TODO: Using ldd/otool would be better, to find the actual
            #       dependencies of libbinaryen. Using glob like this will
            #       pick up stale contents in the directory.
            full_lib = os.path.join(binaryen_lib, lib) + suffix
            # Escape the fixed prefix so that glob metacharacters in the
            # build path ('[', '*', '?') are matched literally, and sort the
            # matches so the archive order does not depend on the filesystem.
            for path in sorted(glob.glob(f'{glob.escape(full_lib)}*')):
                print(f' ............. : {path}')
                tar.add(path, arcname=f'lib/{os.path.basename(path)}')

    # Add tests we will use as initial content under initial/. We put all the
    # tests from the test suite there.
    print(' .. initial content: ')
    # Scratch file, reused for every candidate module; wasm-opt rewrites it in
    # place below.
    temp_wasm = 'temp.wasm'
    # Number of testcases added so far, which is also the next file's name.
    index = 0
    all_tests = shared.get_all_tests()
    for i, test in enumerate(all_tests):
        if not fuzzing.is_fuzzable(test):
            continue
        # Only the module part of each split entry is used here.
        for wast, _asserts in support.split_wast(test):
            if not wast:
                continue
            support.write_wast(temp_wasm, wast)
            # If the file is not valid for our features, skip it. In the same
            # operation, also convert to binary if this was text (binary is more
            # compact).
            cmd = shared.WASM_OPT + ['-q', temp_wasm, '-o', temp_wasm] + features
            # stderr is captured only to keep failures of skipped files quiet.
            if subprocess.run(cmd, stderr=subprocess.PIPE).returncode:
                continue

            # Looks good.
            tar.add(temp_wasm, arcname=f'initial/{index}.wasm')
            index += 1
        # Progress indicator, redrawn on the same line via '\r'.
        print(f'\r {100 * i / len(all_tests):.2f}%', end='', flush=True)

    print(f' (num: {index})')

    # Write initial/num.txt which contains the number of testcases in that
    # directory (saves run.py from needing to listdir each time).
    num_txt = 'num.txt'
    with open(num_txt, 'w') as f:
        f.write(f'{index}')
    tar.add(num_txt, arcname='initial/num.txt')
# Tell the user how to run the unit tests against the bundle just written.
print('Done.')
print('To run the tests on this bundle, do:')
print()
# Quote the path so the printed command can be pasted into a shell as-is even
# when the bundle's path contains spaces or other shell-special characters
# (paths without such characters are printed unchanged).
print(f'BINARYEN_CLUSTER_FUZZ_BUNDLE={shlex.quote(output_file)} python -m unittest test/unit/test_cluster_fuzz.py')
print()