import glob
import os
import platform
import re
import statistics
import subprocess
import sys
import tarfile
import tempfile
import unittest

from scripts.test import shared
from . import utils


def get_build_dir():
    # wasm-opt is in the bin/ dir, and the build dir is one above it,
    # and contains bin/ and lib/.
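    # For example (hypothetical path): if wasm-opt is at
    # /work/binaryen/build/bin/wasm-opt, the build dir is /work/binaryen/build.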
    return os.path.dirname(os.path.dirname(shared.WASM_OPT[0]))


# Windows is not yet supported.
@unittest.skipIf(platform.system() == 'Windows', 'Windows is not yet supported')
class ClusterFuzz(utils.BinaryenTestCase):
    @classmethod
    def setUpClass(cls):
        # Bundle up our ClusterFuzz package, and unbundle it to a directory.
        # Keep the directory alive in a class var.
        cls.temp_dir = tempfile.TemporaryDirectory()
        cls.clusterfuzz_dir = cls.temp_dir.name

        bundle = os.environ.get('BINARYEN_CLUSTER_FUZZ_BUNDLE')
        if bundle:
            print(f'Using existing bundle: {bundle}')
        else:
            print('Making a new bundle')
            bundle = os.path.join(cls.clusterfuzz_dir, 'bundle.tgz')
            cmd = [shared.in_binaryen('scripts', 'bundle_clusterfuzz.py')]
            cmd.append(bundle)
            cmd.append(f'--build-dir={get_build_dir()}')
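            # The assembled command is roughly (paths illustrative):
            #   bundle_clusterfuzz.py /tmp/.../bundle.tgz --build-dir=<build dir>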
            shared.run_process(cmd)

        print('Unpacking bundle')
        with tarfile.open(bundle, 'r:gz') as tar:
            tar.extractall(path=cls.clusterfuzz_dir)

        print('Ready')

    # Test our bundler for ClusterFuzz.
    def test_bundle(self):
        # The bundle should contain certain files:
        # 1. run.py, the main entry point.
        self.assertTrue(os.path.exists(os.path.join(self.clusterfuzz_dir, 'run.py')))
        # 2. scripts/fuzz_shell.js, the js testcase shell
        self.assertTrue(os.path.exists(os.path.join(self.clusterfuzz_dir, 'scripts', 'fuzz_shell.js')))
        # 3. bin/wasm-opt, the wasm-opt binary in a static build
        wasm_opt = os.path.join(self.clusterfuzz_dir, 'bin', 'wasm-opt')
        self.assertTrue(os.path.exists(wasm_opt))

        # See that we can execute the bundled wasm-opt. It should be able to
        # print out its version.
        out = subprocess.check_output([wasm_opt, '--version'], text=True)
        self.assertIn('wasm-opt version ', out)

    # Generate N testcases, using run.py from a temp dir, and outputting to a
    # testcase dir.
    def generate_testcases(self, N, testcase_dir):
        proc = subprocess.run([sys.executable,
                               os.path.join(self.clusterfuzz_dir, 'run.py'),
                               f'--output_dir={testcase_dir}',
                               f'--no_of_files={N}'],
                              text=True,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
        self.assertEqual(proc.returncode, 0)

        # We should have logged the creation of N testcases.
        self.assertEqual(proc.stdout.count('Created testcase:'), N)

        # We should have actually created them.
        for i in range(0, N + 2):
            fuzz_file = os.path.join(testcase_dir, f'fuzz-binaryen-{i}.js')
            flags_file = os.path.join(testcase_dir, f'flags-binaryen-{i}.js')
            # We actually emit the range [1, N], so 0 or N+1 should not exist.
            if i >= 1 and i <= N:
                self.assertTrue(os.path.exists(fuzz_file))
                self.assertTrue(os.path.exists(flags_file))
            else:
                self.assertTrue(not os.path.exists(fuzz_file))
                self.assertTrue(not os.path.exists(flags_file))

        return proc

    # Test the bundled run.py script.
    def test_run_py(self):
        temp_dir = tempfile.TemporaryDirectory()

        N = 10
        proc = self.generate_testcases(N, temp_dir.name)

        # run.py should report no errors or warnings to stderr, except for
        # those we know are safe (we cannot test this in generate_testcases,
        # because the caller could do something like set BINARYEN_PASS_DEBUG,
        # which generates intentional stderr warnings).
        SAFE_WARNINGS = [
            # When we randomly pick no passes to run, this is shown.
            'warning: no passes specified, not doing any work',
            # MemoryPacking warns on some things.
            'warning: active memory segments have overlap, which prevents some optimizations.',
        ]
        stderr = proc.stderr
        for safe in SAFE_WARNINGS:
            stderr = stderr.replace(safe, '')
        stderr = stderr.strip()
        self.assertEqual(stderr, '')

    def test_fuzz_passes(self):
        # We should see interesting passes being run in run.py. This is *NOT* a
        # deterministic test, since the number of passes run is random (we just
        # let run.py run normally, to simulate the real environment), so flakes
        # are possible here. However, we do the check in a way that the
        # statistical likelihood of a flake is insignificant. Specifically, we
        # just check that we see a different number of passes run in two
        # different invocations, which is enough to prove that we are running
        # different passes each time. And the number of passes is on average
        # over 100 here (10 testcases, and each runs 0-20 passes or so).
        temp_dir = tempfile.TemporaryDirectory()
        N = 10

        # Try many times to see a different number, to make flakes even less
        # likely. In the worst case, if there were only two possible numbers
        # of passes run, each with equal probability, the chance that all 100
        # iterations agree is 2^-99: even running this test once per second,
        # we would expect to wait billions of billions of years for a flake.
        # (And if there are only two numbers with *non*-equal probability,
        # then something is very wrong, and we'd like to see errors.)
        seen_num_passes = set()
        for i in range(100):
            os.environ['BINARYEN_PASS_DEBUG'] = '1'
            try:
                proc = self.generate_testcases(N, temp_dir.name)
            finally:
                del os.environ['BINARYEN_PASS_DEBUG']

            num_passes = proc.stderr.count('running pass')
            print(f'num passes: {num_passes}')
            seen_num_passes.add(num_passes)
            if len(seen_num_passes) > 1:
                return
        raise Exception(f'We only ever saw these pass counts: {seen_num_passes}')

    def test_file_contents(self):
        # Like test_fuzz_passes, this is nondeterministic, but statistically
        # it is almost impossible to get a flake here.
        temp_dir = tempfile.TemporaryDirectory()
        N = 100
        self.generate_testcases(N, temp_dir.name)

        # To check for interesting wasm file contents, we note how many
        # struct.news appear (a signal that we are emitting WasmGC, and in
        # non-trivial amounts), the sizes of the wasm files, and the number
        # of exports.
        seen_struct_news = []
        seen_sizes = []
        seen_exports = []

        # Second wasm files are also emitted sometimes.
        seen_second_sizes = []

        # The number of struct.news appears in the metrics report like this:
        #
        #  StructNew      : 18
        #
        struct_news_regex = re.compile(r'StructNew\s+:\s+(\d+)')

        # The number of exports appears in the metrics report like this:
        #
        #  [exports]      : 1
        #
        exports_regex = re.compile(r'\[exports\]\s+:\s+(\d+)')

        for i in range(1, N + 1):
            fuzz_file = os.path.join(temp_dir.name, f'fuzz-binaryen-{i}.js')
            flags_file = os.path.join(temp_dir.name, f'flags-binaryen-{i}.js')

            # The flags file must contain exactly --wasm-staging.
            with open(flags_file) as f:
                self.assertEqual(f.read(), '--wasm-staging')

            # Extract the wasm file(s) from the JS. First remove any stale
            # extracted files from a previous iteration, so that we do not
            # pick them up by mistake.
            for f in glob.glob('extracted*'):
                os.unlink(f)
            extractor = shared.in_binaryen('scripts', 'clusterfuzz', 'extract_wasms.py')
            subprocess.check_call([sys.executable, extractor, fuzz_file, 'extracted'])

            # One wasm file must always exist, and must be valid.
            binary_file = 'extracted.0.wasm'
            assert os.path.exists(binary_file)
            metrics = subprocess.check_output(
                shared.WASM_OPT + ['-all', '--metrics', binary_file, '-q'], text=True)

            # Update with what we see.
            struct_news = re.findall(struct_news_regex, metrics)
            if not struct_news:
                # No line is emitted when --metrics sees no struct.news.
                struct_news = ['0']
            # Metrics should contain one line for StructNews.
            self.assertEqual(len(struct_news), 1)
            seen_struct_news.append(int(struct_news[0]))

            seen_sizes.append(os.path.getsize(binary_file))

            exports = re.findall(exports_regex, metrics)
            # Metrics should contain one line for exports.
            self.assertEqual(len(exports), 1)
            seen_exports.append(int(exports[0]))

            # Sometimes a second wasm file should exist, and it must be valid
            # too.
            second_binary_file = 'extracted.1.wasm'
            if os.path.exists(second_binary_file):
                subprocess.check_call(
                    shared.WASM_OPT + ['-all', second_binary_file, '-q'])

                # Note its size (we leave detailed metrics for the first one;
                # they are generated by the same logic in run.py, so just
                # verifying some valid second wasms are emitted, of random
                # sizes, is enough).
                seen_second_sizes.append(os.path.getsize(second_binary_file))

        print()

        print('struct.news are distributed as ~ mean 15, stddev 24, median 10')
        # Given that, with 100 samples we are incredibly likely to see an
        # interesting number at least once. It is also incredibly unlikely for
        # the stdev to be zero.
        print(f'mean struct.news: {statistics.mean(seen_struct_news)}')
        print(f'stdev struct.news: {statistics.stdev(seen_struct_news)}')
        print(f'median struct.news: {statistics.median(seen_struct_news)}')
        self.assertGreaterEqual(max(seen_struct_news), 10)
        self.assertGreater(statistics.stdev(seen_struct_news), 0)

        print()

        print('sizes are distributed as ~ mean 2933, stddev 2011, median 2510')
        print(f'mean sizes: {statistics.mean(seen_sizes)}')
        print(f'stdev sizes: {statistics.stdev(seen_sizes)}')
        print(f'median sizes: {statistics.median(seen_sizes)}')
        self.assertGreaterEqual(max(seen_sizes), 1000)
        self.assertGreater(statistics.stdev(seen_sizes), 0)

        print()

        print('exports are distributed as ~ mean 9, stddev 6, median 8')
        print(f'mean exports: {statistics.mean(seen_exports)}')
        print(f'stdev exports: {statistics.stdev(seen_exports)}')
        print(f'median exports: {statistics.median(seen_exports)}')
        self.assertGreaterEqual(max(seen_exports), 8)
        self.assertGreater(statistics.stdev(seen_exports), 0)

        print()

        # Second files appear in ~ 1/3 of testcases.
        print('number of second wasms should be around 33 +- 8')
        print(f'number of second wasms: {len(seen_second_sizes)}')
        assert seen_second_sizes, 'must see at least one second wasm'
        print('second sizes are distributed as ~ mean 2933, stddev 2011, median 2510')
        print(f'mean sizes: {statistics.mean(seen_second_sizes)}')
        print(f'stdev sizes: {statistics.stdev(seen_second_sizes)}')
        print(f'median sizes: {statistics.median(seen_second_sizes)}')
        # Relax the assert on the max seen second size compared to the max seen
        # primary size, as we see fewer of these. 500 is still proof of an
        # interesting wasm file.
        self.assertGreaterEqual(max(seen_second_sizes), 500)
        self.assertGreater(statistics.stdev(seen_second_sizes), 0)

        print()

        # To check for interesting JS file contents, we'll note how many times
        # we build and run the wasm, and other things like JSPI.
        seen_builds = []
        seen_calls = []
        seen_second_builds = []
        seen_JSPIs = []
        seen_initial_contents = []

        # Initial contents are noted in comments like this:
        #
        #  /* using initial content 42.wasm */
        #
        # Note that we may see more than one in a file, as we may have more than
        # one wasm in each testcase: each wasm has a chance.
        initial_content_regex = re.compile(r'[/][*] using initial content ([^ ]+) [*][/]')

        # Some calls to callExports come with a random seed, so we have either
        #
        #  callExports();
        #  callExports(123456);
        #
        call_exports_regex = re.compile(r'callExports[(](\d*)[)]')
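        # The capture group is '' for the seedless form and the seed's digits
        # otherwise; the seed checks further below rely on that distinction.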

        for i in range(1, N + 1):
            fuzz_file = os.path.join(temp_dir.name, f'fuzz-binaryen-{i}.js')
            with open(fuzz_file) as f:
                js = f.read()
            seen_builds.append(js.count('build(binary);'))
            seen_calls.append(re.findall(call_exports_regex, js))
            seen_second_builds.append(js.count('build(secondBinary);'))

            # If JSPI is enabled, the async and await keywords should be
            # enabled (uncommented).
            if 'JSPI = 1' in js:
                seen_JSPIs.append(1)
                assert '/* async */' not in js
                assert '/* await */' not in js
            else:
                seen_JSPIs.append(0)
                assert '/* async */' in js
                assert '/* await */' in js

            seen_initial_contents.append(re.findall(initial_content_regex, js))

        # There is always one build and one call (those are in the default
        # fuzz_shell.js), and we add a couple of operations, each with equal
        # probability to be a build or a call, so over the 100 testcases here we
        # have an overwhelming probability to see at least one extra build and
        # one extra call.
        print('JS builds are distributed as ~ mean 4, stddev 5, median 2')
        print(f'mean JS builds: {statistics.mean(seen_builds)}')
        print(f'stdev JS builds: {statistics.stdev(seen_builds)}')
        print(f'median JS builds: {statistics.median(seen_builds)}')
        # Assert on at least 2, which means we added at least one to the default
        # one that always exists, as mentioned before.
        self.assertGreaterEqual(max(seen_builds), 2)
        self.assertGreater(statistics.stdev(seen_builds), 0)

        print()

        # Generate the counts of seen calls, for convenience. We convert
        #   [['11', '22'], [], ['99']]
        # into
        #   [2, 0, 1]
        num_seen_calls = [len(x) for x in seen_calls]
        print('Num JS calls are distributed as ~ mean 4, stddev 5, median 2')
        print(f'mean JS calls: {statistics.mean(num_seen_calls)}')
        print(f'stdev JS calls: {statistics.stdev(num_seen_calls)}')
        print(f'median JS calls: {statistics.median(num_seen_calls)}')
        self.assertGreaterEqual(max(num_seen_calls), 2)
        self.assertGreater(statistics.stdev(num_seen_calls), 0)

        # The initial callExports have no seed (that makes the first, default,
        # callExports behave deterministically, so we can compare to
        # wasm-opt --fuzz-exec etc.), and all subsequent ones must have a seed.
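        # For example (values illustrative), a single testcase's calls might
        # be ['', '123456', '7890'], from which we gather seeds [123456, 7890].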
        seeds = []
        for calls in seen_calls:
            if calls:
                self.assertEqual(calls[0], '')
                for other in calls[1:]:
                    self.assertNotEqual(other, '')
                    seeds.append(int(other))

        # The seeds are random numbers in 0..2^32-1, so overlap between them
        # should be incredibly unlikely. Allow a few % of such overlap just to
        # avoid extremely rare errors.
        num_seeds = len(seeds)
        num_unique_seeds = len(set(seeds))
        print(f'unique JS call seeds: {num_unique_seeds} (should be almost {num_seeds})')
        self.assertGreaterEqual(num_unique_seeds / num_seeds, 0.95)

        print()

        # Second wasm files are more rarely added, only 1/3 of the time or so,
        # but over 100 samples we are still overwhelmingly likely to see one.
        print('JS second builds are distributed as ~ mean 1.8, stddev 2.2, median 1')
        print(f'mean JS second builds: {statistics.mean(seen_second_builds)}')
        print(f'stdev JS second builds: {statistics.stdev(seen_second_builds)}')
        print(f'median JS second builds: {statistics.median(seen_second_builds)}')
        self.assertGreaterEqual(max(seen_second_builds), 2)
        self.assertGreater(statistics.stdev(seen_second_builds), 0)

        print()

        # JSPI is done 1/4 of the time or so.
        print('JSPIs are distributed as ~ mean 0.25')
        print(f'mean JSPIs: {statistics.mean(seen_JSPIs)}')
        self.assertEqual(min(seen_JSPIs), 0)
        self.assertEqual(max(seen_JSPIs), 1)

        print()

        # Flatten the data to help some of the below, from
        #   [['a.wasm', 'b.wasm'], ['c.wasm']]
        # into
        #   ['a.wasm', 'b.wasm', 'c.wasm']
        flat_initial_contents = [item for items in seen_initial_contents for item in items]

        # Initial content appears ~50% of the time for each wasm file, and
        # each testcase has 1.333 wasm files on average, which gives the
        # mean of ~0.68 below.
        print('Initial contents are distributed as ~ mean 0.68')
        print(f'mean initial contents: {len(flat_initial_contents) / N}')
        # Initial contents should be mostly unique (the pool of initial
        # content files is very large, and we pick just 100 or so here). And
        # we must see more than one unique one.
        unique_initial_contents = set(flat_initial_contents)
        print(f'unique initial contents: {len(unique_initial_contents)} should be almost equal to {len(flat_initial_contents)}')
        self.assertGreater(len(unique_initial_contents), 1)
        # Not all testcases have initial contents.
        num_initial_contents = [len(items) for items in seen_initial_contents]
        self.assertEqual(min(num_initial_contents), 0)
        # Some do (this is redundant given the earlier assertion on unique
        # initial contents, but it makes the check explicit).
        self.assertGreaterEqual(max(num_initial_contents), 1)

        print()

        # Execute the files in V8. Almost all should execute properly (some
        # small number may trap during startup, say on a segment out of bounds).
        if shared.V8:
            valid_executions = 0
            for i in range(1, N + 1):
                fuzz_file = os.path.join(temp_dir.name, f'fuzz-binaryen-{i}.js')

                cmd = [shared.V8, '--wasm-staging', fuzz_file]
                proc = subprocess.run(cmd, stdout=subprocess.PIPE)

                # An execution is valid if we exited without error, and if we
                # managed to run some code before exiting (modules with no
                # exports will be considered "invalid" here, but that is very
                # rare, and such modules are not useful anyhow).
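                # (The '[fuzz-exec] calling <export>' lines are logged by
                # fuzz_shell.js as it invokes each export.)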
                if proc.returncode == 0 and b'[fuzz-exec] calling ' in proc.stdout:
                    valid_executions += 1

            print('Valid executions are distributed as ~ mean 0.99')
            print(f'mean valid executions: {valid_executions / N}')
            # Assert on having at least half execute properly. Given the true
            # mean is 0.99, for half of 100 to fail is incredibly unlikely.
            self.assertGreater(valid_executions, N / 2)

        print()

    # "zzz" in test name so that this runs last. If it runs first, it can be
    # confusing as it appears next to the logging of which bundle we use (see
    # setUpClass).
    def test_zzz_bundle_build_dir(self):
        cmd = [shared.in_binaryen('scripts', 'bundle_clusterfuzz.py')]
        cmd.append('bundle.tgz')
        # Test that we notice the --build-dir flag. Here we pass an invalid
        # value, so we should error.
        cmd.append('--build-dir=foo_bar')

        # The invalid --build-dir should make the command fail.
        with self.assertRaises(subprocess.CalledProcessError):
            subprocess.check_call(cmd, stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)

        # Test with a valid --build-dir.
        cmd.pop()
        cmd.append(f'--build-dir={get_build_dir()}')
        subprocess.check_call(cmd)