scripts/bundle_clusterfuzz.py - external/github.com/WebAssembly/binaryen - Git at Google

 #!/usr/bin/python3

 '''
 Bundle files for uploading to ClusterFuzz.

 Usage:

 bundle.py OUTPUT_FILE.tgz [--build-dir=BUILD_DIR]

 The output file will be a .tgz file.

 if a build directory is provided, we will look under there to find bin/wasm-opt
 and lib/libbinaryen.so. A useful place to get builds from is the Emscripten SDK,
 as you can do

   ./emsdk install tot

 after which ./upstream/ (from the emsdk dir) will contain builds of wasm-opt and
 libbinaryen.so (that are designed to run on as many systems as possible, by not
 depending on newer libc symbols, etc., as opposed to a normal local build).
 Thus, the full workflow could be

   cd emsdk
   ./emsdk install tot
   cd ../binaryen
   python3 scripts/bundle_clusterfuzz.py binaryen_wasm_fuzzer.tgz --build-dir=../emsdk/upstream

 When using --build-dir in this way, you are responsible for ensuring that the
 wasm-opt in the build dir is compatible with the scripts in the current dir
 (e.g., if run.py here passes a flag that is only in a new/older version of
 wasm-opt, a problem can happen).

 Before uploading to ClusterFuzz, it is worth doing the following:

   1. Run the local fuzzer (scripts/fuzz_opt.py). That includes a ClusterFuzz
      testcase handler, which simulates what ClusterFuzz does.

   2. Run the unit tests, which include smoke tests for our ClusterFuzz support:

        python -m unittest test/unit/test_cluster_fuzz.py

      Look at the logs, which will contain statistics on the wasm files the
      fuzzer emits, and see that they look reasonable.

      You should run the unit tests on the bundle you are about to upload, by
      setting the proper env var like this (using the same filename as above):

        BINARYEN_CLUSTER_FUZZ_BUNDLE=`pwd`/binaryen_wasm_fuzzer.tgz python -m unittest test/unit/test_cluster_fuzz.py

      Note that you must pass an absolute filename (e.g. using pwd as shown).

      The unittest logs should reflect that that bundle is being used at the
      very start ("Using existing bundle: ..." rather than "Making a new
      bundle"). Note that some of the unittests also create their own bundles, to
      test the bundling script itself, so later down you will see logging of
      bundle creation even if you provide a bundle.

 After uploading to ClusterFuzz, you can wait a while for it to run, and then:

   1. Inspect the log to see that we generate all the testcases properly, and
      their sizes look reasonably random, etc.

   2. Inspect the sample testcase and run it locally, to see that

        d8 --wasm-staging testcase.js

      properly runs the testcase, emitting logging etc.

   3. Check the stats and crashes page (known crashes should at least be showing
      up). Note that these may take longer to show up than 1 and 2.
 '''

 import glob
 import os
 import subprocess
 import sys
 import tarfile

 # Read the filenames first, as importing |shared| changes the directory.
 output_file = os.path.abspath(sys.argv[1])
 print(f'Bundling to: {output_file}')
 assert output_file.endswith('.tgz'), 'Can only generate a .tgz'

 build_dir = None
 if len(sys.argv) >= 3:
     assert sys.argv[2].startswith('--build-dir=')
     build_dir = sys.argv[2].split('=')[1]
     build_dir = os.path.abspath(build_dir)
     # Delete the argument, as importing |shared| scans it.
     sys.argv.pop()

 from test import fuzzing # noqa
 from test import shared # noqa
 from test import support # noqa

 # Pick where to get the builds
 if build_dir:
     binaryen_bin = os.path.join(build_dir, 'bin')
     binaryen_lib = os.path.join(build_dir, 'lib')
 else:
     binaryen_bin = shared.options.binaryen_bin
     binaryen_lib = shared.options.binaryen_lib

 # ClusterFuzz's run.py uses these features. Keep this in sync with that, so that
 # we only bundle initial content that makes sense for it.
 features = [
     '-all',
     '--disable-shared-everything',
     '--disable-fp16',
     '--disable-strings',
     '--disable-stack-switching',
 ]

 with tarfile.open(output_file, "w:gz") as tar:
     # run.py
     run = os.path.join(shared.options.binaryen_root, 'scripts', 'clusterfuzz', 'run.py')
     print(f'  .. run:         {run}')
     tar.add(run, arcname='run.py')

     # fuzz_shell.js
     fuzz_shell = os.path.join(shared.options.binaryen_root, 'scripts', 'fuzz_shell.js')
     print(f'  .. fuzz_shell:  {fuzz_shell}')
     tar.add(fuzz_shell, arcname='scripts/fuzz_shell.js')

     # wasm-opt binary
     wasm_opt = os.path.join(binaryen_bin, 'wasm-opt')
     print(f'  .. wasm-opt:    {wasm_opt}')
     tar.add(wasm_opt, arcname='bin/wasm-opt')

     # For a dynamic build we also need libbinaryen.so and possibly other files.
     # Try both .so and .dylib suffixes for more OS coverage.
     for suffix in ['.so', '.dylib']:
         libbinaryen = os.path.join(binaryen_lib, f'libbinaryen{suffix}')
         if os.path.exists(libbinaryen):
             print(f'  .. libbinaryen: {libbinaryen}')
             tar.add(libbinaryen, arcname=f'lib/libbinaryen{suffix}')

             # The emsdk build also includes some more necessary files.
             for lib in ['libc++', 'libmimalloc']:
                 # Include the main name plus any NAME.2.0 and such.
                 # TODO: Using ldd/otool would be better, to find the actual
                 #       dependencies of libbinaryen. Using glob like this will
                 #       pick up stale contents in the directory.
                 full_lib = os.path.join(binaryen_lib, lib) + suffix
                 for path in glob.glob(f'{full_lib}*'):
                     print(f'  ............. : {path}')
                     tar.add(path, arcname=f'lib/{os.path.basename(path)}')

     # Add tests we will use as initial content under initial/. We put all the
     # tests from the test suite there.
     print('  .. initial content: ')
     temp_wasm = 'temp.wasm'
     index = 0
     all_tests = shared.get_all_tests()
     for i, test in enumerate(all_tests):
         if not fuzzing.is_fuzzable(test):
             continue
         for wast, asserts in support.split_wast(test):
             if not wast:
                 continue
             support.write_wast(temp_wasm, wast)
             # If the file is not valid for our features, skip it. In the same
             # operation, also convert to binary if this was text (binary is more
             # compact).
             cmd = shared.WASM_OPT + ['-q', temp_wasm, '-o', temp_wasm] + features
             if subprocess.run(cmd, stderr=subprocess.PIPE).returncode:
                 continue

             # Looks good.
             tar.add(temp_wasm, arcname=f'initial/{index}.wasm')
             index += 1
         print(f'\r        {100 * i / len(all_tests):.2f}%', end='', flush=True)
     print(f'        (num: {index})')

     # Write initial/num.txt which contains the number of testcases in that
     # directory (saves run.py from needing to listdir each time).
     num_txt = 'num.txt'
     with open(num_txt, 'w') as f:
         f.write(f'{index}')
     tar.add(num_txt, arcname='initial/num.txt')


 print('Done.')
 print('To run the tests on this bundle, do:')
 print()
 print(f'BINARYEN_CLUSTER_FUZZ_BUNDLE={output_file} python -m unittest test/unit/test_cluster_fuzz.py')
 print()
	#!/usr/bin/python3

	'''
	Bundle files for uploading to ClusterFuzz.

	Usage:

	bundle.py OUTPUT_FILE.tgz [--build-dir=BUILD_DIR]

	The output file will be a .tgz file.

	if a build directory is provided, we will look under there to find bin/wasm-opt
	and lib/libbinaryen.so. A useful place to get builds from is the Emscripten SDK,
	as you can do

	./emsdk install tot

	after which ./upstream/ (from the emsdk dir) will contain builds of wasm-opt and
	libbinaryen.so (that are designed to run on as many systems as possible, by not
	depending on newer libc symbols, etc., as opposed to a normal local build).
	Thus, the full workflow could be

	cd emsdk
	./emsdk install tot
	cd ../binaryen
	python3 scripts/bundle_clusterfuzz.py binaryen_wasm_fuzzer.tgz --build-dir=../emsdk/upstream

	When using --build-dir in this way, you are responsible for ensuring that the
	wasm-opt in the build dir is compatible with the scripts in the current dir
	(e.g., if run.py here passes a flag that is only in a new/older version of
	wasm-opt, a problem can happen).

	Before uploading to ClusterFuzz, it is worth doing the following:

	1. Run the local fuzzer (scripts/fuzz_opt.py). That includes a ClusterFuzz
	testcase handler, which simulates what ClusterFuzz does.

	2. Run the unit tests, which include smoke tests for our ClusterFuzz support:

	python -m unittest test/unit/test_cluster_fuzz.py

	Look at the logs, which will contain statistics on the wasm files the
	fuzzer emits, and see that they look reasonable.

	You should run the unit tests on the bundle you are about to upload, by
	setting the proper env var like this (using the same filename as above):

	BINARYEN_CLUSTER_FUZZ_BUNDLE=`pwd`/binaryen_wasm_fuzzer.tgz python -m unittest test/unit/test_cluster_fuzz.py

	Note that you must pass an absolute filename (e.g. using pwd as shown).

	The unittest logs should reflect that that bundle is being used at the
	very start ("Using existing bundle: ..." rather than "Making a new
	bundle"). Note that some of the unittests also create their own bundles, to
	test the bundling script itself, so later down you will see logging of
	bundle creation even if you provide a bundle.

	After uploading to ClusterFuzz, you can wait a while for it to run, and then:

	1. Inspect the log to see that we generate all the testcases properly, and
	their sizes look reasonably random, etc.

	2. Inspect the sample testcase and run it locally, to see that

	d8 --wasm-staging testcase.js

	properly runs the testcase, emitting logging etc.

	3. Check the stats and crashes page (known crashes should at least be showing
	up). Note that these may take longer to show up than 1 and 2.
	'''

	import glob
	import os
	import subprocess
	import sys
	import tarfile

	# Read the filenames first, as importing \|shared\| changes the directory.
	output_file = os.path.abspath(sys.argv[1])
	print(f'Bundling to: {output_file}')
	assert output_file.endswith('.tgz'), 'Can only generate a .tgz'

	build_dir = None
	if len(sys.argv) >= 3:
	assert sys.argv[2].startswith('--build-dir=')
	build_dir = sys.argv[2].split('=')[1]
	build_dir = os.path.abspath(build_dir)
	# Delete the argument, as importing \|shared\| scans it.
	sys.argv.pop()

	from test import fuzzing # noqa
	from test import shared # noqa
	from test import support # noqa

	# Pick where to get the builds
	if build_dir:
	binaryen_bin = os.path.join(build_dir, 'bin')
	binaryen_lib = os.path.join(build_dir, 'lib')
	else:
	binaryen_bin = shared.options.binaryen_bin
	binaryen_lib = shared.options.binaryen_lib

	# ClusterFuzz's run.py uses these features. Keep this in sync with that, so that
	# we only bundle initial content that makes sense for it.
	features = [
	'-all',
	'--disable-shared-everything',
	'--disable-fp16',
	'--disable-strings',
	'--disable-stack-switching',
	]

	with tarfile.open(output_file, "w:gz") as tar:
	# run.py
	run = os.path.join(shared.options.binaryen_root, 'scripts', 'clusterfuzz', 'run.py')
	print(f' .. run: {run}')
	tar.add(run, arcname='run.py')

	# fuzz_shell.js
	fuzz_shell = os.path.join(shared.options.binaryen_root, 'scripts', 'fuzz_shell.js')
	print(f' .. fuzz_shell: {fuzz_shell}')
	tar.add(fuzz_shell, arcname='scripts/fuzz_shell.js')

	# wasm-opt binary
	wasm_opt = os.path.join(binaryen_bin, 'wasm-opt')
	print(f' .. wasm-opt: {wasm_opt}')
	tar.add(wasm_opt, arcname='bin/wasm-opt')

	# For a dynamic build we also need libbinaryen.so and possibly other files.
	# Try both .so and .dylib suffixes for more OS coverage.
	for suffix in ['.so', '.dylib']:
	libbinaryen = os.path.join(binaryen_lib, f'libbinaryen{suffix}')
	if os.path.exists(libbinaryen):
	print(f' .. libbinaryen: {libbinaryen}')
	tar.add(libbinaryen, arcname=f'lib/libbinaryen{suffix}')

	# The emsdk build also includes some more necessary files.
	for lib in ['libc++', 'libmimalloc']:
	# Include the main name plus any NAME.2.0 and such.
	# TODO: Using ldd/otool would be better, to find the actual
	# dependencies of libbinaryen. Using glob like this will
	# pick up stale contents in the directory.
	full_lib = os.path.join(binaryen_lib, lib) + suffix
	for path in glob.glob(f'{full_lib}*'):
	print(f' ............. : {path}')
	tar.add(path, arcname=f'lib/{os.path.basename(path)}')

	# Add tests we will use as initial content under initial/. We put all the
	# tests from the test suite there.
	print(' .. initial content: ')
	temp_wasm = 'temp.wasm'
	index = 0
	all_tests = shared.get_all_tests()
	for i, test in enumerate(all_tests):
	if not fuzzing.is_fuzzable(test):
	continue
	for wast, asserts in support.split_wast(test):
	if not wast:
	continue
	support.write_wast(temp_wasm, wast)
	# If the file is not valid for our features, skip it. In the same
	# operation, also convert to binary if this was text (binary is more
	# compact).
	cmd = shared.WASM_OPT + ['-q', temp_wasm, '-o', temp_wasm] + features
	if subprocess.run(cmd, stderr=subprocess.PIPE).returncode:
	continue

	# Looks good.
	tar.add(temp_wasm, arcname=f'initial/{index}.wasm')
	index += 1
	print(f'\r {100 * i / len(all_tests):.2f}%', end='', flush=True)
	print(f' (num: {index})')

	# Write initial/num.txt which contains the number of testcases in that
	# directory (saves run.py from needing to listdir each time).
	num_txt = 'num.txt'
	with open(num_txt, 'w') as f:
	f.write(f'{index}')
	tar.add(num_txt, arcname='initial/num.txt')


	print('Done.')
	print('To run the tests on this bundle, do:')
	print()
	print(f'BINARYEN_CLUSTER_FUZZ_BUNDLE={output_file} python -m unittest test/unit/test_cluster_fuzz.py')
	print()