| #!/usr/bin/env python3 |
| # Copyright 2021 WebAssembly Community Group participants |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """A test case update script. |
| |
| This script is a utility to update wasm-opt based lit tests with new FileCheck |
| patterns. It is based on LLVM's update_llc_test_checks.py script. |
| """ |
| |
| import argparse |
| import glob |
| import os |
| import re |
| import subprocess |
| import sys |
| import tempfile |
| |
# Directory and file name of this script; the directory is used to locate
# sibling helper scripts and the name is embedded in the generated notice.
script_dir, script_name = os.path.split(__file__)

# First line written to every generated test file. `{script}` is filled in
# with the script name plus any options needed to regenerate the file.
NOTICE = (';; NOTE: Assertions have been generated by {script} and should not'
          ' be edited.')

# A lit RUN line; group 1 is the command text.
RUN_LINE_RE = re.compile(r'^\s*;;\s*RUN:\s*(.*)$')
# A `--check-prefix` option on a filecheck command; group 1 is the prefix.
CHECK_PREFIX_RE = re.compile(r'.*--check-prefix[= ](\S+).*')
# A line that starts a new module.
MODULE_RE = re.compile(r'^\(module.*$', re.MULTILINE)

# Alternation of the module item kinds that can receive checks.
ALL_ITEMS = '|'.join((
    'type', 'import', 'global', 'memory', 'data', 'table',
    'elem', 'tag', 'export', 'start', 'func',
))
# An item name: an optionally `$`-prefixed identifier or a quoted string.
ITEM_NAME = r'\$?[^\s()]*|"[^\s()]*"'
# The start of a module item. Group 1 is the indentation, group 2 the item
# kind and group 3 the item name.
# FIXME: Parentheses inside string contents are not handled. For example,
#   (data (i32.const 10) "hello(")
# looks unterminated because of the '(' inside the string, so the code that
# searches for the end of the item keeps consuming the elements that follow
# the |data| until it has seen enough closing ')' symbols.
ITEM_RE = re.compile(
    rf'(?:^\s*\(rec\s*)?(^\s*)\(({ALL_ITEMS})\s+({ITEM_NAME}).*$',
    re.MULTILINE)

# A fuzz-exec log line announcing that an exported function is being called.
FUZZ_EXEC_FUNC = re.compile(r'^\[fuzz-exec\] calling (?P<name>\S*)$')
| |
| |
def warn(msg):
    """Write `msg` to stderr on its own line, prefixed with 'warning: '."""
    text = 'warning: {}'.format(msg)
    print(text, file=sys.stderr)
| |
| |
def itertests(args):
    """
    Generate a (filename, lines) pair for each test selected on the command
    line.

    Every entry of `args.tests` is expanded as a recursive glob pattern. A
    pattern that matches nothing only produces a warning. A file whose first
    line does not mention this script is skipped with a warning unless
    `args.force` is set. The yielded lines have trailing whitespace removed.
    """
    for glob_pattern in args.tests:
        matched = glob.glob(glob_pattern, recursive=True)
        if not matched:
            warn(f'No tests matched {glob_pattern}. Ignoring it.')
            continue
        for path in matched:
            with open(path) as handle:
                contents = [raw.rstrip() for raw in handle]
            header = contents[0] if contents else ''
            if script_name in header or args.force:
                yield path, contents
            else:
                warn(f'Skipping test {path} which was not generated by '
                     f'{script_name}. Use -f to override.')
| |
| |
def find_run_lines(test, lines):
    """
    Return the commands found on the RUN lines of `lines`.

    A RUN line whose command ends in a backslash is continued by the next RUN
    line: the trailing backslashes are removed and the two commands are joined
    with a single space. When there are no RUN lines at all, a warning that
    names `test` is printed and an empty list is returned.
    """
    fragments = []
    for text in lines:
        found = RUN_LINE_RE.match(text)
        if found:
            fragments.append(found.group(1))
    if not fragments:
        warn(f'No RUN lines found in {test}. Ignoring.')
        return []

    joined = []
    for fragment in fragments:
        if joined and joined[-1].endswith('\\'):
            # The previous command is still open; glue this fragment onto it.
            joined[-1] = joined[-1].rstrip('\\') + ' ' + fragment
        else:
            joined.append(fragment)
    return joined
| |
| |
def run_command(args, test, tmp, command):
    """
    Run one RUN-line command through the shell and return its stdout as text.

    `args.binaryen_bin` is put at the front of PATH for the child process.
    In the command, `%s` is replaced by `test`, `%t` by `tmp`, and `foreach`
    by the path of the `foreach.py` script next to this one. The output is
    decoded as UTF-8; `subprocess.check_output` raises if the command fails.
    """
    env = os.environ.copy()
    env['PATH'] = os.pathsep.join((args.binaryen_bin, env['PATH']))

    # Applied in this order, one after another, on the evolving string.
    substitutions = (
        ('%s', test),
        ('%t', tmp),
        ('foreach', os.path.join(script_dir, 'foreach.py')),
    )
    for placeholder, replacement in substitutions:
        command = command.replace(placeholder, replacement)

    raw = subprocess.check_output(command, shell=True, env=env)
    return raw.decode('utf-8')
| |
| |
def find_end(module, start):
    """
    Find the index one past the closing parenthesis that matches the open
    parenthesis at `module[start]`.

    Only the characters '(' and ')' are examined, so parentheses that appear
    inside string literals or comments are counted like any other.

    Returns the index just after the matching ')', which equals `len(module)`
    when that ')' is the final character. If the parenthesis is never closed,
    `len(module)` is returned so that the caller's slice covers the rest of
    the text.
    """
    assert module[start] == '('
    depth = 0
    for pos in range(start, len(module)):
        char = module[pos]
        if char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
            if depth == 0:
                # Return as soon as the nesting closes rather than on the
                # following iteration, so a ')' at the very end of the text
                # is handled correctly.
                return pos + 1
    return len(module)
| |
| |
def split_modules(text):
    """
    Split `text` into a list of strings, one for each module.

    Each piece runs from the start of one module line up to the start of the
    next. Anything that precedes the first module stays attached to the first
    piece. Text with fewer than two modules is returned whole as a single
    element.
    """
    starts = [found.start() for found in MODULE_RE.finditer(text)]
    if len(starts) < 2:
        return [text]
    # The first piece begins at offset 0 rather than at the first module so
    # that any leading text is kept with it.
    cuts = [0] + starts[1:] + [len(text)]
    return [text[begin:stop] for begin, stop in zip(cuts, cuts[1:])]
| |
| |
def parse_output_modules(text):
    """
    Parse wat output into per-module item lists.

    Returns a list with one entry per module in `text`. Each entry is a list
    of ((kind, name), [line]) tuples, one for every module item found in that
    module, where the lines are the item's text split on newlines.
    """
    def items_of(module_text):
        found = []
        for item in ITEM_RE.finditer(module_text):
            # Group 1 is the indentation, so it ends at the item's '('.
            open_paren = item.end(1)
            stop = find_end(module_text, open_paren)
            key = (item.group(2), item.group(3))
            body = module_text[item.start():stop].split('\n')
            found.append((key, body))
        return found

    return [items_of(module_text) for module_text in split_modules(text)]
| |
| |
def parse_output_fuzz_exec(text):
    """
    Parse fuzz-exec output into the same shape `parse_output_modules` returns.

    Module boundaries cannot be recovered from this kind of output, so the
    result always holds the items of exactly one module. Each function
    invocation line starts a new ('func', name) item and the non-empty lines
    after it are appended to that item.
    """
    calls = []
    for entry in text.split('\n'):
        invocation = FUZZ_EXEC_FUNC.match(entry)
        if invocation:
            # Quote the name because that is how it will be parsed in the
            # input.
            quoted = '"' + invocation.group('name') + '"'
            calls.append((('func', quoted), [entry]))
            continue
        # Host limit notices are dropped. They can show up before any function
        # has been called (for instance during instantiation of the module),
        # when there is no item yet to attach them to. Blank lines are
        # dropped as well.
        if entry.startswith('[host limit') or not entry:
            continue
        assert calls, 'unexpected non-invocation line'
        _, body = calls[-1]
        body.append(entry)
    return [calls]
| |
| |
def get_command_output(args, kind, test, lines, tmp):
    """
    Run every RUN line of a test and collect the parsed output.

    Returns a list with one entry per module. Each entry maps a check prefix
    to a list of module items of the form ((kind, name), [line]). `kind`
    selects the parser: 'wat' or 'fuzz-exec'.

    Every command is executed, but its output is only recorded when it is
    piped to `filecheck`. The prefix comes from that command's
    `--check-prefix` option and defaults to 'CHECK'.
    """
    per_module = []
    for run_line in find_run_lines(test, lines):
        # Split on the last pipe only: command on the left, consumer on the
        # right.
        parts = [piece.strip() for piece in run_line.rsplit('|', 1)]
        piped_to_filecheck = (len(parts) > 1 and
                              parts[1].startswith('filecheck '))
        if len(parts) > 2 or (len(parts) == 2 and not piped_to_filecheck):
            warn('pipes only supported for one command piped to `filecheck`')

        prefix = ''
        if piped_to_filecheck:
            found = CHECK_PREFIX_RE.match(parts[1])
            prefix = found.group(1) if found else 'CHECK'

        output = run_command(args, test, tmp, parts[0])
        if not prefix:
            # Nothing checks this command's output, so there is nothing to
            # record for it.
            continue

        if kind == 'wat':
            parsed = parse_output_modules(output)
        elif kind == 'fuzz-exec':
            parsed = parse_output_fuzz_exec(output)
        else:
            assert False, "unknown output kind"

        for index, module_items in enumerate(parsed):
            if len(per_module) == index:
                per_module.append({})
            per_module[index][prefix] = module_items

    return per_module
| |
| |
def update_test(args, test, lines, tmp):
    """
    Regenerate the check lines of a single test file.

    `lines` is the current content of `test`. The RUN commands are executed
    (through `get_command_output`) and the test is rebuilt: existing check
    lines for the prefixes in use are dropped, and new check lines are emitted
    in front of each module item that appears in the command output. Checks
    for output items that were not reached this way are emitted at the end of
    their module.

    Options recorded in an existing notice on the first line (`--all-items`,
    `--output=...`) override the corresponding values in `args` for this file
    only. The result is printed when `args.dry_run` is set and written back to
    `test` otherwise.
    """
    # Do not update `args` directly because the changes should only apply to the
    # current test.
    all_items = args.all_items
    output_kind = args.output
    if lines and script_name in lines[0]:
        # Apply previously used options for this file
        if '--all-items' in lines[0]:
            all_items = True
        output = re.search(r'--output=(?P<kind>\S*)', lines[0])
        if output:
            output_kind = output.group('kind')
        # Skip the notice if it is already in the output
        lines = lines[1:]

    command_output = get_command_output(args, output_kind, test, lines, tmp)

    prefixes = set(prefix
                   for module_output in command_output
                   for prefix in module_output.keys())
    # Prefixes are literal text, so escape them before building the pattern.
    # Sorting keeps the generated pattern the same from run to run.
    prefix_alternation = '|'.join(re.escape(prefix)
                                  for prefix in sorted(prefixes))
    check_line_re = re.compile(r'^\s*;;\s*(' + prefix_alternation +
                               r')(?:-NEXT|-LABEL|-NOT)?:.*$')

    # Filter out whitespace between check blocks
    if lines:
        filtered = [lines[0]]
        for i in range(1, len(lines) - 1):
            if lines[i] or not check_line_re.match(lines[i - 1]) or \
               not check_line_re.match(lines[i + 1]):
                filtered.append(lines[i])
        # With a single line, the first and the last line are the same one and
        # must not be added twice.
        if len(lines) > 1:
            filtered.append(lines[-1])
        lines = filtered

    # The (kind, name) of every item that is present in the input. Unless
    # `all_items` is set, only these items get checks.
    named_items = []
    for line in lines:
        match = ITEM_RE.match(line)
        if match:
            kind, name = match[2], match[3]
            named_items.append((kind, name))

    # Record the options in the notice so that a later run can reapply them.
    script = script_name
    if all_items:
        script += ' --all-items'
    if output_kind != 'wat':
        script += f' --output={output_kind}'
    output_lines = [NOTICE.format(script=script)]

    def emit_checks(indent, prefix, check_lines):
        # Append one check block: `check_lines[0]` as a `PREFIX:` line and the
        # remaining lines as `PREFIX-NEXT:` lines.
        def pad(line):
            # Separate non-empty text from the colon by at least one space.
            return line if not line or line.startswith(' ') else ' ' + line
        # NOTE(review): the text after `PREFIX:` does not line up with the
        # text after `PREFIX-NEXT:`; confirm whether extra padding was
        # intended here.
        output_lines.append(f'{indent};; {prefix}: {pad(check_lines[0])}')
        for line in check_lines[1:]:
            output_lines.append(f'{indent};; {prefix}-NEXT:{pad(line)}')

    input_modules = [m.split('\n') for m in split_modules('\n'.join(lines))]
    if len(input_modules) > len(command_output):
        warn('Fewer output modules than input modules: '
             'not all modules will get checks.')

    # Remove extra newlines at the end of modules
    input_modules = [m[:-1] for m in input_modules[:-1]] + [input_modules[-1]]

    for module_idx, module_lines in enumerate(input_modules):
        # Input modules beyond the last output module get no checks.
        output = command_output[module_idx] \
            if module_idx < len(command_output) else {}

        for line in module_lines:
            # Skip pre-existing check lines; we will regenerate them.
            if check_line_re.match(line):
                continue

            match = ITEM_RE.match(line)
            if not match:
                output_lines.append(line)
                continue

            indent, kind, name = match.groups()

            for prefix, items in output.items():
                # If the output for this prefix contains an item with this
                # name, emit all the items up to and including the matching
                # item
                has_item = any(name and (kind, name) == kind_name
                               for kind_name, _ in items)
                if has_item:
                    first = True
                    while True:
                        # Consume the items so that they are not emitted
                        # again later on.
                        kind_name, item_lines = items.pop(0)
                        if all_items or kind_name in named_items:
                            if not first:
                                output_lines.append('')
                            first = False
                            emit_checks(indent, prefix, item_lines)
                        if name and (kind, name) == kind_name:
                            break
            output_lines.append(line)

        # Output any remaining checks for each prefix
        first = True
        for prefix, items in output.items():
            for kind_name, item_lines in items:
                if all_items or kind_name in named_items:
                    if not first:
                        output_lines.append('')
                    first = False
                    emit_checks('', prefix, item_lines)

    if args.dry_run:
        print('\n'.join(output_lines))
    else:
        with open(test, 'w') as f:
            for line in output_lines:
                f.write(line + '\n')
| |
| |
def main():
    """
    Parse the command line and update every selected test file.

    The `%t` placeholder of RUN lines is bound to a path inside a private
    temporary directory. The directory, and with it anything the commands
    wrote to that path, is removed when the run ends.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '--binaryen-bin', dest='binaryen_bin', default='bin',
        help=('Specifies the path to the Binaryen executables in the CMake build'
              ' directory. Default: bin/ of current directory (i.e. assume an'
              ' in-tree build).'))
    parser.add_argument(
        '--all-items', action='store_true',
        help=('Emit checks for all module items, even those that do not appear'
              ' in the input.'))
    parser.add_argument(
        '--output', choices=['wat', 'fuzz-exec'], default='wat',
        help=('The kind of output test commands are expected to produce.'))
    parser.add_argument(
        '-f', '--force', action='store_true',
        help=('Generate FileCheck patterns even for test files whose existing '
              'patterns were not generated by this script.'))
    parser.add_argument(
        '--dry-run', action='store_true',
        help=('Print the updated test file contents instead of changing the '
              'test files'))
    parser.add_argument('tests', nargs='+', help='The test files to update')
    args = parser.parse_args()
    # Make the path absolute so that it stays valid when it is put on PATH.
    args.binaryen_bin = os.path.abspath(args.binaryen_bin)

    # `tempfile.mktemp()` is deprecated: another process can claim the name
    # between its generation and its first use, and nothing ever deletes the
    # file. A path inside a freshly created directory avoids both problems
    # while still not existing until a command creates it.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp = os.path.join(tmp_dir, 'tmp')
        for test, lines in itertests(args):
            update_test(args, test, lines, tmp)
| |
| |
# Run the updater only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()