| #!/usr/bin/env python3 |
| # Copyright 2012 The Emscripten Authors. All rights reserved. |
| # Emscripten is available under two separate licenses, the MIT license and the |
| # University of Illinois/NCSA Open Source License. Both these licenses can be |
| # found in the LICENSE file. |
| |
| import os |
| import sys |
| import subprocess |
| import re |
| import json |
| import shutil |
| |
| __rootpath__ = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) |
| sys.path.insert(1, __rootpath__) |
| |
| from tools.toolchain_profiler import ToolchainProfiler |
| from tools import building, config, shared, utils |
| |
| configuration = shared.configuration |
| temp_files = configuration.get_temp_files() |
| |
| |
| def path_from_root(*pathelems): |
| return os.path.join(__rootpath__, *pathelems) |
| |
| |
| ACORN_OPTIMIZER = path_from_root('tools', 'acorn-optimizer.js') |
| |
| NUM_CHUNKS_PER_CORE = 3 |
| MIN_CHUNK_SIZE = int(os.environ.get('EMCC_JSOPT_MIN_CHUNK_SIZE') or 512 * 1024) # configuring this is just for debugging purposes |
| MAX_CHUNK_SIZE = int(os.environ.get('EMCC_JSOPT_MAX_CHUNK_SIZE') or 5 * 1024 * 1024) |
| |
| WINDOWS = sys.platform.startswith('win') |
| |
| DEBUG = os.environ.get('EMCC_DEBUG') |
| |
| func_sig = re.compile(r'function ([_\w$]+)\(') |
| func_sig_json = re.compile(r'\["defun", ?"([_\w$]+)",') |
| import_sig = re.compile(r'(var|const) ([_\w$]+ *=[^;]+);') |
| |
| |
| def split_funcs(js, just_split=False): |
| if just_split: |
| return [('(json)', line) for line in js.splitlines()] |
| # split properly even if there are no newlines, |
| # which is important for deterministic builds (as which functions |
| # are in each chunk may differ, so we need to split them up and combine |
| # them all together later and sort them deterministically) |
| parts = ['function ' + part for part in js.split('function ')[1:]] |
| funcs = [] |
| for func in parts: |
| m = func_sig.search(func) |
| if not m: |
| continue |
| ident = m.group(1) |
| assert ident |
| funcs.append((ident, func)) |
| return funcs |
| |
| |
| class Minifier: |
| """minification support. We calculate minification of |
| globals here, then pass that into the parallel acorn-optimizer.js runners which |
| perform minification of locals. |
| """ |
| |
| def __init__(self, js): |
| self.js = js |
| self.symbols_file = None |
| self.profiling_funcs = False |
| |
| def minify_shell(self, shell, minify_whitespace): |
| # Run through acorn-optimizer.js to find and minify the global symbols |
| # We send it the globals, which it parses at the proper time. JS decides how |
| # to minify all global names, we receive a dictionary back, which is then |
| # used by the function processors |
| |
| shell = shell.replace('0.0', '13371337') # avoid optimizer doing 0.0 => 0 |
| |
| # Find all globals in the JS functions code |
| |
| if not self.profiling_funcs: |
| self.globs = [m.group(1) for m in func_sig.finditer(self.js)] |
| if len(self.globs) == 0: |
| self.globs = [m.group(1) for m in func_sig_json.finditer(self.js)] |
| else: |
| self.globs = [] |
| |
| with temp_files.get_file('.minifyglobals.js') as temp_file: |
| with open(temp_file, 'w') as f: |
| f.write(shell) |
| f.write('\n') |
| f.write('// EXTRA_INFO:' + json.dumps(self.serialize())) |
| |
| cmd = config.NODE_JS + [ACORN_OPTIMIZER, temp_file, 'minifyGlobals'] |
| if minify_whitespace: |
| cmd.append('minifyWhitespace') |
| output = shared.run_process(cmd, stdout=subprocess.PIPE).stdout |
| |
| assert len(output) and not output.startswith('Assertion failed'), 'Error in js optimizer: ' + output |
| code, metadata = output.split('// EXTRA_INFO:') |
| self.globs = json.loads(metadata) |
| |
| if self.symbols_file: |
| with open(self.symbols_file, 'w') as f: |
| for key, value in self.globs.items(): |
| f.write(value + ':' + key + '\n') |
| print('wrote symbol map file to', self.symbols_file, file=sys.stderr) |
| |
| return code.replace('13371337', '0.0') |
| |
| def serialize(self): |
| return { |
| 'globals': self.globs |
| } |
| |
| |
| start_funcs_marker = '// EMSCRIPTEN_START_FUNCS\n' |
| end_funcs_marker = '// EMSCRIPTEN_END_FUNCS\n' |
| start_asm_marker = '// EMSCRIPTEN_START_ASM\n' |
| end_asm_marker = '// EMSCRIPTEN_END_ASM\n' |
| |
| |
| # Given a set of functions of form (ident, text), and a preferred chunk size, |
| # generates a set of chunks for parallel processing and caching. |
| @ToolchainProfiler.profile_block('chunkify') |
| def chunkify(funcs, chunk_size): |
| chunks = [] |
| # initialize reasonably, the rest of the funcs we need to split out |
| curr = [] |
| total_size = 0 |
| for i in range(len(funcs)): |
| func = funcs[i] |
| curr_size = len(func[1]) |
| if total_size + curr_size < chunk_size: |
| curr.append(func) |
| total_size += curr_size |
| else: |
| chunks.append(curr) |
| curr = [func] |
| total_size = curr_size |
| if curr: |
| chunks.append(curr) |
| curr = None |
| return [''.join(func[1] for func in chunk) for chunk in chunks] # remove function names |
| |
| |
| def run_on_js(filename, passes, extra_info=None, just_split=False, just_concat=False): |
| with ToolchainProfiler.profile_block('js_optimizer.split_markers'): |
| if not isinstance(passes, list): |
| passes = [passes] |
| |
| js = utils.read_file(filename) |
| if os.linesep != '\n': |
| js = js.replace(os.linesep, '\n') # we assume \n in the splitting code |
| |
| # Find suffix |
| suffix_marker = '// EMSCRIPTEN_GENERATED_FUNCTIONS' |
| suffix_start = js.find(suffix_marker) |
| suffix = '' |
| if suffix_start >= 0: |
| suffix_end = js.find('\n', suffix_start) |
| suffix = js[suffix_start:suffix_end] + '\n' |
| # if there is metadata, we will run only on the generated functions. If there isn't, we will run on everything. |
| |
| # Find markers |
| start_funcs = js.find(start_funcs_marker) |
| end_funcs = js.rfind(end_funcs_marker) |
| |
| if start_funcs < 0 or end_funcs < start_funcs or not suffix: |
| shared.exit_with_error('Invalid input file. Did not contain appropriate markers. (start_funcs: %s, end_funcs: %s, suffix_start: %s' % (start_funcs, end_funcs, suffix_start)) |
| |
| minify_globals = 'minifyNames' in passes |
| if minify_globals: |
| passes = [p if p != 'minifyNames' else 'minifyLocals' for p in passes] |
| start_asm = js.find(start_asm_marker) |
| end_asm = js.rfind(end_asm_marker) |
| assert (start_asm >= 0) == (end_asm >= 0) |
| |
| closure = 'closure' in passes |
| if closure: |
| passes = [p for p in passes if p != 'closure'] # we will do it manually |
| |
| cleanup = 'cleanup' in passes |
| if cleanup: |
| passes = [p for p in passes if p != 'cleanup'] # we will do it manually |
| |
| if not minify_globals: |
| with ToolchainProfiler.profile_block('js_optimizer.no_minify_globals'): |
| pre = js[:start_funcs + len(start_funcs_marker)] |
| post = js[end_funcs + len(end_funcs_marker):] |
| js = js[start_funcs + len(start_funcs_marker):end_funcs] |
| if 'asm' not in passes: |
| # can have Module[..] and inlining prevention code, push those to post |
| class Finals: |
| buf = [] |
| |
| def process(line): |
| if len(line) and (line.startswith(('Module[', 'if (globalScope)')) or line.endswith('["X"]=1;')): |
| Finals.buf.append(line) |
| return False |
| return True |
| |
| js = '\n'.join(filter(process, js.split('\n'))) |
| post = '\n'.join(Finals.buf) + '\n' + post |
| post = end_funcs_marker + post |
| else: |
| with ToolchainProfiler.profile_block('js_optimizer.minify_globals'): |
| # We need to split out the asm shell as well, for minification |
| pre = js[:start_asm + len(start_asm_marker)] |
| post = js[end_asm:] |
| asm_shell = js[start_asm + len(start_asm_marker):start_funcs + len(start_funcs_marker)] + ''' |
| EMSCRIPTEN_FUNCS(); |
| ''' + js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)] |
| js = js[start_funcs + len(start_funcs_marker):end_funcs] |
| |
| # we assume there is a maximum of one new name per line |
| minifier = Minifier(js) |
| |
| def check_symbol_mapping(p): |
| if p.startswith('symbolMap='): |
| minifier.symbols_file = p.split('=', 1)[1] |
| return False |
| if p == 'profilingFuncs': |
| minifier.profiling_funcs = True |
| return False |
| return True |
| |
| passes = list(filter(check_symbol_mapping, passes)) |
| asm_shell_pre, asm_shell_post = minifier.minify_shell(asm_shell, 'minifyWhitespace' in passes).split('EMSCRIPTEN_FUNCS();') |
| asm_shell_post = asm_shell_post.replace('});', '})') |
| pre += asm_shell_pre + '\n' + start_funcs_marker |
| post = end_funcs_marker + asm_shell_post + post |
| |
| minify_info = minifier.serialize() |
| |
| if extra_info: |
| for key, value in extra_info.items(): |
| assert key not in minify_info or value == minify_info[key], [key, value, minify_info[key]] |
| minify_info[key] = value |
| |
| # if DEBUG: |
| # print >> sys.stderr, 'minify info:', minify_info |
| |
| with ToolchainProfiler.profile_block('js_optimizer.remove_suffix_and_split'): |
| # remove suffix if no longer needed |
| if suffix and 'last' in passes: |
| suffix_start = post.find(suffix_marker) |
| suffix_end = post.find('\n', suffix_start) |
| post = post[:suffix_start] + post[suffix_end:] |
| |
| total_size = len(js) |
| funcs = split_funcs(js, just_split) |
| js = None |
| |
| with ToolchainProfiler.profile_block('js_optimizer.split_to_chunks'): |
| # if we are making source maps, we want our debug numbering to start from the |
| # top of the file, so avoid breaking the JS into chunks |
| cores = shared.get_num_cores() |
| |
| if not just_split: |
| intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE)) |
| chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks)) |
| chunks = chunkify(funcs, chunk_size) |
| else: |
| # keep same chunks as before |
| chunks = [f[1] for f in funcs] |
| |
| chunks = [chunk for chunk in chunks if len(chunk)] |
| if DEBUG and len(chunks): |
| print('chunkification: num funcs:', len(funcs), 'actual num chunks:', len(chunks), 'chunk size range:', max(map(len, chunks)), '-', min(map(len, chunks)), file=sys.stderr) |
| funcs = None |
| |
| if len(chunks): |
| serialized_extra_info = suffix_marker + '\n' |
| if minify_globals: |
| serialized_extra_info += '// EXTRA_INFO:' + json.dumps(minify_info) |
| elif extra_info: |
| serialized_extra_info += '// EXTRA_INFO:' + json.dumps(extra_info) |
| with ToolchainProfiler.profile_block('js_optimizer.write_chunks'): |
| def write_chunk(chunk, i): |
| temp_file = temp_files.get('.jsfunc_%d.js' % i).name |
| with open(temp_file, 'w') as f: |
| f.write(chunk) |
| f.write(serialized_extra_info) |
| return temp_file |
| filenames = [write_chunk(chunks[i], i) for i in range(len(chunks))] |
| else: |
| filenames = [] |
| |
| with ToolchainProfiler.profile_block('run_optimizer'): |
| if len(filenames): |
| commands = [config.NODE_JS + [ACORN_OPTIMIZER, f] + passes for f in filenames] |
| |
| if os.environ.get('EMCC_SAVE_OPT_TEMP') and os.environ.get('EMCC_SAVE_OPT_TEMP') != '0': |
| for filename in filenames: |
| saved = 'save_' + os.path.basename(filename) |
| while os.path.exists(saved): |
| saved = 'input' + str(int(saved.replace('input', '').replace('.txt', '')) + 1) + '.txt' |
| shutil.copyfile(filename, os.path.join(shared.get_emscripten_temp_dir(), saved)) |
| |
| filenames = shared.run_multiple_processes(commands, route_stdout_to_temp_files_suffix='js_opt.jo.js') |
| |
| for filename in filenames: |
| temp_files.note(filename) |
| |
| with ToolchainProfiler.profile_block('split_closure_cleanup'): |
| if closure or cleanup: |
| # run on the shell code, everything but what we acorn-optimize |
| start_asm = '// EMSCRIPTEN_START_ASM\n' |
| end_asm = '// EMSCRIPTEN_END_ASM\n' |
| cl_sep = 'wakaUnknownBefore(); var asm=wakaUnknownAfter(wakaGlobal,wakaEnv,wakaBuffer)\n' |
| |
| with temp_files.get_file('.cl.js') as cle: |
| pre_1, pre_2 = pre.split(start_asm) |
| post_1, post_2 = post.split(end_asm) |
| with open(cle, 'w') as f: |
| f.write(pre_1) |
| f.write(cl_sep) |
| f.write(post_2) |
| cld = cle |
| if closure: |
| if DEBUG: |
| print('running closure on shell code', file=sys.stderr) |
| cld = building.closure_compiler(cld, pretty='minifyWhitespace' not in passes) |
| temp_files.note(cld) |
| elif cleanup: |
| if DEBUG: |
| print('running cleanup on shell code', file=sys.stderr) |
| acorn_passes = ['JSDCE'] |
| if 'minifyWhitespace' in passes: |
| acorn_passes.append('minifyWhitespace') |
| cld = building.acorn_optimizer(cld, acorn_passes) |
| temp_files.note(cld) |
| coutput = utils.read_file(cld) |
| |
| coutput = coutput.replace('wakaUnknownBefore();', start_asm) |
| after = 'wakaUnknownAfter' |
| start = coutput.find(after) |
| end = coutput.find(')', start) |
| # If the closure comment to suppress useless code is present, we need to look one |
| # brace past it, as the first is in there. Otherwise, the first brace is the |
| # start of the function body (what we want). |
| USELESS_CODE_COMMENT = '/** @suppress {uselessCode} */ ' |
| USELESS_CODE_COMMENT_BODY = 'uselessCode' |
| brace = pre_2.find('{') + 1 |
| has_useless_code_comment = False |
| if pre_2[brace:brace + len(USELESS_CODE_COMMENT_BODY)] == USELESS_CODE_COMMENT_BODY: |
| brace = pre_2.find('{', brace) + 1 |
| has_useless_code_comment = True |
| pre = coutput[:start] + '(' + (USELESS_CODE_COMMENT if has_useless_code_comment else '') + 'function(global,env,buffer) {\n' + pre_2[brace:] |
| post = post_1 + end_asm + coutput[end + 1:] |
| |
| with ToolchainProfiler.profile_block('write_pre'): |
| filename += '.jo.js' |
| temp_files.note(filename) |
| f = open(filename, 'w') |
| f.write(pre) |
| pre = None |
| |
| with ToolchainProfiler.profile_block('sort_or_concat'): |
| if not just_concat: |
| # sort functions by size, to make diffing easier and to improve aot times |
| funcses = [] |
| for out_file in filenames: |
| funcses.append(split_funcs(utils.read_file(out_file), False)) |
| funcs = [item for sublist in funcses for item in sublist] |
| funcses = None |
| if not os.environ.get('EMCC_NO_OPT_SORT'): |
| funcs.sort(key=lambda x: (len(x[1]), x[0]), reverse=True) |
| |
| if 'last' in passes and len(funcs): |
| count = funcs[0][1].count('\n') |
| if count > 3000: |
| print('warning: Output contains some very large functions (%s lines in %s), consider building source files with -Os or -Oz)' % (count, funcs[0][0]), file=sys.stderr) |
| |
| for func in funcs: |
| f.write(func[1]) |
| funcs = None |
| else: |
| # just concat the outputs |
| for out_file in filenames: |
| f.write(utils.read_file(out_file)) |
| |
| with ToolchainProfiler.profile_block('write_post'): |
| f.write('\n') |
| f.write(post) |
| # No need to write suffix: if there was one, it is inside post which exists when suffix is there |
| f.write('\n') |
| f.close() |
| |
| return filename |
| |
| |
| @ToolchainProfiler.profile_block('js_optimizer.run_on_js') |
| def run(filename, passes, extra_info=None): |
| just_split = 'receiveJSON' in passes |
| just_concat = 'emitJSON' in passes |
| return run_on_js(filename, passes, extra_info=extra_info, just_split=just_split, just_concat=just_concat) |
| |
| |
| def main(): |
| last = sys.argv[-1] |
| if '{' in last: |
| extra_info = json.loads(last) |
| sys.argv = sys.argv[:-1] |
| else: |
| extra_info = None |
| out = run(sys.argv[1], sys.argv[2:], extra_info=extra_info) |
| shutil.copyfile(out, sys.argv[1] + '.jsopt.js') |
| return 0 |
| |
| |
| if __name__ == '__main__': |
| sys.exit(main()) |