# Copyright 2016 The Emscripten Authors. All rights reserved.
# Emscripten is available under two separate licenses, the MIT license and the
# University of Illinois/NCSA Open Source License. Both these licenses can be
# found in the LICENSE file.
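"""Eliminates duplicate function implementations from Emscripten's asm.js
output. A first pass over the file computes a hash for every function body
and records which functions share a hash; a second pass uses that info to
collapse each set of identical functions down to a single (shortest-named)
representative. See eliminate_duplicate_funcs() below for the driver loop.
"""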
from __future__ import print_function
import os
import sys
import subprocess
import re
import json
import shutil
import tempfile
import logging
import traceback
sys.path.insert(1, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from tools import shared
from tools.js_optimizer import DEBUG, temp_files, start_funcs_marker, end_funcs_marker, split_funcs, start_asm_marker, end_asm_marker
from tools.js_optimizer import MIN_CHUNK_SIZE, MAX_CHUNK_SIZE, NUM_CHUNKS_PER_CORE
DUPLICATE_FUNCTION_ELIMINATOR = shared.path_from_root('tools', 'eliminate-duplicate-functions.js')
def process_shell(js_engine, shell, equivalentfn_hash_info=None):
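  """Run the duplicate-function eliminator over the asm shell (the code
  surrounding the function bodies), reusing previously computed hash info.
  Returns the processed shell text from the tool's stdout.
  """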
suffix = '.eliminatedupes'
with temp_files.get_file(suffix + '.js') as temp_file:
with open(temp_file, 'w') as f:
f.write(shell)
f.write('\n')
      f.write(equivalentfn_hash_info or '')  # tolerate a missing hash-info suffix
proc = shared.run_process(
js_engine +
[DUPLICATE_FUNCTION_ELIMINATOR, temp_file, '--use-hash-info', '--no-minimize-whitespace'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  assert len(proc.stdout), 'expected output from the duplicate function eliminator'
  assert len(proc.stderr) == 0, proc.stderr
return proc.stdout
def run_on_chunk(command):
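  """Run one eliminator command (built in run_on_js) on a single chunk file.
  Returns the name of a temp file holding the tool's output, which is JS in
  the reduction pass, or JSON when --gen-hash-info is passed.
  """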
try:
file_suffix = '.js'
index = command.index(DUPLICATE_FUNCTION_ELIMINATOR)
filename = command[index + 1]
if '--gen-hash-info' in command:
file_suffix = '.json'
    if os.environ.get('EMCC_SAVE_OPT_TEMP') and os.environ.get('EMCC_SAVE_OPT_TEMP') != '0':
      # Pick a save name that does not collide with an earlier saved copy in the temp dir
      saved = 'save_' + os.path.basename(filename)
      counter = 0
      while os.path.exists(os.path.join(shared.get_emscripten_temp_dir(), saved)):
        counter += 1
        saved = 'save_%d_%s' % (counter, os.path.basename(filename))
      print('running DFE command', ' '.join([c if c != filename else saved for c in command]), file=sys.stderr)
      shutil.copyfile(filename, os.path.join(shared.get_emscripten_temp_dir(), saved))
if shared.EM_BUILD_VERBOSE >= 3:
print('run_on_chunk: ' + str(command), file=sys.stderr)
proc = shared.run_process(command, stdout=subprocess.PIPE)
output = proc.stdout
assert proc.returncode == 0, 'Error in optimizer (return code ' + str(proc.returncode) + '): ' + output
assert len(output) and not output.startswith('Assertion failed'), 'Error in optimizer: ' + output
filename = temp_files.get(os.path.basename(filename) + '.dfjo' + file_suffix).name
with open(filename, 'w') as f:
f.write(output)
if DEBUG and not shared.WINDOWS:
print('.', file=sys.stderr) # Skip debug progress indicator on Windows, since it doesn't buffer well with multiple threads printing to console.
return filename
  except KeyboardInterrupt:
    # avoid propagating a KeyboardInterrupt out of a child process
    raise Exception('KeyboardInterrupt in child process')
except (TypeError, ValueError):
formatted_lines = traceback.format_exc().splitlines()
print(">>>>>>>>>>>>>>>>>", file=sys.stderr)
for formatted_line in formatted_lines:
print(formatted_line, file=sys.stderr)
print("<<<<<<<<<<<<<<<<<", file=sys.stderr)
raise
def dump_equivalent_functions(passed_in_filename, global_data):
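  """Write (or merge into) <input>.equivalent_functions.json, a debugging
  dump that maps each function-body hash to the list of function names
  sharing it, e.g. {"<hash>": ["fn_a", "fn_b"]} (names illustrative).
  """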
# Represents the sets of equivalent functions for the passed in filename
equivalent_fn_info = {}
equivalent_fn_json_file = passed_in_filename + ".equivalent_functions.json"
# If we are running more than one pass, then we want to merge
# all the hash infos into one
if os.path.isfile(equivalent_fn_json_file):
print("Merging data from current pass for {} into {}".format(passed_in_filename, equivalent_fn_json_file), file=sys.stderr)
with open(equivalent_fn_json_file) as data_file:
equivalent_fn_info = json.load(data_file)
else:
print("Writing equivalent functions for {} to {}".format(passed_in_filename, equivalent_fn_json_file), file=sys.stderr)
# Merge the global data's fn_hash_to_fn_name structure into
# the equivalent function info hash.
for fn_hash, fn_names in global_data['fn_hash_to_fn_name'].items():
if fn_hash not in equivalent_fn_info:
# Exclude single item arrays as they are of no use to us.
if len(fn_names) > 1:
equivalent_fn_info[fn_hash] = fn_names[:]
else:
for fn_name in fn_names:
if fn_name not in equivalent_fn_info[fn_hash]:
equivalent_fn_info[fn_hash].append(fn_name)
with open(equivalent_fn_json_file, 'w') as fout:
fout.write(json.dumps(equivalent_fn_info))
def write_equivalent_fn_hash_to_file(f, json_files, passed_in_filename):
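  """Aggregate the per-chunk JSON hash info and append the resulting
  equivalent-function map to the output file as a trailing comment line of
  the form '// {"dupe_fn":"canonical_fn"}' (names illustrative). For each
  set of functions with identical bodies, the shortest name that is not
  also a recorded variable name is chosen as the canonical one.
  """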
# Represents the aggregated info for all the json files passed in
# Each json file contains info for one of the processed chunks
global_data = {}
global_data['fn_hash_to_fn_name'] = {}
global_data['fn_hash_to_fn_body'] = {}
global_data['variable_names'] = {}
for json_file in json_files:
with open(json_file) as data_file:
data = json.load(data_file)
# Merge the data's fn_hash_to_fn_name structure into
# the global data hash.
for fn_hash, fn_names in data['fn_hash_to_fn_name'].items():
if fn_hash not in global_data['fn_hash_to_fn_name']:
global_data['fn_hash_to_fn_name'][fn_hash] = fn_names[:]
global_data['fn_hash_to_fn_body'][fn_hash] = data['fn_hash_to_fn_body'][fn_hash]
else:
          assert data['fn_hash_to_fn_body'][fn_hash] == global_data['fn_hash_to_fn_body'][fn_hash], 'identical hashes must map to identical function bodies'
for fn_name in fn_names:
if fn_name not in global_data['fn_hash_to_fn_name'][fn_hash]:
global_data['fn_hash_to_fn_name'][fn_hash].append(fn_name)
# Merge the data's variable_names structure into
# the global data hash.
for variable, value in data['variable_names'].items():
if variable not in global_data['variable_names']:
global_data['variable_names'][variable] = value
variable_names = global_data['variable_names']
  # Let's generate the equivalent function hash from the global data set
equivalent_fn_hash = {}
for fn_hash, fn_names in global_data['fn_hash_to_fn_name'].items():
shortest_fn = None
for fn_name in fn_names:
if (fn_name not in variable_names) and (shortest_fn is None or (len(fn_name) < len(shortest_fn))):
shortest_fn = fn_name
if shortest_fn is not None:
for fn_name in fn_names:
if fn_name not in variable_names and fn_name != shortest_fn:
equivalent_fn_hash[fn_name] = shortest_fn
# Dump the sets of equivalent functions if the user desires it
# This comes in handy for debugging
if shared.Settings.ELIMINATE_DUPLICATE_FUNCTIONS_DUMP_EQUIVALENT_FUNCTIONS:
dump_equivalent_functions(passed_in_filename, global_data)
# Now write the equivalent function hash to the last line of the file
f.write('// ' + json.dumps(equivalent_fn_hash, separators=(',', ':')))
# gen_hash_info determines whether we are generating the global set of
# function-implementation hashes (True), or using previously generated hash
# info to reduce the set of duplicate functions (False).
# Returns the filename of the processed JS file, which the caller is
# expected to delete once done.
def run_on_js(filename, gen_hash_info=False):
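  """Process one marker-delimited JS file and return the output filename.

  The input is assumed (per the markers used below) to look roughly like:

    <js shell>
    // EMSCRIPTEN_START_ASM
    <asm shell pre>
    // EMSCRIPTEN_START_FUNCS
    <function bodies>
    // EMSCRIPTEN_END_FUNCS
    <asm shell post>
    // EMSCRIPTEN_END_ASM
    <js shell>

  The function bodies are chunked and processed in parallel. In the
  reduction pass (gen_hash_info=False) the asm shell is processed too, and
  the trailing '// {...}' hash-info comment is consumed rather than kept.
  """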
js_engine = shared.NODE_JS
  with open(filename) as f:
    js = f.read()
if os.linesep != '\n':
js = js.replace(os.linesep, '\n') # we assume \n in the splitting code
equivalentfn_hash_info = None
passed_in_filename = filename
# Find markers
start_funcs = js.find(start_funcs_marker)
end_funcs = js.rfind(end_funcs_marker)
if start_funcs < 0 or end_funcs < start_funcs:
logging.critical('Invalid input file. Did not contain appropriate markers. (start_funcs: %s, end_funcs: %s)' % (start_funcs, end_funcs))
sys.exit(1)
if not gen_hash_info:
equivalentfn_hash_info = js[js.rfind('//'):]
start_asm = js.find(start_asm_marker)
end_asm = js.rfind(end_asm_marker)
assert (start_asm >= 0) == (end_asm >= 0)
# We need to split out the asm shell as well, for minification
pre = js[:start_asm + len(start_asm_marker)]
post = js[end_asm:]
asm_shell_pre = js[start_asm + len(start_asm_marker):start_funcs + len(start_funcs_marker)]
# Prevent "uglify" from turning 0.0 into 0 in variables' initialization. To do this we first replace 0.0 with
# ZERO$DOT$ZERO and then replace it back.
asm_shell_pre = re.sub(r'(\S+\s*=\s*)0\.0', r'\1ZERO$DOT$ZERO', asm_shell_pre)
asm_shell_post = js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)]
asm_shell = asm_shell_pre + '\nEMSCRIPTEN_FUNCS();\n' + asm_shell_post
js = js[start_funcs + len(start_funcs_marker):end_funcs]
    # Re-split the processed shell around the EMSCRIPTEN_FUNCS() placeholder
asm_shell_pre, asm_shell_post = process_shell(js_engine, asm_shell, equivalentfn_hash_info).split('EMSCRIPTEN_FUNCS();')
asm_shell_pre = re.sub(r'(\S+\s*=\s*)ZERO\$DOT\$ZERO', r'\g<1>0.0', asm_shell_pre)
asm_shell_post = asm_shell_post.replace('});', '})')
pre += asm_shell_pre + '\n' + start_funcs_marker
post = end_funcs_marker + asm_shell_post + post
    # We don't need the extra hash info at the end
    post = post[:post.rfind('//')].strip()
else:
pre = js[:start_funcs + len(start_funcs_marker)]
post = js[end_funcs + len(end_funcs_marker):]
js = js[start_funcs + len(start_funcs_marker):end_funcs]
post = end_funcs_marker + post
total_size = len(js)
funcs = split_funcs(js, False)
js = None
  # break the functions into chunks so they can be processed in parallel
  # (and to keep memory use down in the child node processes)
cores = shared.Building.get_num_cores()
intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks))
chunks = shared.chunkify(funcs, chunk_size)
chunks = [chunk for chunk in chunks if len(chunk)]
if DEBUG and len(chunks):
print('chunkification: num funcs:', len(funcs), 'actual num chunks:', len(chunks), 'chunk size range:', max(map(len, chunks)), '-', min(map(len, chunks)), file=sys.stderr)
funcs = None
if len(chunks):
def write_chunk(chunk, i):
temp_file = temp_files.get('.jsfunc_%d.js' % i).name
with open(temp_file, 'w') as f:
f.write(chunk)
if not gen_hash_info:
f.write('\n')
f.write(equivalentfn_hash_info)
return temp_file
    filenames = [write_chunk(chunk, i) for i, chunk in enumerate(chunks)]
else:
filenames = []
old_filenames = filenames[:]
if len(filenames):
commands = [js_engine + [DUPLICATE_FUNCTION_ELIMINATOR, f, '--gen-hash-info' if gen_hash_info else '--use-hash-info', '--no-minimize-whitespace'] for f in filenames]
    if DEBUG:
      print([' '.join(command) for command in commands], file=sys.stderr)
cores = min(cores, len(filenames))
if len(chunks) > 1 and cores >= 2:
# We can parallelize
if DEBUG:
print('splitting up js optimization into %d chunks, using %d cores (total: %.2f MB)' % (len(chunks), cores, total_size / (1024 * 1024.)), file=sys.stderr)
pool = shared.Building.get_multiprocessing_pool()
filenames = pool.map(run_on_chunk, commands, chunksize=1)
else:
      # We can't parallelize, but we still break into chunks to avoid uglify/node memory issues
      if len(chunks) > 1 and DEBUG:
        print('splitting up js optimization into %d chunks' % len(chunks), file=sys.stderr)
filenames = [run_on_chunk(command) for command in commands]
else:
filenames = []
  # the chunk runs created temp files; note them here so they are cleaned up when we are done
for filename in filenames:
temp_files.note(filename)
json_files = []
  # We're going to coalesce the files back together at the end. Just replace
  # the file list with the ones provided in the command list, and save off
  # the generated JSON.
if gen_hash_info:
json_files = filenames[:]
filenames = old_filenames[:]
for filename in filenames:
temp_files.note(filename)
  # NB: the loops above rebind 'filename', so when chunks were processed the
  # output name derives from the last chunk's temp file (keeping it in the
  # temp dir); with no chunks it derives from the input file.
  filename += '.jo.js'
f = open(filename, 'w')
f.write(pre)
pre = None
  # sort functions by size, to make diffing easier and to improve AOT (ahead-of-time) compile times
funcses = []
  for out_file in filenames:
    with open(out_file) as fin:
      funcses.append(split_funcs(fin.read(), False))
funcs = [item for sublist in funcses for item in sublist]
funcses = None
if not os.environ.get('EMCC_NO_OPT_SORT'):
funcs.sort(key=lambda x: (len(x[1]), x[0]), reverse=True)
for func in funcs:
f.write(func[1])
funcs = None
f.write('\n')
f.write(post)
  # No need to write a suffix here: if there was one, it is already contained in 'post'
f.write('\n')
if gen_hash_info and len(json_files):
write_equivalent_fn_hash_to_file(f, json_files, passed_in_filename)
f.close()
return filename
def save_temp_file(file_to_process):
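  """If EMSCRIPTEN_SAVE_TEMP_FILES and EMSCRIPTEN_TEMP_FILES_DIR are both set
  in the environment, copy the given temp file into the directory named by
  EMSCRIPTEN_TEMP_FILES_DIR, mirroring its path under the system temp dir.
  """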
if os.environ.get('EMSCRIPTEN_SAVE_TEMP_FILES') and os.environ.get('EMSCRIPTEN_TEMP_FILES_DIR'):
    destination_file = file_to_process.replace(tempfile.gettempdir(), os.environ.get('EMSCRIPTEN_TEMP_FILES_DIR'))
    if not os.path.exists(os.path.dirname(destination_file)):
      os.makedirs(os.path.dirname(destination_file))
    print("Copying {} to {}".format(file_to_process, destination_file), file=sys.stderr)
    shutil.copyfile(file_to_process, destination_file)
def get_func_names(javascript_file):
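  """Scan the text between the EMSCRIPTEN_START_FUNCS/END_FUNCS markers and
  return the names of all defined functions, e.g. 'function _foo(...)'
  yields '_foo' (name illustrative).
  """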
func_names = []
start_tok = "// EMSCRIPTEN_START_FUNCS"
end_tok = "// EMSCRIPTEN_END_FUNCS"
  with open(javascript_file, 'rt') as fin:
    blob = fin.read()
  start_off = blob.find(start_tok) + len(start_tok)
  end_off = blob.find(end_tok)
  asm_chunk = blob[start_off:end_off]
  for match in re.finditer(r'function (\S+?)\s*\(', asm_chunk):
    func_names.append(match.group(1))
return func_names
def eliminate_duplicate_funcs(file_name):
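  """Driver: for each configured pass, first run run_on_js with
  gen_hash_info=True to produce a copy of the file with the function-hash
  info appended, then run it again with gen_hash_info=False to actually
  eliminate the duplicates, and move the result back over file_name.
  Optionally logs eliminated names to <file_name>.eliminated_functions.json.
  """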
if shared.Settings.ELIMINATE_DUPLICATE_FUNCTIONS_DUMP_EQUIVALENT_FUNCTIONS != 0:
# Remove previous log file if it exists
equivalent_fn_json_file = file_name + ".equivalent_functions.json"
if os.path.isfile(equivalent_fn_json_file):
print("Deleting old json: " + equivalent_fn_json_file, file=sys.stderr)
os.remove(equivalent_fn_json_file)
old_funcs = get_func_names(file_name)
for pass_num in range(shared.Settings.ELIMINATE_DUPLICATE_FUNCTIONS_PASSES):
if DEBUG:
print("[PASS {}]: eliminating duplicate functions in: {}.".format(pass_num, file_name), file=sys.stderr)
# Generate the JSON for the equivalent hash first
processed_file = run_on_js(filename=file_name, gen_hash_info=True)
try:
save_temp_file(processed_file)
# Use the hash to reduce the JS file
final_file = run_on_js(filename=processed_file, gen_hash_info=False)
finally:
os.remove(processed_file)
save_temp_file(final_file)
shared.safe_move(final_file, file_name)
if shared.Settings.ELIMINATE_DUPLICATE_FUNCTIONS_DUMP_EQUIVALENT_FUNCTIONS != 0:
new_funcs = get_func_names(file_name)
eliminated_funcs_file = file_name + ".eliminated_functions.json"
print("Writing eliminated functions to file: {}".format(eliminated_funcs_file), file=sys.stderr)
with open(eliminated_funcs_file, 'w') as fout:
eliminated_functions = list(set(old_funcs) - set(new_funcs))
eliminated_functions.sort()
for eliminated_function in eliminated_functions:
fout.write('{}\n'.format(eliminated_function))
def run(filename, js_engine=shared.NODE_JS):
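  """Entry point: eliminates duplicate functions in 'filename' in place and
  cleans up temp files when done.
  """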
  # Note: js_engine is currently unused; run_on_js always uses shared.NODE_JS.
  js_engine = shared.listify(js_engine)
return temp_files.run_and_clean(lambda: eliminate_duplicate_funcs(filename))
if __name__ == '__main__':
run(sys.argv[1], sys.argv[2:])
sys.exit(0)