blob: ae694fc2b34998a92db2e6afffd65e8adb4e6fb6 [file] [log] [blame]
// Copyright 2016 The Emscripten Authors. All rights reserved.
// Emscripten is available under two separate licenses, the MIT license and the
// University of Illinois/NCSA Open Source License. Both these licenses can be
// found in the LICENSE file.
//
// Duplicate Function Elimination.
//
// This is a Javascript file that is used to post-process an Emscripten
// transpiled JS file. It will remove all the duplicate functions from the
// generated ASM. In its current form, the input JS file is expected to be a
// 'chunk' from an Emscripten generated ASM.JS file.
//
// An ASM JS chunk consists of a number of ASM.JS function definitions. It can
// also represent the ASM JS 'shell' which consists of the global variable
// declarations for the generated ASM JS.
//
// The file will remove all the generated functions that are deemed to be
// identical. Currently, the file will only run one pass of the algorithm. The
// caller of this JS file can run multiple passes to ensure that higher level
// functions which will become identical after a pass can be further eliminated.
//
// Usually, 4 or at most 5 passes will result in an optimal reduction - i.e., in
// a file that cannot be reduced any further.
var crypto = require('crypto');
var uglify = require('../tools/eliminator/node_modules/uglify-js');
var nodeFS = require('fs');
var nodePath = require('path');
var debug = false;
var debugFile = undefined;
var debugFileName = 'function_eliminator.log';
var genHashInfo = false;
var useHashInfo = false;
var useAsmAst = false;
// Variables that helps control verbosity of debug spew
// Set appropriate zones here (to 0 or 1) for debugging various
// parts of the algorithm.
var ZONE_IDENTIFY_DUPLICATE_FUNCS = 1;
var ZONE_REPLACE_FUNCTION_REFERENCES = 1;
var ZONE_REPLACE_DUPLICATE_FUNCS = 1;
var ZONE_EQUIVALENT_FUNCTION_HASH = 1;
var ZONE_TOP_LEVEL = 1;
var ZONE_DUMP_AST = 0;
if (!nodeFS.existsSync) {
nodeFS.existsSync = function(path) {
try {
return !!nodeFS.readFileSync(path);
} catch (e) {
return false;
}
}
}
function srcToAst(src) {
return uglify.parser.parse(src, false, false);
}
function astToSrc(ast, minifyWhitespace) {
return uglify.uglify.gen_code(ast, {
debug: debug,
ascii_only: true,
beautify: !minifyWhitespace,
indent_level: 1
});
}
// Traverses the children of a node. If the traverse function returns an object,
// replaces the child. If it returns true, stop the traversal and return true.
function traverseChildren(node, traverse, pre, post) {
for (var i = 0; i < node.length; i++) {
var subnode = node[i];
if (Array.isArray(subnode)) {
var subresult = traverse(subnode, pre, post);
if (subresult === true) return true;
if (subresult !== null && typeof subresult === 'object') node[i] = subresult;
}
}
}
print = function(x) {
process['stdout'].write(x + '\n');
};
printErr = function(x) {
process['stderr'].write(x + '\n');
};
function debugLog(zone, str) {
if (debug && (zone !== 0)) {
nodeFS.writeSync(debugFile, str + '\n');
}
}
// Traverses a JavaScript syntax tree rooted at the given node calling the given
// callback for each node.
// @arg node: The root of the AST.
// @arg pre: The pre to call for each node. This will be called with
// the node as the first argument and its type as the second. If true is
// returned, the traversal is stopped. If an object is returned,
// it replaces the passed node in the tree. If null is returned, we stop
// traversing the subelements (but continue otherwise).
// @arg post: A callback to call after traversing all children.
// @returns: If the root node was replaced, the new root node. If the traversal
// was stopped, true. Otherwise undefined.
function traverse(node, pre, post) {
var type = node[0],
result, len;
var relevant = typeof type === 'string';
if (relevant) {
var result = pre(node, type);
if (result === true) return true;
if (result && result !== null) node = result; // Continue processing on this node
}
if (result !== null) {
if (traverseChildren(node, traverse, pre, post) === true) return true;
}
if (relevant) {
if (post) {
var postResult = post(node, type);
result = result || postResult;
}
}
return result;
}
function dumpAst(ast) {
debugLog(ZONE_DUMP_AST, JSON.stringify(ast, null, ' '));
}
function getFunctionBody(node) {
// Remove the function <name> part of the source for the function
var functionSrc = astToSrc(node, true);
var functionNameRegex = /(function .*?)\(/;
return functionSrc.replace(functionNameRegex, "(");
}
function traverseFunctions(ast, callback) {
var topLevelList = useAsmAst ? ast : ast[1];
for (var listIndex = 0; listIndex < topLevelList.length; ++listIndex) {
var node = topLevelList[listIndex];
if (node[0] === 'defun') {
callback(node);
}
}
}
function identifyDuplicateFunctions(ast) {
debugLog(ZONE_TOP_LEVEL, "identifyDuplicateFunctions");
var functionHashToFunctionName = {};
traverseFunctions(ast, function(node) {
debugLog(ZONE_IDENTIFY_DUPLICATE_FUNCS, "Node: " + node);
var functionBody = getFunctionBody(node);
debugLog(ZONE_IDENTIFY_DUPLICATE_FUNCS, "Function Body: " + functionBody + "\n");
var functionHash = crypto.createHash('sha256').update(functionBody).digest('hex');
if (functionHashToFunctionName[functionHash] === undefined) {
functionHashToFunctionName[functionHash] = [];
}
debugLog(ZONE_IDENTIFY_DUPLICATE_FUNCS, typeof node[1]);
functionHashToFunctionName[functionHash].push(node[1]);
debugLog(ZONE_IDENTIFY_DUPLICATE_FUNCS, functionHash + '->' + node[1]);
});
if (debug) {
for (var key in functionHashToFunctionName) {
debugLog(ZONE_IDENTIFY_DUPLICATE_FUNCS, key + "->" + functionHashToFunctionName[key]);
}
}
return functionHashToFunctionName;
}
function getVariableNames(ast) {
var variableNames = {};
traverse(ast, function(node, type) {
if (type === 'var') {
var vars = node[1];
if (Array.isArray(vars)) {
for (var i = 0; i < vars.length; i++) {
var ident = vars[i][0];
variableNames[ident] = 1;
}
}
}
});
return variableNames;
}
function replaceFunctionDefinitions(ast, equivalentFunctionHash) {
debugLog(ZONE_TOP_LEVEL, 'replaceFunctionDefinitions');
var topLevelList = useAsmAst ? ast : ast[1];
var indicesToRemove = [];
for (var listIndex = 0; listIndex < topLevelList.length; ++listIndex) {
var node = topLevelList[listIndex];
if (node[0] === 'defun' && equivalentFunctionHash[node[1]] !== undefined) {
indicesToRemove.push(listIndex);
}
}
if (indicesToRemove.length > 0) {
for (var i = indicesToRemove.length - 1; i >= 0; --i) {
debugLog(ZONE_REPLACE_DUPLICATE_FUNCS, "Removing " + topLevelList[indicesToRemove[i]][1]);
topLevelList.splice(indicesToRemove[i], 1);
}
}
}
function replaceFunctionReferences(ast, equivalentFunctionHash) {
debugLog(ZONE_TOP_LEVEL, 'replaceFunctionReferences');
traverse(ast, function(node, type) {
if (type === 'call') {
var functionName = node[1][1];
// Replace the call with a call to the equivalent function if there is one
if (equivalentFunctionHash[functionName] !== undefined) {
node[1][1] = equivalentFunctionHash[functionName];
}
} else if (type === 'var') {
var vars = node[1];
for (var i = 0; i < vars.length; i++) {
debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, 'Variable: ' + vars[i]);
var value = vars[i][1][1];
debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, 'Variable value: ' + value);
if (equivalentFunctionHash[value] !== undefined) {
debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, 'Variable value replacement: ' + equivalentFunctionHash[value]);
vars[i][1][1] = equivalentFunctionHash[value];
}
}
} else if (type === 'assign') {
if (node[3][0] === 'name' && equivalentFunctionHash[node[3][1]] !== undefined) {
node[3][1] = equivalentFunctionHash[node[3][1]];
}
} else if (type === 'object') {
var assignments = node[1];
for (var i = 0; i < assignments.length; i++) {
debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, 'Object Value Assignment: ' + assignments[i][1][1]);
if (equivalentFunctionHash[assignments[i][1][1]] !== undefined) {
assignments[i][1][1] = equivalentFunctionHash[assignments[i][1][1]];
}
}
} else if (type === 'array') {
var arrayVars = node[1];
if (Array.isArray(arrayVars)) {
for (var i = 0; i < arrayVars.length; i++) {
debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, "Array: " + arrayVars[i][0] + ", " + arrayVars[i][1]);
// First element contains type, 2nd contains value
if (arrayVars[i][0] == 'name' && equivalentFunctionHash[arrayVars[i][1]] !== undefined) {
debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, "Replacing array value " + arrayVars[i][1]);
arrayVars[i][1] = equivalentFunctionHash[arrayVars[i][1]];
}
}
} else {
debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, "ArrayVars (not an array): " + arrayVars + ", node: " + node);
}
}
});
}
function replaceDuplicateFuncs(ast, equivalentFunctionHash) {
debugLog(ZONE_TOP_LEVEL, "replaceDuplicateFuncs");
// Replace references to all functions with their equivalent function
replaceFunctionReferences(ast, equivalentFunctionHash);
// Now lets replace the function definitions
replaceFunctionDefinitions(ast, equivalentFunctionHash);
}
function logEquivalentFunctionHash(equivalentFunctionHash) {
if (debug && ZONE_EQUIVALENT_FUNCTION_HASH != 0) {
debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, "Equivalent Function Hash:");
for (var fn in equivalentFunctionHash) {
debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, fn + "->" + equivalentFunctionHash[fn]);
}
}
}
function generateEquivalentFunctionHash(functionHashToFunctionName, variableNames) {
var equivalentFunctionHash = {};
debugLog(ZONE_TOP_LEVEL, "generateEquivalentFunctionHash");
if (debug && ZONE_EQUIVALENT_FUNCTION_HASH != 0) {
debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, "Equivalent Functions:");
for (var fnHash in functionHashToFunctionName) {
if (functionHashToFunctionName[fnHash].length > 1) {
debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, JSON.stringify(functionHashToFunctionName[fnHash], null, ' '));
}
}
}
for (var fnHash in functionHashToFunctionName) {
var equivalentFunctions = functionHashToFunctionName[fnHash];
var shortestFunction = undefined;
var equivalentFn = undefined;
// From each list of equivalent functions, pick the
// shortest one that is not also a variable name
for (var index in equivalentFunctions) {
equivalentFn = equivalentFunctions[index];
// If one of the variables is not the same name as the equivalent function,
// and the equivalent function is shorter than the shortest function.
if ((variableNames[equivalentFn] === undefined) &&
(shortestFunction === undefined || equivalentFn.length < shortestFunction.length)) {
shortestFunction = equivalentFn;
}
if (debug && variableNames[equivalentFn] !== undefined) {
debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, equivalentFn + " is a variable");
}
}
if (shortestFunction !== undefined) {
// Populate the equivalent function hash with this info
for (var index in equivalentFunctions) {
equivalentFn = equivalentFunctions[index];
// If we're not the shortest function, and
// we are not a variable name
if ((equivalentFn !== shortestFunction) && variableNames[equivalentFn] === undefined) {
equivalentFunctionHash[equivalentFn] = shortestFunction;
debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, equivalentFn + "->" + shortestFunction);
}
}
}
}
return equivalentFunctionHash;
}
function getBodyForFunction(ast, functionName) {
var functionBody = undefined;
var topLevelList = ast[1];
for (var listIndex = 0; listIndex < topLevelList.length; ++listIndex) {
var node = topLevelList[listIndex];
if (node[0] === 'defun' && node[1] === functionName) {
functionBody = getFunctionBody(node);
break;
}
}
return functionBody;
}
function checkForHashCollisions(ast, functionHashToFunctionName) {
var functionHashToFunctionBody = {};
for (var functionHash in functionHashToFunctionName) {
var equivalentFunctions = functionHashToFunctionName[functionHash];
var functionBody = getBodyForFunction(ast, equivalentFunctions[0]);
functionHashToFunctionBody[functionHash] = functionBody;
// If we have more than one equivalent function, make sure
// that the bodies are the same from the hash values
if (equivalentFunctions.length > 1) {
for (var functionIndex = 1; functionIndex < equivalentFunctions.length; ++functionIndex) {
var curFunctionBody = getBodyForFunction(ast, equivalentFunctions[functionIndex]);
if (curFunctionBody !== functionBody) {
printErr("ERROR!!! Function bodies for two hash-equivalent functions differ!!! Candidates: "
+ equivalentFunctions[0] + ", " + equivalentFunctions[functionIndex]);
process.exit(1);
}
}
}
}
return functionHashToFunctionBody;
}
function eliminateDuplicateFuncs(ast) {
debugLog(ZONE_TOP_LEVEL, "eliminateDuplicateFuncs");
// Phase 1 - identify duplicate functions
var functionHashToFunctionName = identifyDuplicateFunctions(ast);
// Phase 1.1 - Check for hash collisions
checkForHashCollisions(ast, functionHashToFunctionName);
// Phase 2 - identify variables that conflict with function names
var variableNames = getVariableNames(ast);
// Phase 3 - generate the equivalent function hash
var equivalentFunctionHash = generateEquivalentFunctionHash(functionHashToFunctionName, variableNames);
// Phase 4 - for each set of equivalent functions, pick one and
// use it to replace the other equivalent functions.
replaceDuplicateFuncs(ast, equivalentFunctionHash);
return;
}
function find(filename) {
var prefixes = [nodePath.join(__dirname, '..', 'src'), process.cwd()];
for (var i = 0; i < prefixes.length; ++i) {
var combined = nodePath.join(prefixes[i], filename);
if (nodeFS.existsSync(combined)) {
return combined;
}
}
return filename;
}
function findAsmAst(ast) {
var asmNode = undefined;
traverse(ast, function(node, type) {
if (type === 'var') {
var vars = node[1];
for (var i = 0; i < vars.length; i++) {
var ident = vars[i][0];
if (ident === 'asm') {
asmNode = vars[i][1][1][3]; // asm->call->toplevel-ast
}
}
}
});
return asmNode;
}
function printHashInfo(ast) {
debugLog(ZONE_TOP_LEVEL, "printHashInfo");
var infoHash = {};
infoHash['variable_names'] = getVariableNames(ast);
infoHash['fn_hash_to_fn_name'] = identifyDuplicateFunctions(ast);
infoHash['fn_hash_to_fn_body'] = checkForHashCollisions(ast, infoHash['fn_hash_to_fn_name']);
print(JSON.stringify(infoHash));
}
read = function(filename) {
var absolute = find(filename);
return nodeFS['readFileSync'](absolute).toString();
};
// Main
var arguments_ = process['argv'].slice(2);
var noMinimizeWhitespace = false; // Eliminate whitespace by default
var functionName = undefined;
var src = undefined;
for (var argIndex = 0; argIndex < arguments_.length; ++argIndex) {
var arg = arguments_[argIndex];
if (arg === '--debug') {
debug = true;
debugFile = nodeFS.openSync(debugFileName, 'w');
} else if (arg === '--no-minimize-whitespace') {
noMinimizeWhitespace = true;
} else if (arg === '--gen-hash-info') {
genHashInfo = true;
} else if (arg === '--use-hash-info') {
useHashInfo = true;
} else if (arg === '--use-asm-ast') {
useAsmAst = true;
} else if (arg === '--get-function-body') {
if (argIndex === arguments_.length_ - 1) {
throw new Error('Please specify valid arguments!');
}
functionName = arguments_[argIndex+1];
argIndex += 1;
} else if (/^--/.test(arg)) {
throw new Error('Please specify valid arguments!');
} else if (src === undefined) {
src = read(arg);
} else {
throw new Error('Please specify valid arguments!');
}
}
var ast = srcToAst(src);
var asmAst = ast;
if (useAsmAst) {
asmAst = findAsmAst(ast);
}
if (debug) {
dumpAst(ast);
}
if (functionName !== undefined) {
var functionBody = getBodyForFunction(ast, functionName);
if (functionBody === undefined) {
throw new Error('Could not find body for function ' + functionName + '!!!');
}
print(functionBody);
} else if (genHashInfo) {
printHashInfo(asmAst);
} else {
equivalentFunctionHash = {};
if (useHashInfo) {
// The last line has the required info
infoHashJsonStart = src.lastIndexOf("//") + 2 // 2 for going past the //
if (infoHashJsonStart == -1) {
throw new Error('--use-hash-info specified but no JSON found at the end of the file!');
}
equivalentFunctionHash = JSON.parse(src.substring(infoHashJsonStart));
logEquivalentFunctionHash(equivalentFunctionHash);
replaceDuplicateFuncs(asmAst, equivalentFunctionHash);
} else {
eliminateDuplicateFuncs(asmAst);
}
var minimizeWhitespace = (debug || noMinimizeWhitespace) ? false : true;
var js = astToSrc(ast, minimizeWhitespace);
print(js);
}
if (debug && debugFile !== undefined) {
printErr('Wrote debug log to ' + debugFileName);
nodeFS.close(debugFile);
}