blob: 3ed85b926976bc4e9077fc42ab5023c6768a2b6c [file] [log] [blame] [edit]
/*
* Copyright 2017 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//
// Tries to reduce the input wasm into the smallest possible wasm
// that still generates the same result on a given command. This is
// useful to reduce bug testcases, for example, if a file crashes
// the optimizer, you can reduce it to find the smallest file that
// also crashes it (which generally will show the same bug, in a
// much more debuggable manner).
//
#include <cstdio>
#include <cstdlib>
#include "reducer.h"
#include "support/colors.h"
#include "support/command-line.h"
#include "support/file.h"
#include "support/path.h"
#include "tools/tool-options.h"
#include "wasm-io.h"
#include "tools/wasm-reduce/reducer.h"
namespace wasm {
// A timeout on every execution of the command.
size_t timeout = 2;
// Whether to save all intermediate working files as we go.
bool saveAllWorkingFiles = false;
// A string of feature flags and other things to pass while reducing. The
// default of enabling all features should work in most cases.
std::string extraFlags = "-all";
ProgramResult expected;
// Removing functions is extremely beneficial and efficient. We aggressively
// try to remove functions, unless we've seen they can't be removed, in which
// case we may try again but much later.
std::unordered_set<Name> functionsWeTriedToRemove;
// The index of the working file we save, when saveAllWorkingFiles. We must
// store this globally so that the difference instances of Reducer do not
// overlap.
size_t workingFileIndex = 0;
} // namespace wasm
using namespace wasm;
int main(int argc, const char* argv[]) {
std::string input, test, working, command;
// By default, look for binaries alongside our own binary.
std::string binDir = Path::getDirName(argv[0]);
bool binary = true, deNan = false, verbose = false, debugInfo = false,
force = false;
const std::string WasmReduceOption = "wasm-reduce options";
ToolOptions options(
"wasm-reduce",
R"(Reduce a wasm file to a smaller one with the same behavior on a given command.
Typical usage:
wasm-reduce orig.wasm '--command=bash a.sh' --test t.wasm --working w.wasm
The original file orig.wasm is where we begin. We then repeatedly test a small
reduction of it by writing that modification to the 'test file' (specified by
'--test'), and we run the command, in this example 'bash a.sh'. That command
should use the test file (and not the original file or any other one). Whenever
the reduction works, we write that new smaller file to the 'working file'
(specified by '--working'). The reduction 'works' if it correctly preserves the
behavior of the command on the original input, specifically, that it has the
same stdout and the result return code. Each time reduction works we continue to
reduce from that point (and each time it fails, we go back and try something
else).
As mentioned above, the command should run on the test file. That is, the first
thing that wasm-reduce does on the example above is, effectively,
cp orig.wasm t.wasm
bash a.sh
In other words, it copies the original to the test file, and runs the command.
Whatever the command does, we will preserve as we copy progressively smaller
files to t.wasm. As we make progress, the smallest file will be written to
the working file, w.wasm, and when reduction is done you will find the final
result there.
Comparison to creduce:
1. creduce requires the command to return 0. wasm-reduce is often used to reduce
crashes, which have non-zero return codes, so it is natural to allow any
return code. As mentioned above, we preserve the return code as we reduce.
2. creduce ignores stdout. wasm-reduce preserves stdout as it reduces, as part
of the principle of preserving the original behavior of the command (if your
stdout varies in uninteresting ways, your command can be a script that runs
the real command and captures stdout to /dev/null, or filters it).
3. creduce tramples the original input file as it reduces. wasm-reduce never
modifies the input (to avoid mistakes that cause data loss). Instead,
when reductions work we write to the 'working file' as mentioned above, and
the final reduction will be there.
4. creduce runs the command in a temp directory. That is safer in general, but
it is not how the original command ran, and in particular forces additional
work if you have multiple files (which, for wasm-reduce, is common, e.g. if
the testcase is a combination of JavaScript and wasm). wasm-reduce runs the
command in the current directory (of course, your command can be a script
that changes directory to anywhere else).
More documentation can be found at
https://github.com/WebAssembly/binaryen/wiki/Fuzzing#reducing
)");
options
.add("--command",
"-cmd",
"The command to run on the test, that we want to reduce while keeping "
"the command's output identical. "
"We look at the command's return code and stdout here (TODO: stderr), "
"and we reduce while keeping those unchanged.",
WasmReduceOption,
Options::Arguments::One,
[&](Options* o, const std::string& argument) { command = argument; })
.add("--test",
"-t",
"Test file (this will be written to test, the given command should "
"read it when we call it)",
WasmReduceOption,
Options::Arguments::One,
[&](Options* o, const std::string& argument) { test = argument; })
.add("--working",
"-w",
"Working file (this will contain the current good state while doing "
"temporary computations, "
"and will contain the final best result at the end)",
WasmReduceOption,
Options::Arguments::One,
[&](Options* o, const std::string& argument) { working = argument; })
.add("--binaries",
"-b",
"binaryen binaries location (bin/ directory)",
WasmReduceOption,
Options::Arguments::One,
[&](Options* o, const std::string& argument) {
// Add separator just in case
binDir = argument + Path::getPathSeparator();
})
.add("--text",
"-S",
"Emit intermediate files as text, instead of binary (also make sure "
"the test and working files have a .wat or .wast suffix)",
WasmReduceOption,
Options::Arguments::Zero,
[&](Options* o, const std::string& argument) { binary = false; })
.add("--denan",
"",
"Avoid nans when reducing",
WasmReduceOption,
Options::Arguments::Zero,
[&](Options* o, const std::string& argument) { deNan = true; })
.add("--verbose",
"-v",
"Verbose output mode",
WasmReduceOption,
Options::Arguments::Zero,
[&](Options* o, const std::string& argument) { verbose = true; })
.add("--debugInfo",
"-g",
"Keep debug info in binaries",
WasmReduceOption,
Options::Arguments::Zero,
[&](Options* o, const std::string& argument) { debugInfo = true; })
.add("--force",
"-f",
"Force the reduction attempt, ignoring problems that imply it is "
"unlikely to succeed",
WasmReduceOption,
Options::Arguments::Zero,
[&](Options* o, const std::string& argument) { force = true; })
.add("--timeout",
"-to",
"A timeout to apply to each execution of the command, in seconds "
"(default: 2)",
WasmReduceOption,
Options::Arguments::One,
[&](Options* o, const std::string& argument) {
timeout = atoi(argument.c_str());
std::cout << "|applying timeout: " << timeout << "\n";
})
.add("--extra-flags",
"-ef",
"Extra commandline flags to pass to wasm-opt while reducing. "
"(default: --enable-all)",
WasmReduceOption,
Options::Arguments::One,
[&](Options* o, const std::string& argument) {
extraFlags = argument;
std::cout << "|applying extraFlags: " << extraFlags << "\n";
})
.add("--save-all-working",
"-saw",
"Save all intermediate working files, as $WORKING.0, .1, .2 etc",
WasmReduceOption,
Options::Arguments::Zero,
[&](Options* o, const std::string& argument) {
saveAllWorkingFiles = true;
std::cout << "|saving all intermediate working files\n";
})
.add_positional(
"INFILE",
Options::Arguments::One,
[&](Options* o, const std::string& argument) { input = argument; });
options.parse(argc, argv);
if (debugInfo) {
extraFlags += " -g ";
}
if (test.size() == 0) {
Fatal() << "test file not provided\n";
}
if (working.size() == 0) {
Fatal() << "working file not provided\n";
}
if (!binary) {
Colors::setEnabled(false);
}
Path::setBinaryenBinDir(binDir);
std::cerr << "|wasm-reduce\n";
std::cerr << "|input: " << input << '\n';
std::cerr << "|test: " << test << '\n';
std::cerr << "|working: " << working << '\n';
std::cerr << "|bin dir: " << binDir << '\n';
std::cerr << "|extra flags: " << extraFlags << '\n';
// get the expected output
copy_file(input, test);
expected.getFromExecution(command);
std::cerr << "|expected result:\n" << expected << '\n';
std::cerr << "|!! Make sure the above is what you expect! !!\n\n";
auto stopIfNotForced = [&](std::string message, ProgramResult& result) {
std::cerr << "|! " << message << '\n' << result << '\n';
if (!force) {
Fatal() << "|! stopping, as it is very unlikely reduction can succeed "
"(use -f to ignore this check)";
}
};
if (expected.time + 1 >= timeout) {
stopIfNotForced("execution time is dangerously close to the timeout - you "
"should probably increase the timeout",
expected);
}
if (!force) {
std::cerr << "|checking that command has different behavior on different "
"inputs (this "
"verifies that the test file is used by the command)\n";
// Try it on an invalid input.
{
std::ofstream dst(test, std::ios::binary);
dst << "waka waka\n";
}
ProgramResult resultOnInvalid(command);
if (resultOnInvalid == expected) {
// Try it on a valid input.
Module emptyModule;
ModuleWriter writer(options.passOptions);
writer.setBinary(true);
writer.write(emptyModule, test);
ProgramResult resultOnValid(command);
if (resultOnValid == expected) {
Fatal()
<< "running the command on the given input gives the same result as "
"when running it on either a trivial valid wasm or a file with "
"nonsense in it. does the script not look at the test file (" +
test + ")? (use -f to ignore this check)";
}
}
}
std::cerr << "|checking that command has expected behavior on canonicalized "
"(read-written) binary\n";
{
// read and write it
auto cmd = Path::getBinaryenBinaryTool("wasm-opt") + " " + input + " -o " +
test + " " + extraFlags;
if (!binary) {
cmd += " -S ";
}
ProgramResult readWrite(cmd);
if (readWrite.failed()) {
stopIfNotForced("failed to read and write the binary", readWrite);
} else {
ProgramResult result(command);
if (result != expected) {
stopIfNotForced("running command on the canonicalized module should "
"give the same results",
result);
}
}
}
copy_file(input, working);
auto workingSize = file_size(working);
std::cerr << "|input size: " << workingSize << "\n";
std::cerr << "|starting reduction!\n";
int factor = binary ? workingSize * 2 : workingSize / 10;
size_t lastDestructiveReductions = 0;
size_t lastPostPassesSize = 0;
bool stopping = false;
while (1) {
Reducer reducer(
command, test, working, binary, deNan, verbose, debugInfo, options);
// run binaryen optimization passes to reduce. passes are fast to run
// and can often reduce large amounts of code efficiently, as opposed
// to detructive reduction (i.e., that doesn't preserve correctness as
// passes do) since destrucive must operate one change at a time
std::cerr << "| reduce using passes...\n";
auto oldSize = file_size(working);
reducer.reduceUsingPasses();
auto newSize = file_size(working);
auto passProgress = oldSize - newSize;
std::cerr << "| after pass reduction: " << newSize << "\n";
// always stop after a pass reduction attempt, for final cleanup
if (stopping) {
break;
}
// check if the full cycle (destructive/passes) has helped or not
if (lastPostPassesSize && newSize >= lastPostPassesSize) {
std::cerr << "| progress has stopped, skipping to the end\n";
if (factor == 1) {
// this is after doing work with factor 1, so after the remaining work,
// stop
stopping = true;
} else {
// decrease the factor quickly
factor = (factor + 1) / 2; // stable on 1
}
}
lastPostPassesSize = newSize;
// If destructive reductions lead to useful proportionate pass reductions,
// keep going at the same factor, as pass reductions are far faster.
std::cerr << "| pass progress: " << passProgress
<< ", last destructive: " << lastDestructiveReductions << '\n';
if (passProgress >= 4 * lastDestructiveReductions) {
std::cerr << "| progress is good, do not quickly decrease factor\n";
// While the amount of pass reductions is proportionately high, we do
// still want to reduce the factor by some amount. If we do not then there
// is a risk that both pass and destructive reductions are very low, and
// we get "stuck" cycling through them. In that case we simply need to do
// more destructive reductions to make real progress. For that reason,
// decrease the factor by some small percentage.
factor = std::max(1, (factor * 9) / 10);
} else {
if (factor > 10) {
factor = (factor / 3) + 1;
} else {
factor = (factor + 1) / 2; // stable on 1
}
}
// no point in a factor lorger than the size
assert(newSize > 4); // wasm modules are >4 bytes anyhow
factor = std::min(factor, int(newSize) / 4);
// try to reduce destructively. if a high factor fails to find anything,
// quickly try a lower one (no point in doing passes until we reduce
// destructively at least a little)
while (1) {
std::cerr << "| reduce destructively... (factor: " << factor << ")\n";
lastDestructiveReductions = reducer.reduceDestructively(factor);
if (lastDestructiveReductions > 0) {
break;
}
// we failed to reduce destructively
if (factor == 1) {
stopping = true;
break;
}
factor = std::max(
1, factor / 4); // quickly now, try to find *something* we can reduce
}
std::cerr << "| destructive reduction led to size: " << file_size(working)
<< '\n';
}
std::cerr << "|finished, final size: " << file_size(working) << "\n";
copy_file(working, test); // just to avoid confusion
}