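Update unescaping logic in `ReadDictionaryFromFile()` to support 2-digit hex codes. `absl::CUnescape()` assumes a hex escape can have more than 2 digits, so it fails on strings such as `"foo\x0Abar"` (one of the [examples from libFuzzer](https://llvm.org/docs/LibFuzzer.html#dictionaries)) or `"\xffc"` (from the [JPEG fuzzing dict](https://github.com/google/fuzzing/blob/master/dictionaries/jpeg.dict)), where the hex digits that follow the 2-digit code are read as part of the escape and the resulting value no longer fits into a single byte. PiperOrigin-RevId: 624264910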
diff --git a/fuzztest/BUILD b/fuzztest/BUILD index af63b6b..fd4eae7 100644 --- a/fuzztest/BUILD +++ b/fuzztest/BUILD
@@ -66,12 +66,24 @@ ":registration", ":registry", "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/strings:string_view", ], ) +cc_test( + name = "fuzztest_macros_test", + srcs = ["fuzztest_macros_test.cc"], + deps = [ + ":fuzztest_macros", + "@com_google_absl//absl/status", + "@com_google_googletest//:gtest_main", + ], +) + cc_library( name = "fuzztest_gtest_main", testonly = 1, @@ -127,15 +139,13 @@ deps = [ ":domain_core", ":fuzztest", + ":fuzztest_macros", ":io", ":llvm_fuzzer_main", "@com_google_absl//absl/flags:flag", "@com_google_absl//absl/log:check", "@com_google_absl//absl/random", "@com_google_absl//absl/random:bit_gen_ref", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:string_view", - "@com_googlesource_code_re2//:re2", ], alwayslink = True, )
diff --git a/fuzztest/fuzztest_macros.cc b/fuzztest/fuzztest_macros.cc index c80d43f..ca11706 100644 --- a/fuzztest/fuzztest_macros.cc +++ b/fuzztest/fuzztest_macros.cc
@@ -12,12 +12,56 @@ #include <vector> #include "absl/log/check.h" +#include "absl/status/status.h" #include "absl/strings/escaping.h" +#include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" #include "absl/strings/string_view.h" namespace fuzztest { +namespace { +absl::StatusOr<std::string> ParseDictionaryEntry(absl::string_view entry) { + // We can't use absl::CUnescape directly on the string, because it assumes hex + // codes can have more than 2 digits, which is not the case here. "\x41BC" is + // a valid entry that should be unescaped to "ABC", but absl::CUnescape will + // fail as it will interpret it as a 4-digit hex code, which does not fit + // into a single byte. + // We unescape "\\", "\"", as well as each 2-digit hex codes (e.g. "\xab"). + std::string parsed_entry; + int i = 0; + while (i < entry.size()) { + if (entry[i] != '\\') { // Handle unescaped character + parsed_entry.push_back(entry[i]); + ++i; + } else if (i + 1 < entry.size() && + (entry[i + 1] == '\\' || + entry[i + 1] == '"')) { // Handle \\ and \" + parsed_entry.push_back(entry[i + 1]); + i += 2; + } else if (i + 3 < entry.size() && + entry[i + 1] == 'x') { // Handle \xHH escape sequence + std::string unescaped_hex; + std::string error; + if (!absl::CUnescape(entry.substr(i, 4), &unescaped_hex, &error)) { + return absl::InvalidArgumentError(absl::StrCat( + "Could not unescape ", entry.substr(i, 4), ": ", error)); + } + if (unescaped_hex.size() != 1) { + return absl::InvalidArgumentError( + absl::StrCat("Could not unescape ", entry.substr(i, 4))); + } + parsed_entry.append(unescaped_hex); + i += 4; + } else { // No other escape sequences are allowed. 
+ return absl::InvalidArgumentError(absl::StrCat( + "Invalid escape sequence in dictionary entry: ", entry.substr(i, 2))); + } + } + return parsed_entry; +} +} // namespace + std::vector<std::tuple<std::string>> ReadFilesFromDirectory( std::string_view dir) { std::vector<std::tuple<std::string>> out; @@ -42,6 +86,41 @@ return out; } +absl::StatusOr<std::vector<std::string>> ParseDictionary( + absl::string_view text) { + std::vector<std::string> parsed_entries; + int line_number = 0; + for (absl::string_view line : absl::StrSplit(text, '\n')) { + ++line_number; + + if (line.empty() || line[0] == '#') continue; + auto first_index = line.find_first_of('"'); + auto last_index = line.find_last_of('"'); + if (last_index == std::string::npos) { + return absl::InvalidArgumentError( + absl::StrCat("Unparseable dictionary entry at line ", line_number, + ": missing quotes")); + } + if (last_index <= first_index) { + return absl::InvalidArgumentError( + absl::StrCat("Unparseable dictionary entry at line ", line_number, + ": entry must be enclosed in quotes")); + } + // Skip characters outside quotations. 
+ const absl::string_view entry = + line.substr(first_index + 1, last_index - first_index - 1); + absl::StatusOr<std::string> parsed_entry = ParseDictionaryEntry(entry); + if (!parsed_entry.ok()) { + return absl::Status( + parsed_entry.status().code(), + absl::StrCat("Unparseable dictionary entry at line ", line_number, + ": ", parsed_entry.status().message())); + } + parsed_entries.emplace_back(std::move(*parsed_entry)); + } + return parsed_entries; +} + std::vector<std::string> ReadDictionaryFromFile( std::string_view dictionary_file) { std::vector<fuzztest::internal::FilePathAndData> files = @@ -52,20 +131,12 @@ // Dictionary must be in the format specified at // https://llvm.org/docs/LibFuzzer.html#dictionaries for (const fuzztest::internal::FilePathAndData& file : files) { - for (absl::string_view line : absl::StrSplit(file.data, '\n')) { - if (line.empty() || line[0] == '#') continue; - auto first_index = line.find_first_of('"'); - auto last_index = line.find_last_of('"'); - CHECK(last_index != std::string::npos && first_index < last_index) - << "Invalid dictionary entry: " << line; - // Skip characters outside quotations. - const absl::string_view entry = - line.substr(first_index + 1, last_index - first_index - 1); - std::string unescaped_entry; - CHECK(absl::CUnescape(entry, &unescaped_entry)) - << "Could not unescape: " << entry; - out.emplace_back(std::move(unescaped_entry)); - } + absl::StatusOr<std::vector<std::string>> parsed_entries = + ParseDictionary(file.data); + CHECK(parsed_entries.status().ok()) + << "Could not parse dictionary file " << file.path << ": " + << parsed_entries.status(); + out.insert(out.end(), parsed_entries->begin(), parsed_entries->end()); } return out; }
diff --git a/fuzztest/fuzztest_macros.h b/fuzztest/fuzztest_macros.h index 8b760d1..fb0f7f5 100644 --- a/fuzztest/fuzztest_macros.h +++ b/fuzztest/fuzztest_macros.h
@@ -22,7 +22,9 @@ #include <vector> // IWYU pragma: begin_exports +#include "absl/status/statusor.h" #include "absl/strings/str_format.h" +#include "absl/strings/string_view.h" #include "./fuzztest/internal/io.h" #include "./fuzztest/internal/registration.h" #include "./fuzztest/internal/registry.h" @@ -126,6 +128,12 @@ std::vector<std::tuple<std::string>> ReadFilesFromDirectory( std::string_view dir); +// Returns parsed dictionary entries from fuzzer dictionary definition in the +// format specified at https://llvm.org/docs/LibFuzzer.html#dictionaries. +// If dictionary is in wrong format, return error status. +absl::StatusOr<std::vector<std::string>> ParseDictionary( + absl::string_view text); + // Reads entries from `dictionary_file` and returns a vector usable by // .WithDictionary(). //
diff --git a/fuzztest/fuzztest_macros_test.cc b/fuzztest/fuzztest_macros_test.cc new file mode 100644 index 0000000..208a661 --- /dev/null +++ b/fuzztest/fuzztest_macros_test.cc
@@ -0,0 +1,129 @@ +#include "./fuzztest/fuzztest_macros.h" + +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/status/status.h" + +namespace fuzztest::internal { +namespace { + +using ::testing::ElementsAre; + +TEST(ParseDictionaryTest, Success) { + // Derived from https://llvm.org/docs/LibFuzzer.html#dictionaries + std::string dictionary_content = + R"(# Lines starting with '#' and empty lines are ignored. + +# Adds "blah" (w/o quotes) to the dictionary. +kw1="blah" +# Use \\ for backslash and \" for quotes. +kw2="\"ac\\dc\"" +# Use \xAB for hex values +kw3="\xF7\xF8" +# the name of the keyword followed by '=' may be omitted: +"foo\x0Abar" + +# Null character is unescaped as well +"foo\x00bar" +)"; + absl::StatusOr<std::vector<std::string>> dictionary_entries = + ParseDictionary(dictionary_content); + ASSERT_TRUE(dictionary_entries.ok()); + EXPECT_THAT(*dictionary_entries, + ElementsAre("blah", "\"ac\\dc\"", "\xF7\xF8", "foo\nbar", + std::string("foo\0bar", 7))); +} +TEST(ParseDictionaryTest, FailsWithNoQuote) { + std::string dictionary_content = R"(kw1=world)"; + absl::StatusOr<std::vector<std::string>> dictionary_entries = + ParseDictionary(dictionary_content); + EXPECT_EQ(dictionary_entries.status().code(), + absl::StatusCode::kInvalidArgument); + EXPECT_THAT(dictionary_entries.status().message(), + "Unparseable dictionary entry at line 1: missing quotes"); +} + +TEST(ParseDictionaryTest, FailsWithNoClosingQuote) { + std::string dictionary_content = R"(kw1="world)"; + absl::StatusOr<std::vector<std::string>> dictionary_entries = + ParseDictionary(dictionary_content); + EXPECT_EQ(dictionary_entries.status().code(), + absl::StatusCode::kInvalidArgument); + EXPECT_THAT(dictionary_entries.status().message(), + "Unparseable dictionary entry at line 1: entry must be enclosed " + "in quotes"); +} + +TEST(ParseDictionaryTest, FailsWithInvalidEscapeSequence) { + std::string dictionary_content = R"( +# Valid 
+kw1="Hello" + +# Invalid +kw2="world\!" +)"; + absl::StatusOr<std::vector<std::string>> dictionary_entries = + ParseDictionary(dictionary_content); + EXPECT_EQ(dictionary_entries.status().code(), + absl::StatusCode::kInvalidArgument); + EXPECT_THAT(dictionary_entries.status().message(), + "Unparseable dictionary entry at line 6: Invalid escape sequence " + "in dictionary entry: \\!"); +} + +TEST(ParseDictionaryTest, FailsWithEmptyHexEscapeSequence) { + std::string dictionary_content = R"( +# Valid +kw1="Hello" + +# Invalid +kw2="world\x" +)"; + absl::StatusOr<std::vector<std::string>> dictionary_entries = + ParseDictionary(dictionary_content); + EXPECT_EQ(dictionary_entries.status().code(), + absl::StatusCode::kInvalidArgument); + EXPECT_THAT(dictionary_entries.status().message(), + "Unparseable dictionary entry at line 6: Invalid escape sequence " + "in dictionary entry: \\x"); +} + +TEST(ParseDictionaryTest, FailsWithHexEscapeSequenceWithSingleDigit) { + std::string dictionary_content = R"( +# Valid +kw1="Hello" + +# Invalid +kw2="world\x2" +)"; + absl::StatusOr<std::vector<std::string>> dictionary_entries = + ParseDictionary(dictionary_content); + EXPECT_EQ(dictionary_entries.status().code(), + absl::StatusCode::kInvalidArgument); + EXPECT_THAT(dictionary_entries.status().message(), + "Unparseable dictionary entry at line 6: Invalid escape sequence " + "in dictionary entry: \\x"); +} + +TEST(ParseDictionaryTest, FailsWithInvalidTwoDigitHexEscapeSequence) { + std::string dictionary_content = R"( +# Valid +kw1="Hello" + +# Invalid +kw2="world\x5g" +)"; + absl::StatusOr<std::vector<std::string>> dictionary_entries = + ParseDictionary(dictionary_content); + EXPECT_EQ(dictionary_entries.status().code(), + absl::StatusCode::kInvalidArgument); + EXPECT_THAT( + dictionary_entries.status().message(), + "Unparseable dictionary entry at line 6: Could not unescape \\x5g"); +} + +} // namespace +} // namespace fuzztest::internal
diff --git a/fuzztest/llvm_fuzzer_wrapper.cc b/fuzztest/llvm_fuzzer_wrapper.cc index f7917eb..7b6c748 100644 --- a/fuzztest/llvm_fuzzer_wrapper.cc +++ b/fuzztest/llvm_fuzzer_wrapper.cc
@@ -10,15 +10,12 @@ #include "absl/log/check.h" #include "absl/random/bit_gen_ref.h" #include "absl/random/random.h" -#include "absl/strings/escaping.h" -#include "absl/strings/str_split.h" -#include "absl/strings/string_view.h" #include "./fuzztest/fuzztest.h" +#include "./fuzztest/fuzztest_macros.h" #include "./fuzztest/internal/domains/arbitrary_impl.h" #include "./fuzztest/internal/domains/container_of_impl.h" #include "./fuzztest/internal/domains/domain_base.h" #include "./fuzztest/internal/io.h" -#include "re2/re2.h" ABSL_DECLARE_FLAG(std::string, llvm_fuzzer_wrapper_dict_file); ABSL_DECLARE_FLAG(std::string, llvm_fuzzer_wrapper_corpus_dir); @@ -68,20 +65,14 @@ out.reserve(files.size()); // Dictionary must be in the format specified at // https://llvm.org/docs/LibFuzzer.html#dictionaries - constexpr absl::string_view kLineRegex = - "[^\\\"]*" // Skip an arbitrary prefix. - "\\\"(.+)\\\"" // Must be enclosed in quotes. - "[^\\\"]*"; // Skip an arbitrary suffix. for (const fuzztest::internal::FilePathAndData& file : files) { - for (absl::string_view line : absl::StrSplit(file.data, '\n')) { - if (line.empty() || line[0] == '#') continue; - std::string entry; - CHECK(RE2::FullMatch(line, kLineRegex, &entry)) - << "Invalid dictionary entry: " << line; - std::string unescaped_entry; - CHECK(absl::CUnescape(entry, &unescaped_entry)) - << "Could not unescape: " << entry; - out.emplace_back(unescaped_entry.begin(), unescaped_entry.end()); + absl::StatusOr<std::vector<std::string>> parsed_entries = + fuzztest::ParseDictionary(file.data); + CHECK(parsed_entries.status().ok()) + << "Could not parse dictionary file " << file.path << ": " + << parsed_entries.status(); + for (const std::string& parsed_entry : *parsed_entries) { + out.emplace_back(parsed_entry.begin(), parsed_entry.end()); } } return out;