blob: 08806f2bf826bf413a19d2abe49075626f1ec6ad [file] [log] [blame]
// Copyright (c) 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <vector>
#include "third_party/icu/source/common/unicode/unistr.h"
// Converter names the fuzzer can pick from when it is not supplying its own
// codepage string. The names are taken from the
// third_party/icu/source/data/mappings/convrtrs.txt file.
static constexpr std::array<const char*, 45> kConverters = {{
    // Unicode transformation formats.
    "UTF-8",
    "utf-16be",
    "utf-16le",
    "UTF-32",
    "UTF-32BE",
    "UTF-32LE",

    // IBM / ISO-8859 family.
    "ibm866-html",
    "iso-8859-2-html",
    "iso-8859-3-html",
    "iso-8859-4-html",
    "iso-8859-5-html",
    "iso-8859-6-html",
    "iso-8859-7-html",
    "iso-8859-8-html",
    "ISO-8859-8-I",
    "iso-8859-10-html",
    "iso-8859-13-html",
    "iso-8859-14-html",
    "iso-8859-15-html",
    "iso-8859-16-html",

    // KOI8 and Macintosh.
    "koi8-r-html",
    "koi8-u-html",
    "macintosh-html",

    // Windows code pages.
    "windows-874-html",
    "windows-1250-html",
    "windows-1251-html",
    "windows-1252-html",
    "windows-1253-html",
    "windows-1254-html",
    "windows-1255-html",
    "windows-1256-html",
    "windows-1257-html",
    "windows-1258-html",
    "x-mac-cyrillic-html",

    // Multi-byte and stateful encodings.
    "windows-936-2000",
    "gb18030",
    "big5-html",
    "euc-jp-html",
    "ISO_2022,locale=ja,version=0",
    "shift_jis-html",
    "euc-kr-html",
    "ISO-2022-KR",
    "ISO-2022-CN",
    "ISO-2022-CN-EXT",
    "HZ-GB-2312",
}};
// Entry point for LibFuzzer.
//
// Constructs an icu::UnicodeString from fuzzer-provided bytes together with a
// codepage name, exercising ICU's codepage conversion path.
//
// Input layout:
//   data[0]        selector byte; bit 0 chooses where the codepage name comes
//                  from, the remaining bits parameterize that choice.
//   data[1..size)  payload bytes.
//
// Inputs shorter than two bytes are ignored. Always returns 0.
extern "C" int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size) {
  // Need the selector byte plus at least one payload byte.
  if (size < 2)
    return 0;

  // First byte will be used for random decisions; the rest is the payload.
  const unsigned char selector = data[0];
  const unsigned char* payload = data + 1;
  size -= 1;

  // Need null-terminated string: reserve one extra zero byte after the
  // payload so a codepage name pointing into the buffer is always terminated.
  std::vector<char> buffer(size + 1, 0);
  // Copy the whole payload. The range must be computed from |payload|, not
  // from |data|, because |size| no longer counts the selector byte; using
  // data + size as the end would silently drop the last input byte.
  std::copy(payload, payload + size, buffer.data());

  // Pointer to a part of fuzzer's data or to some valid codepage value.
  const char* codepage = nullptr;
  if (selector & 1) {
    // Use random codepage value provided by fuzzer (split data in two parts).
    // Remove least significant bit because we already used it as input value.
    const size_t codepage_length = (selector >> 1) & 0xF;
    if (size <= codepage_length)
      size /= 2;  // Payload too short for the requested split: halve it.
    else
      size -= codepage_length;
    // The tail of the buffer (up to the first NUL) becomes the codepage name;
    // the first |size| bytes remain the string data.
    codepage = buffer.data() + size;
  } else {
    // Use one of valid codepage values.
    // Remove least significant bit because we already used it as input value.
    const size_t index = (selector >> 1) % kConverters.size();
    codepage = kConverters[index];
  }

  // The constructor takes the data length as int32_t; make the narrowing from
  // size_t explicit.
  icu::UnicodeString unicode_string(buffer.data(), static_cast<int32_t>(size),
                                    codepage);
  return 0;
}