blob: 8fd242afeab0f77410f04b537c814f5004e8ac37 [file] [log] [blame] [edit]
#include "simdutf.h"
#include <array>
#include <iostream>
#include <tests/helpers/transcode_test_base.h>
#include <tests/helpers/random_int.h>
#include <tests/helpers/test.h>
#include <memory>
namespace {
std::array<size_t, 7> input_size{7, 16, 12, 64, 67, 128, 256};
using simdutf::tests::helpers::transcode_utf8_to_utf16_test_base;
constexpr size_t trials = 10000;
}
TEST(convert_pure_ASCII) {
for(size_t trial = 0; trial < trials; trial ++) {
if((trial % 100) == 0) { std::cout << "."; std::cout.flush(); }
size_t counter = 0;
auto generator = [&counter]() -> uint32_t {
return counter++ & 0x7f;
};
auto procedure = [&implementation](const char* utf8, size_t size, char16_t* utf16le) -> size_t {
std::vector<char16_t> utf16be(2*size); // Assume each UTF-8 byte is converted into two UTF-16 bytes
size_t len = implementation.convert_valid_utf8_to_utf16be(utf8, size, utf16be.data());
implementation.change_endianness_utf16(utf16be.data(), len, utf16le);
return len;
};
for (size_t size: input_size) {
transcode_utf8_to_utf16_test_base test(generator, size);
ASSERT_TRUE(test(procedure));
}
}
}
TEST(convert_1_or_2_UTF8_bytes) {
for(size_t trial = 0; trial < trials; trial ++) {
uint32_t seed{1234+uint32_t(trial)};
if((trial % 100) == 0) { std::cout << "."; std::cout.flush(); }
simdutf::tests::helpers::RandomInt random(0x0000, 0x07ff, seed); // range for 1 or 2 UTF-8 bytes
auto procedure = [&implementation](const char* utf8, size_t size, char16_t* utf16le) -> size_t {
std::vector<char16_t> utf16be(2*size); // Assume each UTF-8 byte is converted into two UTF-16 bytes
size_t len = implementation.convert_valid_utf8_to_utf16be(utf8, size, utf16be.data());
implementation.change_endianness_utf16(utf16be.data(), len, utf16le);
return len;
};
for (size_t size: input_size) {
transcode_utf8_to_utf16_test_base test(random, size);
ASSERT_TRUE(test(procedure));
}
}
}
TEST(convert_1_or_2_or_3_UTF8_bytes) {
for(size_t trial = 0; trial < trials; trial ++) {
uint32_t seed{1234+uint32_t(trial)};
if((trial % 100) == 0) { std::cout << "."; std::cout.flush(); }
// range for 1, 2 or 3 UTF-8 bytes
simdutf::tests::helpers::RandomIntRanges random({{0x0000, 0xd7ff},
{0xe000, 0xffff}}, seed);
auto procedure = [&implementation](const char* utf8, size_t size, char16_t* utf16le) -> size_t {
std::vector<char16_t> utf16be(2*size); // Assume each UTF-8 byte is converted into two UTF-16 bytes
size_t len = implementation.convert_valid_utf8_to_utf16be(utf8, size, utf16be.data());
implementation.change_endianness_utf16(utf16be.data(), len, utf16le);
return len;
};
for (size_t size: input_size) {
transcode_utf8_to_utf16_test_base test(random, size);
ASSERT_TRUE(test(procedure));
}
}
}
TEST(convert_3_or_4_UTF8_bytes) {
for(size_t trial = 0; trial < trials; trial ++) {
uint32_t seed{1234+uint32_t(trial)};
if((trial % 100) == 0) { std::cout << "."; std::cout.flush(); }
simdutf::tests::helpers::RandomIntRanges random({{0x0800, 0xd800-1},
{0xe000, 0x10ffff}}, seed); // range for 3 or 4 UTF-8 bytes
auto procedure = [&implementation](const char* utf8, size_t size, char16_t* utf16le) -> size_t {
std::vector<char16_t> utf16be(size);
size_t len = implementation.convert_valid_utf8_to_utf16be(utf8, size, utf16be.data());
implementation.change_endianness_utf16(utf16be.data(), len, utf16le);
return len;
};
for (size_t size: input_size) {
transcode_utf8_to_utf16_test_base test(random, size);
ASSERT_TRUE(test(procedure));
}
}
}
TEST(special_cases) {
const uint8_t utf8[] = {0xC2, 0xA9}; // copyright sign
const uint8_t expected[] = {0x00, 0xA9}; // expected UTF-16BE
size_t utf16len = implementation.utf16_length_from_utf8((const char*)utf8, 2);
ASSERT_TRUE(utf16len == 1);
std::unique_ptr<char16_t[]> utf16(new char16_t[utf16len]);
size_t utf16size = implementation.convert_valid_utf8_to_utf16be((const char*)utf8, 2, utf16.get());
ASSERT_TRUE(utf16size == utf16len);
ASSERT_TRUE(memcmp((const char*)utf16.get(), expected, 2) == 0);
}
int main(int argc, char* argv[]) {
return simdutf::test::main(argc, argv);
}