// blob: 3a110eb574910128eb8d2f19f9b7345989cd3cb6
#include "simdutf.h"
#include <array>
#include <iostream>
#include <tests/reference/validate_utf16.h>
#include <tests/reference/decode_utf16.h>
#include <tests/helpers/transcode_test_base.h>
#include <tests/helpers/random_int.h>
#include <tests/helpers/test.h>
#include "reference/validate_utf16_to_latin1.h"
// File-local configuration shared by every test case below.
namespace {
// Shared test harness that generates UTF-16 input and checks a UTF-16 -> Latin-1 procedure.
using simdutf::tests::helpers::transcode_utf16_to_latin1_test_base;

// Input lengths (in 16-bit code units) exercised by each test. The order is
// kept as-is because it determines how the random streams are consumed.
// Never modified, hence const.
const std::array<size_t, 7> input_size{7, 16, 12, 64, 67, 128, 256};

// Number of randomized rounds per test.
constexpr int trials = 1000;
}
// Feeds uniformly random 16-bit code units through the UTF-16BE -> Latin-1
// converter and compares the outcome with the reference validator.
// For invalid inputs, we expect the conversion to fail (return 0); for valid
// inputs the number of bytes written must equal latin1_length_from_utf16(size).
TEST(convert_random_inputs) {
  for (size_t trial = 0; trial < trials; trial++) {
    // A distinct seed per trial so that each round sees fresh data.
    uint32_t seed{1234 + uint32_t(trial)};
    simdutf::tests::helpers::RandomInt r(0x00, 0xffff, seed);
    if ((trial % 100) == 0) { std::cout << "."; std::cout.flush(); }
    for (size_t size : input_size) {
      std::vector<char16_t> utf16le(size);
      for (size_t i = 0; i < size; i++) {
        utf16le[i] = r();
      }
      // The reference validator below inspects the buffer as generated, so the
      // converter must see the byte-swapped (big-endian) form of the same data.
      // Passing the buffer unswapped would make the two disagree about which
      // code units fit in Latin-1. This mirrors the swap done by the other
      // tests in this file.
      std::vector<char16_t> utf16be(size);
      implementation.change_endianness_utf16(utf16le.data(), size, utf16be.data());

      size_t buffer_size = implementation.latin1_length_from_utf16(size);
      std::vector<char> latin1(buffer_size);
      size_t actual_size = implementation.convert_utf16be_to_latin1(utf16be.data(), size, latin1.data());
      if (simdutf::tests::reference::validate_utf16_to_latin1(utf16le.data(), size)) {
        ASSERT_EQUAL(buffer_size, actual_size);
      } else {
        ASSERT_EQUAL(0, actual_size);
      }
    }
  }
}
// Runs the shared transcode test base on inputs drawn exclusively from
// U+0000..U+00FF, i.e. code points that are representable in Latin-1.
TEST(convert_2_UTF16_bytes) {
  // The procedures do not depend on the trial, so build them once.
  // The input pointer is named utf16le: it is byte-swapped to big-endian
  // before being handed to the UTF-16BE converter under test.
  auto procedure = [&implementation](const char16_t* utf16le, size_t size, char* latin1) -> size_t {
    std::vector<char16_t> utf16be(size);
    implementation.change_endianness_utf16(utf16le, size, utf16be.data());
    return implementation.convert_utf16be_to_latin1(utf16be.data(), size, latin1);
  };
  // The Latin-1 length depends only on the number of code units, so the
  // input pointer is intentionally unused.
  auto size_procedure = [&implementation](const char16_t* /*utf16*/, size_t size) -> size_t {
    return implementation.latin1_length_from_utf16(size);
  };

  for (size_t trial = 0; trial < trials; trial++) {
    if ((trial % 100) == 0) { std::cout << "."; std::cout.flush(); }
    // Vary the seed per trial (as convert_random_inputs does); re-creating the
    // generator from one fixed seed would start every trial from the same state.
    const uint32_t seed{1234 + uint32_t(trial)};
    // Latin-1 range only: every generated code point must convert successfully.
    simdutf::tests::helpers::RandomIntRanges random({{0x0000, 0x00ff},}, seed);
    for (size_t size : input_size) {
      transcode_utf16_to_latin1_test_base test(random, size);
      ASSERT_TRUE(test(procedure));
      ASSERT_TRUE(test.check_size(size_procedure));
    }
  }
}
// Starts from an all-'*' input and, one position at a time, plants a code unit
// that does not fit in Latin-1, then runs the shared test base on the
// corrupted input. The original value is restored before the next position.
TEST(convert_fails_if_input_too_large) {
  uint32_t seed{12};
  // Start at 0x0100: 0x00ff is still a valid Latin-1 code point, so it must not
  // be used as a "too large" value.
  simdutf::tests::helpers::RandomInt generator(0x0100, 0xffff, seed);
  // The input pointer is named utf16le: swap to big-endian before calling the
  // UTF-16BE converter under test.
  auto procedure = [&implementation](const char16_t* utf16le, size_t size, char* latin1) -> size_t {
    std::vector<char16_t> utf16be(size);
    implementation.change_endianness_utf16(utf16le, size, utf16be.data());
    return implementation.convert_utf16be_to_latin1(utf16be.data(), size, latin1);
  };
  const size_t size = 64;
  // For big endian, the test encodes in BE, e.g. '*' becomes 0x2a00 instead of 0x002a.
  transcode_utf16_to_latin1_test_base test([](){ return '*'; }, size+32);
  for (int round = 0; round < trials; round++) {
    uint16_t wrong_value = generator();
#if SIMDUTF_IS_BIG_ENDIAN
    // Big-endian systems invert the generated numbers when they are committed
    // to memory, so each code point above 255 is mirrored: e.g. abcd becomes
    // cdab, and vice versa. For most code points this is not a cause for
    // concern. One case is problematic, however: numbers of the form 0xYY00,
    // whose mirror image is a value below 256, which is undesirable in this
    // particular test.
    if ((wrong_value & 0xFF00) != 0) {
      // In this case, we swap the bytes of the generated value:
      wrong_value = uint16_t((wrong_value >> 8) | (wrong_value << 8));
    }
#endif
    for (size_t i = 0; i < size; i++) {
      auto old = test.input_utf16[i];
      test.input_utf16[i] = wrong_value;
      ASSERT_TRUE(test(procedure));
      test.input_utf16[i] = old;
    }
  }
}
// Program entry point: forwards the command line to the simdutf test runner
// and uses its result as the process exit status.
int main(int argc, char* argv[]) {
  const int exit_status = simdutf::test::main(argc, argv);
  return exit_status;
}