| /* |
| * Copyright 2017 WebAssembly Community Group participants |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "gtest/gtest.h" |
| |
| #include "wabt/utf8.h" |
| |
| using namespace wabt; |
| |
| namespace { |
| |
| void assert_is_valid_utf8(bool expected, |
| int length, |
| int cu0 = 0, |
| int cu1 = 0, |
| int cu2 = 0, |
| int cu3 = 0) { |
| assert(length <= 4); |
| char buf[4] = {static_cast<char>(cu0), static_cast<char>(cu1), |
| static_cast<char>(cu2), static_cast<char>(cu3)}; |
| if (expected) { |
| // Make sure it fails if there are continuation bytes past the end of the |
| // string. |
| for (int bad_length = 1; bad_length < length; ++bad_length) { |
| ASSERT_FALSE(IsValidUtf8(buf, bad_length)) |
| << cu0 << ", " << cu1 << ", " << cu2 << ", " << cu3; |
| } |
| } |
| |
| ASSERT_TRUE(expected == IsValidUtf8(buf, length)) |
| << cu0 << ", " << cu1 << ", " << cu2 << ", " << cu3; |
| } |
| |
| bool is_in_range(int x, int low, int high) { |
| return x >= low && x < high; |
| } |
| |
| } // end anonymous namespace |
| |
| #define FOR_RANGE(var, low, high) for (int var = low; var < high; var++) |
| #define FOR_EACH_BYTE(var) FOR_RANGE(var, 0, 0x100) |
| |
| TEST(utf8, valid_empty) { |
| assert_is_valid_utf8(true, 0); |
| } |
| |
| TEST(utf8, valid_1_byte) { |
| FOR_RANGE(cu0, 0, 0x80) { assert_is_valid_utf8(true, 1, cu0); } |
| } |
| |
| TEST(utf8, invalid_continuation_bytes) { |
| FOR_RANGE(cu0, 0x80, 0xc0) { assert_is_valid_utf8(false, 1, cu0); } |
| } |
| |
| TEST(utf8, invalid_2_byte) { |
| FOR_RANGE(cu0, 0xc0, 0xc2) { assert_is_valid_utf8(false, 1, cu0); } |
| } |
| |
| TEST(utf8, valid_2_bytes) { |
| FOR_RANGE(cu0, 0xc2, 0xe0) { |
| FOR_EACH_BYTE(cu1) { |
| bool is_valid = is_in_range(cu1, 0x80, 0xc0); |
| assert_is_valid_utf8(is_valid, 2, cu0, cu1); |
| } |
| } |
| } |
| |
| TEST(utf8, valid_3_bytes_e0) { |
| int cu0 = 0xe0; |
| FOR_EACH_BYTE(cu1) { |
| FOR_EACH_BYTE(cu2) { |
| bool is_valid = |
| is_in_range(cu1, 0xa0, 0xc0) && is_in_range(cu2, 0x80, 0xc0); |
| assert_is_valid_utf8(is_valid, 3, cu0, cu1, cu2); |
| } |
| } |
| } |
| |
| TEST(utf8, valid_3_bytes) { |
| FOR_RANGE(cu0, 0xe1, 0xf0) { |
| // Handle 0xed in valid_3_bytes_ed. |
| if (cu0 == 0xed) { |
| continue; |
| } |
| |
| FOR_EACH_BYTE(cu1) { |
| FOR_EACH_BYTE(cu2) { |
| bool is_valid = |
| is_in_range(cu1, 0x80, 0xc0) && is_in_range(cu2, 0x80, 0xc0); |
| assert_is_valid_utf8(is_valid, 3, cu0, cu1, cu2); |
| } |
| } |
| } |
| } |
| |
| TEST(utf8, valid_3_bytes_ed) { |
| int cu0 = 0xed; |
| FOR_EACH_BYTE(cu1) { |
| FOR_EACH_BYTE(cu2) { |
| bool is_valid = |
| is_in_range(cu1, 0x80, 0xa0) && is_in_range(cu2, 0x80, 0xc0); |
| assert_is_valid_utf8(is_valid, 3, cu0, cu1, cu2); |
| } |
| } |
| } |
| |
| TEST(utf8, valid_4_bytes_f0) { |
| int cu0 = 0xf0; |
| FOR_EACH_BYTE(cu1) { |
| FOR_EACH_BYTE(cu2) { |
| FOR_EACH_BYTE(cu3) { |
| bool is_valid = is_in_range(cu1, 0x90, 0xc0) && |
| is_in_range(cu2, 0x80, 0xc0) && |
| is_in_range(cu3, 0x80, 0xc0); |
| assert_is_valid_utf8(is_valid, 4, cu0, cu1, cu2, cu3); |
| } |
| } |
| } |
| } |
| |
| TEST(utf8, valid_4_bytes) { |
| FOR_RANGE(cu0, 0xf1, 0xf4) { |
| FOR_EACH_BYTE(cu1) { |
| FOR_EACH_BYTE(cu2) { |
| FOR_EACH_BYTE(cu3) { |
| bool is_valid = is_in_range(cu1, 0x80, 0xc0) && |
| is_in_range(cu2, 0x80, 0xc0) && |
| is_in_range(cu3, 0x80, 0xc0); |
| assert_is_valid_utf8(is_valid, 4, cu0, cu1, cu2, cu3); |
| } |
| } |
| } |
| } |
| } |
| |
| TEST(utf8, valid_4_bytes_f4) { |
| int cu0 = 0xf4; |
| FOR_EACH_BYTE(cu1) { |
| FOR_EACH_BYTE(cu2) { |
| FOR_EACH_BYTE(cu3) { |
| bool is_valid = is_in_range(cu1, 0x80, 0x90) && |
| is_in_range(cu2, 0x80, 0xc0) && |
| is_in_range(cu3, 0x80, 0xc0); |
| assert_is_valid_utf8(is_valid, 4, cu0, cu1, cu2, cu3); |
| } |
| } |
| } |
| } |
| |
| TEST(utf8, invalid_4_bytes) { |
| FOR_RANGE(cu0, 0xf5, 0x100) { |
| assert_is_valid_utf8(false, 4, cu0, 0x80, 0x80, 0x80); |
| } |
| } |