blob: 104d27d66fa5eb34bff0f43d030b65c0d7311328 [file] [log] [blame]
/*
* Copyright (C) 2013 Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Google Inc. nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_BIDI_TEST_HARNESS_H_
#define THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_BIDI_TEST_HARNESS_H_
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#include <istream>
#include <map>
#include <string>
#include <vector>
// FIXME: We don't have any business owning this code. We should try to
// upstream this to unicode.org if possible (for other implementations to use).
// Unicode.org provides a reference implementation, including parser:
// http://www.unicode.org/Public/PROGRAMS/BidiReferenceC/6.3.0/source/brtest.c
// But it, like the other implementations I've found, is rather tied to
// the algorithms it is testing. This file seeks to only implement the parser
// bits.
// Other C/C++ implementations of this parser:
// https://github.com/googlei18n/fribidi-vs-unicode/blob/master/test.c
// http://source.icu-project.org/repos/icu/icu/trunk/source/test/intltest/bidiconf.cpp
// Both of those are too tied to their respective projects to be of use to
// Blink. There are non-C implementations to parse BidiTest.txt as well,
// including:
// https://github.com/twitter/twitter-cldr-rb/blob/master/spec/bidi/bidi_spec.rb
// NOTE: None of this file is currently written to be thread-safe.
namespace bidi_test {
// Paragraph directions a test case can be run with. Every direction other
// than kDirectionNone occupies its own bit, so several of them can be OR-ed
// together into a single integer mask.
enum ParagraphDirection {
  kDirectionNone = 0,
  kDirectionAutoLTR = 1 << 0,
  kDirectionLTR = 1 << 1,
  kDirectionRTL = 1 << 2,
};

// The mask with every direction bit set; also the largest valid mask value.
const int kMaxParagraphDirection =
    kDirectionRTL | kDirectionLTR | kDirectionAutoLTR;
// For error printing: returns a human-readable name for
// |paragraph_direction|, or an empty string for any value that is not one of
// the three runnable directions (kDirectionNone included).
//
// Declared inline because this definition lives in a header; without it,
// every translation unit including the header would emit its own external
// definition and the link would fail with duplicate symbols.
inline std::string NameFromParagraphDirection(
    ParagraphDirection paragraph_direction) {
  switch (paragraph_direction) {
    case bidi_test::kDirectionAutoLTR:
      return "Auto-LTR";
    case bidi_test::kDirectionLTR:
      return "LTR";
    case bidi_test::kDirectionRTL:
      return "RTL";
    default:
      // This should never be reached.
      return "";
  }
}
// Feeds a Runner with the test cases read from a bidi test stream.
//
// The harness keeps only a reference to |runner|, so the runner has to stay
// alive for as long as the harness is used. Parse() (defined further down in
// this file) calls runner.RunTest() for each test case it reads.
template <typename Runner>
class Harness {
 public:
  Harness(Runner& runner) : runner_(runner) {}

  // Reads |bidi_test_file| line by line until the stream is exhausted.
  void Parse(std::istream& bidi_test_file);

 private:
  Runner& runner_;
};
// We could use boost::trim, but no other part of Blink uses boost yet.
//
// Removes leading spaces and tabs from |s| in place.
inline void Ltrim(std::string& s) {
  static const std::string kSeparators(" \t");
  const std::string::size_type first_kept = s.find_first_not_of(kSeparators);
  // When nothing but blanks is present |first_kept| is npos, which erase()
  // treats as "through the end of the string", leaving |s| empty.
  s.erase(0, first_kept);
}
// Removes trailing spaces and tabs from |s| in place.
inline void Rtrim(std::string& s) {
  static const std::string kSeparators(" \t");
  const std::string::size_type last_kept = s.find_last_not_of(kSeparators);
  if (last_kept == std::string::npos) {
    // Nothing but blanks (or nothing at all): the result is empty.
    s.clear();
  } else {
    // Drop everything after the last kept character. When that character is
    // already the final one this erases an empty range.
    s.erase(last_kept + 1);
  }
}
// Removes spaces and tabs from both ends of |s| in place. The two passes
// touch opposite ends of the string, so their order does not affect the
// result.
inline void Trim(std::string& s) {
  Ltrim(s);
  Rtrim(s);
}
// Splits |str| on runs of spaces and tabs and returns the non-empty tokens in
// order. Leading and trailing blanks are ignored; an empty or all-blank
// |str| yields an empty vector.
static std::vector<std::string> ParseStringList(const std::string& str) {
  static const std::string kSeparators(" \t");
  std::vector<std::string> tokens;
  std::string::size_type token_begin = str.find_first_not_of(kSeparators);
  while (token_begin != std::string::npos) {
    // |token_end| is npos for the final token; substr() then simply takes
    // the rest of the string and the next search for a token start fails.
    const std::string::size_type token_end =
        str.find_first_of(kSeparators, token_begin);
    tokens.push_back(str.substr(token_begin, token_end - token_begin));
    token_begin = str.find_first_not_of(kSeparators, token_end);
  }
  return tokens;
}
// Parses the leading base-10 integer in |str| (optional leading whitespace
// and sign, then digits). Returns 0 when |str| does not start with a number.
//
// strtol() is used instead of atoi() because atoi() has undefined behaviour
// when the value does not fit in an int. Out-of-range input is clamped to
// INT_MIN / INT_MAX here instead.
static int ParseInt(const std::string& str) {
  const long value = strtol(str.c_str(), nullptr, 10);
  if (value > INT_MAX)
    return INT_MAX;
  if (value < INT_MIN)
    return INT_MIN;
  return static_cast<int>(value);
}
// Parses a blank-separated list of integers. Each token goes through
// ParseInt(), so a token that is not a number contributes whatever ParseInt()
// returns for it rather than being skipped.
static std::vector<int> ParseIntList(const std::string& str) {
  const std::vector<std::string> tokens = ParseStringList(str);
  std::vector<int> values;
  values.reserve(tokens.size());
  for (const std::string& token : tokens)
    values.push_back(ParseInt(token));
  return values;
}
// Parses a blank-separated list of levels. The placeholder token "x" is
// stored as -1; every other token is converted with ParseInt().
static std::vector<int> ParseLevels(const std::string& line) {
  const std::vector<std::string> tokens = ParseStringList(line);
  std::vector<int> levels;
  levels.reserve(tokens.size());
  for (const std::string& token : tokens)
    levels.push_back(token == "x" ? -1 : ParseInt(token));
  return levels;
}
// This is not thread-safe as written.
static std::basic_string<UChar> ParseTestString(const std::string& line) {
std::basic_string<UChar> test_string;
static std::map<std::string, UChar> char_class_examples;
if (char_class_examples.empty()) {
// FIXME: Explicit make_pair is ugly, but required for C++98 compat.
char_class_examples.insert(std::make_pair("L", 0x6c)); // 'l' for L
char_class_examples.insert(std::make_pair("R", 0x05D0)); // HEBREW ALEF
char_class_examples.insert(std::make_pair("EN", 0x33)); // '3' for EN
char_class_examples.insert(std::make_pair("ES", 0x2d)); // '-' for ES
char_class_examples.insert(std::make_pair("ET", 0x25)); // '%' for ET
char_class_examples.insert(std::make_pair("AN", 0x0660)); // arabic 0
char_class_examples.insert(std::make_pair("CS", 0x2c)); // ',' for CS
char_class_examples.insert(std::make_pair("B", 0x0A)); // <control-000A>
char_class_examples.insert(std::make_pair("S", 0x09)); // <control-0009>
char_class_examples.insert(std::make_pair("WS", 0x20)); // ' ' for WS
char_class_examples.insert(std::make_pair("ON", 0x3d)); // '=' for ON
char_class_examples.insert(
std::make_pair("NSM", 0x05BF)); // HEBREW POINT RAFE
char_class_examples.insert(std::make_pair("AL", 0x0608)); // ARABIC RAY
char_class_examples.insert(std::make_pair("BN", 0x00AD)); // SOFT HYPHEN
char_class_examples.insert(std::make_pair("LRE", 0x202A));
char_class_examples.insert(std::make_pair("RLE", 0x202B));
char_class_examples.insert(std::make_pair("PDF", 0x202C));
char_class_examples.insert(std::make_pair("LRO", 0x202D));
char_class_examples.insert(std::make_pair("RLO", 0x202E));
char_class_examples.insert(std::make_pair("LRI", 0x2066));
char_class_examples.insert(std::make_pair("RLI", 0x2067));
char_class_examples.insert(std::make_pair("FSI", 0x2068));
char_class_examples.insert(std::make_pair("PDI", 0x2069));
}
std::vector<std::string> char_classes = ParseStringList(line);
for (size_t i = 0; i < char_classes.size(); i++) {
// FIXME: If the lookup failed we could return false for a parse error.
test_string.push_back(char_class_examples.find(char_classes[i])->second);
}
return test_string;
}
// Parses |line| as an integer into |mode_mask| and reports whether it is a
// usable direction mask, i.e. lies in [1, kMaxParagraphDirection].
// |mode_mask| is written even when the function returns false.
static bool ParseParagraphDirectionMask(const std::string& line,
                                        int& mode_mask) {
  mode_mask = ParseInt(line);
  const bool too_small = mode_mask < 1;
  const bool too_large = mode_mask > kMaxParagraphDirection;
  return !(too_small || too_large);
}
// Reports a line that could not be parsed by writing its 1-based number and
// its text to standard output.
static void ParseError(const std::string& line, size_t line_number) {
  // C stdio is used on purpose to avoid the expense of std::cout.
  fprintf(stdout, "Parse error, line %zu : %s\n", line_number, line.c_str());
}
// Reads |bidi_test_file| line by line and runs every test case it describes.
//
// Per line, after stripping '#' comments and surrounding blanks:
//  * empty lines are skipped;
//  * "@Levels" / "@Reorder" directives replace the expected levels / reorder
//    list used by all following data lines;
//  * other lines starting with '@' are ignored;
//  * anything else is a data line "<class names>;<direction mask>", for which
//    runner_.RunTest() is called once per direction bit set in the mask.
//
// Malformed lines are reported through ParseError() and skipped. That covers:
//  * a directive with nothing after its name, which used to make substr()
//    throw std::out_of_range;
//  * a data line without ';';
//  * a data line whose test string comes out empty, matching the check in
//    CharacterHarness::Parse;
//  * a direction mask outside the valid range.
template <class Runner>
void Harness<Runner>::Parse(std::istream& bidi_test_file) {
  static const std::string kLevelsPrefix("@Levels");
  static const std::string kReorderPrefix("@Reorder");
  // FIXME: UChar is an ICU type and cheating a bit to use here.
  // uint16_t might be more portable.
  std::basic_string<UChar> test_string;
  std::vector<int> levels;
  std::vector<int> reorder;
  int paragraph_direction_mask = 0;
  std::string line;
  size_t line_number = 0;
  while (std::getline(bidi_test_file, line)) {
    line_number++;
    const std::string original_line = line;
    const size_t comment_start = line.find_first_of('#');
    if (comment_start != std::string::npos)
      line = line.substr(0, comment_start);
    Trim(line);
    if (line.empty())
      continue;

    if (line[0] == '@') {
      const bool is_levels =
          line.compare(0, kLevelsPrefix.length(), kLevelsPrefix) == 0;
      const bool is_reorder =
          !is_levels &&
          line.compare(0, kReorderPrefix.length(), kReorderPrefix) == 0;
      if (!is_levels && !is_reorder)
        continue;  // Unrecognised directives are ignored.

      const std::string& prefix = is_levels ? kLevelsPrefix : kReorderPrefix;
      // The payload starts one character past the directive name (presumably
      // skipping a ':'). Make sure that character exists before slicing.
      if (line.length() <= prefix.length()) {
        ParseError(original_line, line_number);
        continue;
      }
      const std::string payload = line.substr(prefix.length() + 1);
      if (is_levels)
        levels = ParseLevels(payload);
      else
        reorder = ParseIntList(payload);
      continue;
    }

    // Assume it's a data line.
    const size_t separator_index = line.find_first_of(';');
    if (separator_index == std::string::npos) {
      ParseError(original_line, line_number);
      continue;
    }
    test_string = ParseTestString(line.substr(0, separator_index));
    if (test_string.empty()) {
      // No usable character classes before the ';'.
      ParseError(original_line, line_number);
      continue;
    }
    if (!ParseParagraphDirectionMask(line.substr(separator_index + 1),
                                     paragraph_direction_mask)) {
      ParseError(original_line, line_number);
      continue;
    }
    if (paragraph_direction_mask & kDirectionAutoLTR) {
      runner_.RunTest(test_string, reorder, levels, kDirectionAutoLTR,
                      original_line, line_number);
    }
    if (paragraph_direction_mask & kDirectionLTR) {
      runner_.RunTest(test_string, reorder, levels, kDirectionLTR,
                      original_line, line_number);
    }
    if (paragraph_direction_mask & kDirectionRTL) {
      runner_.RunTest(test_string, reorder, levels, kDirectionRTL,
                      original_line, line_number);
    }
  }
}
// Feeds a Runner with the test cases read from a stream of ';'-separated
// per-character test records (see Parse(), defined further down in this
// file, for the field layout).
//
// The harness keeps only a reference to |runner|, so the runner has to stay
// alive for as long as the harness is used.
template <typename Runner>
class CharacterHarness {
 public:
  CharacterHarness(Runner& runner) : runner_(runner) {}

  // Reads |bidi_test_file| line by line until the stream is exhausted.
  void Parse(std::istream& bidi_test_file);

 private:
  Runner& runner_;
};
static std::basic_string<UChar> ParseUCharHexadecimalList(
const std::string& str) {
std::basic_string<UChar> string;
std::vector<std::string> strings = ParseStringList(str);
for (size_t x = 0; x < strings.size(); x++) {
int i = strtol(strings[x].c_str(), nullptr, 16);
string.push_back((UChar)i);
}
return string;
}
// Maps the numeric direction code in |str| to a ParagraphDirection:
// 0 -> LTR, 1 -> RTL, 2 -> Auto-LTR. Any other number yields kDirectionNone.
// Because the text goes through ParseInt(), non-numeric input is treated as
// whatever ParseInt() returns for it.
static ParagraphDirection ParseParagraphDirection(const std::string& str) {
  const int code = ParseInt(str);
  if (code == 0)
    return kDirectionLTR;
  if (code == 1)
    return kDirectionRTL;
  if (code == 2)
    return kDirectionAutoLTR;
  return kDirectionNone;
}
// Counts how many blank-separated tokens of |str| are exactly "x".
static int ParseSuppresedChars(const std::string& str) {
  int x_count = 0;
  for (const std::string& token : ParseStringList(str)) {
    if (token == "x")
      ++x_count;
  }
  return x_count;
}
// Reads |bidi_test_file| line by line and runs one test per valid record.
//
// After stripping '#' comments and surrounding blanks, each non-empty line
// must hold five ';'-separated fields:
//   0: the test string as a list of hexadecimal values
//   1: paragraph direction (0 LTR, 1 RTL, 2 AutoLTR)
//   2: resolved paragraph embedding level (validated, otherwise unused here)
//   3: resolved levels, one per element of the test string ("x" allowed)
//   4: visual ordering, one entry per element whose level is not "x"
// A line that breaks any of these rules is reported through ParseError() and
// skipped; otherwise runner_.RunTest() is invoked for it.
template <class Runner>
void CharacterHarness<Runner>::Parse(std::istream& bidi_test_file) {
  // Moves the text before the first ';' of |rest| into |field| and keeps the
  // text after it in |rest|. Returns false, changing nothing, when |rest|
  // contains no ';'.
  const auto take_field = [](std::string& rest, std::string& field) -> bool {
    const size_t separator_index = rest.find_first_of(';');
    if (separator_index == std::string::npos)
      return false;
    field = rest.substr(0, separator_index);
    rest = rest.substr(separator_index + 1);
    return true;
  };

  size_t line_number = 0;
  std::string remaining;
  while (std::getline(bidi_test_file, remaining)) {
    ++line_number;
    const std::string full_line = remaining;

    // Cut at the first '#'. With no '#', the npos count keeps the whole line.
    remaining = remaining.substr(0, remaining.find_first_of('#'));
    Trim(remaining);
    if (remaining.empty())
      continue;

    std::string field;

    // Field 0: list of uchars as 4 char strings
    if (!take_field(remaining, field)) {
      ParseError(full_line, line_number);
      continue;
    }
    std::basic_string<UChar> test_string = ParseUCharHexadecimalList(field);
    if (test_string.empty()) {
      ParseError(full_line, line_number);
      continue;
    }

    // Field 1: paragraph direction (0 LTR, 1 RTL, 2 AutoLTR)
    if (!take_field(remaining, field)) {
      ParseError(full_line, line_number);
      continue;
    }
    ParagraphDirection paragraph_direction = ParseParagraphDirection(field);
    if (paragraph_direction == kDirectionNone) {
      ParseError(full_line, line_number);
      continue;
    }

    // Field 2: resolved paragraph embedding level
    if (!take_field(remaining, field)) {
      ParseError(full_line, line_number);
      continue;
    }
    if (ParseInt(field) < 0) {
      ParseError(full_line, line_number);
      continue;
    }

    // Field 3: List of resolved levels
    if (!take_field(remaining, field)) {
      ParseError(full_line, line_number);
      continue;
    }
    int suppressed_count = ParseSuppresedChars(field);
    std::vector<int> levels = ParseLevels(field);
    if (test_string.size() != levels.size()) {
      ParseError(full_line, line_number);
      continue;
    }

    // Field 4: visual ordering of characters. It is the last field, so a
    // further ';' makes the line malformed.
    if (remaining.find_first_of(';') != std::string::npos) {
      ParseError(full_line, line_number);
      continue;
    }
    std::vector<int> visual_ordering = ParseIntList(remaining);
    if (test_string.size() - suppressed_count != visual_ordering.size()) {
      ParseError(full_line, line_number);
      continue;
    }

    runner_.RunTest(test_string, visual_ordering, levels, paragraph_direction,
                    full_line, line_number);
  }
}
} // namespace bidi_test
#endif // THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_BIDI_TEST_HARNESS_H_