// blob: f9545a1592dad30ca359c9e5672590ac00740b55 [file] [log] [blame]
// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_BIDI_PARAGRAPH_H_
#define THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_BIDI_PARAGRAPH_H_
#include <unicode/ubidi.h>

#include <memory>
#include <optional>

#include "base/check_op.h"
#include "base/containers/span.h"
#include "third_party/blink/renderer/platform/text/text_direction.h"
#include "third_party/blink/renderer/platform/wtf/allocator/allocator.h"
#include "third_party/blink/renderer/platform/wtf/forward.h"
#include "third_party/blink/renderer/platform/wtf/text/string_view.h"
#include "third_party/blink/renderer/platform/wtf/vector.h"
namespace blink {
// BidiParagraph resolves bidirectional runs in a paragraph using ICU BiDi.
// http://userguide.icu-project.org/transforms/bidi
//
// Given the string of a paragraph, it runs the Unicode Bidirectional Algorithm
// defined in UAX#9 and creates logical runs.
// http://unicode.org/reports/tr9/
// It can also create visual runs once line breaks are determined.
class PLATFORM_EXPORT BidiParagraph {
STACK_ALLOCATED();
public:
BidiParagraph() = default;
BidiParagraph(const String& text,
std::optional<TextDirection> base_direction) {
SetParagraph(text, base_direction);
}
// Splits the given paragraph to bidi runs and resolves the bidi embedding
// level of each run.
//
// Returns false on failure. Nothing other than the destructor should be
// called.
bool SetParagraph(const String&, std::optional<TextDirection> base_direction);
// @return the entire text is unidirectional.
bool IsUnidirectional() const {
return ubidi_getDirection(ubidi_.get()) != UBIDI_MIXED;
}
// The base direction (a.k.a. paragraph direction) of this block.
// This is determined by the 'direction' property of the block, or by the
// heuristic rules defined in UAX#9 if 'unicode-bidi: plaintext'.
TextDirection BaseDirection() const { return base_direction_; }
// Compute the base direction for a given string using the heuristic
// rules defined in UAX#9. It determines the direction by the first strong
// character, or returns `nullopt` if no strong characters are found before
// the first segment break.
// http://unicode.org/reports/tr9/#The_Paragraph_Level
static std::optional<TextDirection> BaseDirectionForString(
const StringView&,
bool (*stop_at)(UChar) = nullptr);
// Same as `BaseDirectionForString().value_or(kLtr)`, with an optimized code
// path for when the default (no strong characters) is LTR.
static TextDirection BaseDirectionForStringOrLtr(
const StringView& text,
bool (*stop_at)(UChar) = nullptr);
// Create a string that enforces directional override by wrapping the given
// string with a Unicode BiDi override character (LRO or ROL) and PDF.
// https://unicode.org/reports/tr9/#Explicit_Directional_Overrides
// https://unicode.org/reports/tr9/#Terminating_Explicit_Directional_Embeddings_and_Overrides
static String StringWithDirectionalOverride(const StringView& text,
TextDirection direction);
struct Run {
Run(unsigned start, unsigned end, UBiDiLevel level)
: start(start), end(end), level(level) {
DCHECK_GT(end, start);
}
unsigned Length() const { return end - start; }
TextDirection Direction() const { return DirectionFromLevel(level); }
bool operator==(const Run& other) const {
return start == other.start && end == other.end && level == other.level;
}
unsigned start;
unsigned end;
UBiDiLevel level;
};
using Runs = Vector<Run, 32>;
// Get a list of `Run` in the logical order (before bidi reorder.)
// `text` must be the same one as `SetParagraph`.
// This is higher-level API for `GetLogicalRun`.
void GetLogicalRuns(const String& text, Runs* runs) const;
// Returns the end offset of a logical run that starts from the |start|
// offset.
unsigned GetLogicalRun(unsigned start, UBiDiLevel*) const;
// Get a list of `Run` in the visual order (after bidi reorder.)
// `text` must be the same one as `SetParagraph`.
// This is higher-level API for `GetLogicalRuns` and `IndicesInVisualOrder`.
void GetVisualRuns(const String& text, Runs* runs) const;
// Create a list of indices in the visual order.
// A wrapper for ICU |ubidi_reorderVisual()|.
static void IndicesInVisualOrder(
const Vector<UBiDiLevel, 32>& levels,
Vector<int32_t, 32>* indices_in_visual_order_out);
private:
template <typename TChar>
static std::optional<TextDirection> BaseDirectionForString(
base::span<const TChar>,
bool (*stop_at)(UChar));
struct UBiDiDeleter {
void operator()(UBiDi* ubidi) const { ubidi_close(ubidi); }
};
using UBidiPtr = std::unique_ptr<UBiDi, UBiDiDeleter>;
UBidiPtr ubidi_;
TextDirection base_direction_ = TextDirection::kLtr;
};
// static
inline TextDirection BidiParagraph::BaseDirectionForStringOrLtr(
    const StringView& text,
    bool (*stop_at)(UChar)) {
  // Fast path: an empty or 8-bit string always resolves to LTR here. The
  // characters U+0000-00FF are all either LTR or neutral, and this function
  // does not need to tell those two apart because neutral falls back to LTR.
  const bool needs_full_scan = !text.empty() && !text.Is8Bit();
  if (!needs_full_scan) {
    return TextDirection::kLtr;
  }

  // Slow path: run the UAX#9 heuristic and fall back to LTR when it finds no
  // strong character.
  const std::optional<TextDirection> resolved =
      BaseDirectionForString(text, stop_at);
  return resolved.has_value() ? *resolved : TextDirection::kLtr;
}
} // namespace blink
#endif // THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_BIDI_PARAGRAPH_H_