// blob: 5f5d2be79cd1083b382bd0a807749add52be0fba [file] [log] [blame]
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "third_party/blink/renderer/core/dom/element.h"
#include <algorithm>
#include "base/auto_reset.h"
#include "third_party/blink/renderer/core/display_lock/display_lock_utilities.h"
#include "third_party/blink/renderer/core/dom/node_computed_style.h"
#include "third_party/blink/renderer/core/dom/node_traversal.h"
#include "third_party/blink/renderer/core/dom/text.h"
#include "third_party/blink/renderer/core/editing/editing_utilities.h"
#include "third_party/blink/renderer/core/editing/ephemeral_range.h"
#include "third_party/blink/renderer/core/html/forms/html_opt_group_element.h"
#include "third_party/blink/renderer/core/html/forms/html_option_element.h"
#include "third_party/blink/renderer/core/html/forms/html_select_element.h"
#include "third_party/blink/renderer/core/html/html_br_element.h"
#include "third_party/blink/renderer/core/html/html_paragraph_element.h"
#include "third_party/blink/renderer/core/layout/layout_table_cell.h"
#include "third_party/blink/renderer/core/layout/layout_table_row.h"
#include "third_party/blink/renderer/core/layout/layout_table_section.h"
#include "third_party/blink/renderer/core/layout/layout_text_fragment.h"
#include "third_party/blink/renderer/core/layout/line/inline_text_box.h"
#include "third_party/blink/renderer/core/layout/ng/inline/ng_inline_node.h"
#include "third_party/blink/renderer/core/layout/ng/inline/ng_inline_node_data.h"
#include "third_party/blink/renderer/core/layout/ng/inline/ng_offset_mapping.h"
#include "third_party/blink/renderer/platform/wtf/allocator/allocator.h"
#include "third_party/blink/renderer/platform/wtf/text/character_names.h"
#include "third_party/blink/renderer/platform/wtf/text/string_builder.h"
#include "third_party/blink/renderer/platform/wtf/vector.h"
namespace blink {
namespace {
// The implementation of Element#innerText algorithm[1].
// The collector appends characters to an internal |Result| buffer while it
// walks the DOM subtree, and |RunOn()| returns the accumulated string.
// [1]
// https://html.spec.whatwg.org/C/#the-innertext-idl-attribute
class ElementInnerTextCollector final {
  STACK_ALLOCATED();

 public:
  ElementInnerTextCollector() = default;

  // Computes and returns the innerText value of |element|.
  String RunOn(const Element& element);

 private:
  // Result characters of innerText collection steps.
  // "Required line break count" items are not written immediately; the
  // largest pending count is remembered and turned into LF characters only
  // when a later character, newline, tab or text is emitted.
  class Result final {
   public:
    Result() = default;

    // Flushes pending required line breaks, then appends |code_point|.
    void EmitChar16(UChar code_point);
    // Flushes pending required line breaks, then appends a LF.
    void EmitNewline();
    // Records a required line break count of |count| (0, 1 or 2). Ignored
    // when |count| is 0 or when nothing has been written to the buffer yet.
    void EmitRequiredLineBreak(int count);
    // Flushes pending required line breaks, then appends a TAB.
    void EmitTab();
    // Flushes pending required line breaks, then appends |text|. Does nothing
    // for empty |text|.
    void EmitText(const StringView& text);
    // Returns the buffered characters. Pending required line breaks are not
    // flushed, so they never appear at the end of the result.
    String Finish();

   private:
    // Appends as many LF characters as the pending required line break count
    // and resets that count to zero.
    void FlushRequiredLineBreak();

    StringBuilder builder_;
    // Largest required line break count seen since the last flush (0..2).
    int required_line_break_count_ = 0;

    DISALLOW_COPY_AND_ASSIGN(Result);
  };

  // Returns true if |node| is an element with "display: contents" style.
  static bool HasDisplayContentsStyle(const Node& node);
  // Returns true if |node| has a layout object.
  static bool IsBeingRendered(const Node& node);
  // Returns true if used value of "display" is block-level.
  static bool IsDisplayBlockLevel(const Node&);
  // Walks backwards in pre-order from |layout_object| and returns the first
  // layout object that is not on the chain of parents being tracked, or
  // nullptr if there is none.
  static LayoutObject* PreviousLeafOf(const LayoutObject& layout_object);
  // Returns true if a table row follows |table_row| in the same table, either
  // in the same section or in a following sibling section.
  static bool ShouldEmitNewlineForTableRow(const LayoutTableRow& table_row);

  // Returns the offset mapping of the inline formatting context that holds
  // |layout_text|.
  const NGOffsetMapping* GetOffsetMapping(const LayoutText& layout_text);
  // Runs |ProcessNode()| on every child of |node|.
  void ProcessChildren(const Node& node);
  // Same as |ProcessChildren()|, but emits |required_line_break_count| (1 or
  // 2) before and after the children.
  void ProcessChildrenWithRequiredLineBreaks(const Node& node,
                                             int required_line_break_count);
  // Emits the text that the offset mapping associates with |text_node|.
  void ProcessLayoutText(const LayoutText& layout_text, const Text& text_node);
  // The "inner text collection steps" for |node|.
  void ProcessNode(const Node& node);
  // Emits the text of an OPTION element surrounded by required line breaks.
  void ProcessOptionElement(const HTMLOptionElement& element);
  // Emits the OPTION and OPTGROUP children of a SELECT element.
  void ProcessSelectElement(const HTMLSelectElement& element);
  // Emits the rendered text of |node|, including a separately laid out
  // "::first-letter" part.
  void ProcessTextNode(const Text& node);

  // Result character buffer.
  Result result_;

  DISALLOW_COPY_AND_ASSIGN(ElementInnerTextCollector);
};
// Entry point of the innerText getter: returns the innerText of |element|.
String ElementInnerTextCollector::RunOn(const Element& element) {
  DCHECK(!element.InActiveDocument() || !NeedsLayoutTreeUpdate(element));

  // 1. A display-locked element, or an element inside a locked subtree, is
  // hidden from view (and possibly not laid out), so innerText is empty.
  if (DisplayLockUtilities::NearestLockedInclusiveAncestor(element))
    return String();

  // 2. If this element is not being rendered, or if the user agent is a
  // non-CSS user agent, then return the same value as the textContent IDL
  // attribute on this element.
  // Note: To pass WPT tests, we don't fall back to |textContent| for
  // "display:contents". See [1] for the discussion about "display:contents"
  // and "being rendered".
  // [1] https://github.com/whatwg/html/issues/1837
  const bool has_box_or_display_contents =
      IsBeingRendered(element) || HasDisplayContentsStyle(element);
  if (!has_box_or_display_contents)
    return element.textContent(/*convert_brs_to_newlines=*/false);

  // 3. Let results be a new empty list.
  // 4. For each child node node of this element:
  //   1. Let current be the list resulting in running the inner text
  //      collection steps with node. Each item in results will either be a
  //      JavaScript string or a positive integer (a required line break
  //      count).
  //   2. For each item item in current, append item to results.
  // Note: <select> and <option> are handled here since they are implemented
  // as UA shadow DOM, e.g. Text nodes in <option> don't have layout object.
  // See also: https://github.com/whatwg/html/issues/3797
  if (IsHTMLSelectElement(element)) {
    ProcessSelectElement(ToHTMLSelectElement(element));
  } else if (IsHTMLOptionElement(element)) {
    ProcessOptionElement(ToHTMLOptionElement(element));
  } else {
    ProcessChildren(element);
  }
  return result_.Finish();
}
// static
// Returns true only when |node| is an Element that reports
// "display: contents"; any non-element node yields false.
bool ElementInnerTextCollector::HasDisplayContentsStyle(const Node& node) {
  if (const auto* element = DynamicTo<Element>(node))
    return element->HasDisplayContentsStyle();
  return false;
}
// From https://html.spec.whatwg.org/C/#being-rendered:
// An element is *being rendered* if it has any associated CSS layout boxes,
// SVG layout boxes, or some equivalent in other styling languages.
// Note: Just being off-screen does not mean the element is not being rendered.
// The presence of the "hidden" attribute normally means the element is not
// being rendered, though this might be overridden by the style sheets.
//
// Here this is approximated by "|node| has a layout object".
// static
bool ElementInnerTextCollector::IsBeingRendered(const Node& node) {
  return node.GetLayoutObject() != nullptr;
}
// static
bool ElementInnerTextCollector::IsDisplayBlockLevel(const Node& node) {
const LayoutObject* const layout_object = node.GetLayoutObject();
if (!layout_object)
return false;
if (!layout_object->IsLayoutBlock()) {
if (layout_object->IsTableSection()) {
// Note: |LayoutTableSeleciton::IsInline()| returns false, but it is not
// block-level.
return false;
}
// Note: Block-level replaced elements, e.g. <img style=display:block>,
// reach here. Unlike |LayoutBlockFlow::AddChild()|, innerText considers
// floats and absolutely-positioned elements as block-level node.
return !layout_object->IsInline();
}
// TODO(crbug.com/567964): Due by the issue, |IsAtomicInlineLevel()| is always
// true for replaced elements event if it has display:block, once it is fixed
// we should check at first.
if (layout_object->IsAtomicInlineLevel())
return false;
if (layout_object->IsRubyText()) {
// RT isn't consider as block-level.
// e.g. <ruby>abc<rt>def</rt>.innerText == "abcdef"
return false;
}
// Note: CAPTION is associated to |LayoutNGTableCaption| in LayoutNG or
// |LayoutBlockFlow| in legacy layout.
return true;
}
// static
// Walks backwards from |layout_object| in pre-order. While the visited object
// is exactly the parent being tracked, the walk continues with that object's
// own parent as the new tracked parent. The first visited object that differs
// from the tracked parent is returned; nullptr if the walk runs out.
LayoutObject* ElementInnerTextCollector::PreviousLeafOf(
    const LayoutObject& layout_object) {
  LayoutObject* tracked_parent = layout_object.Parent();
  LayoutObject* candidate = layout_object.PreviousInPreOrder();
  while (candidate) {
    if (candidate != tracked_parent)
      return candidate;
    tracked_parent = candidate->Parent();
    candidate = candidate->PreviousInPreOrder();
  }
  return nullptr;
}
// static
bool ElementInnerTextCollector::ShouldEmitNewlineForTableRow(
const LayoutTableRow& table_row) {
const LayoutTable* const table = table_row.Table();
if (!table)
return false;
if (table_row.NextRow())
return true;
// For TABLE contains TBODY, TFOOTER, THEAD.
const LayoutTableSection* const table_section = table_row.Section();
if (!table_section)
return false;
// See |LayoutTable::SectionAbove()| and |SectionBelow()| for traversing
// |LayoutTableSection|.
for (LayoutObject* runner = table_section->NextSibling(); runner;
runner = runner->NextSibling()) {
if (!runner->IsTableSection())
continue;
if (ToLayoutTableSection(*runner).NumRows() > 0)
return true;
}
// No table row after |node|.
return false;
}
// Returns the offset mapping of the inline formatting context that contains
// |layout_text|, as provided by |NGInlineNode::GetOffsetMapping()|.
const NGOffsetMapping* ElementInnerTextCollector::GetOffsetMapping(
    const LayoutText& layout_text) {
  // TODO(editing-dev): We should handle "text-transform" in "::first-line".
  // In legacy layout, |InlineTextBox| holds original text and text box
  // paint does text transform.
  LayoutBlockFlow* const formatting_context =
      NGOffsetMapping::GetInlineFormattingContextOf(layout_text);
  DCHECK(formatting_context) << layout_text;
  return NGInlineNode::GetOffsetMapping(formatting_context);
}
// Runs the inner text collection steps on each child of |container|, in the
// order given by |NodeTraversal::ChildrenOf()|.
void ElementInnerTextCollector::ProcessChildren(const Node& container) {
  for (const Node& child : NodeTraversal::ChildrenOf(container)) {
    ProcessNode(child);
  }
}
// Processes the children of |node| with |required_line_break_count| (which
// must be 1 or 2) emitted both before and after them.
void ElementInnerTextCollector::ProcessChildrenWithRequiredLineBreaks(
    const Node& node,
    int required_line_break_count) {
  DCHECK_GE(required_line_break_count, 1);
  DCHECK_LE(required_line_break_count, 2);

  // Leading required line break count.
  result_.EmitRequiredLineBreak(required_line_break_count);

  ProcessChildren(node);

  // Trailing required line break count.
  result_.EmitRequiredLineBreak(required_line_break_count);
}
// Emits the characters that the offset mapping of |layout_text| associates
// with |text_node|. Nothing is emitted for empty or non-visible
// |layout_text|.
void ElementInnerTextCollector::ProcessLayoutText(const LayoutText& layout_text,
                                                  const Text& text_node) {
  if (!layout_text.TextLength())
    return;

  // TODO(editing-dev): Once we make ::first-letter don't apply "visibility",
  // we should get rid of this if-statement. http://crbug.com/866744
  if (layout_text.Style()->Visibility() != EVisibility::kVisible)
    return;

  const NGOffsetMapping* const mapping = GetOffsetMapping(layout_text);
  if (!mapping) {
    // TODO(crbug.com/967995): There are certain cases where we fail to compute
    // |NGOffsetMapping| due to failures in layout. As the root cause is hard to
    // fix at the moment, we work around it here so that the production build
    // doesn't crash.
    NOTREACHED() << layout_text;
    return;
  }

  // Each mapping unit identifies one [start, end) slice of the mapping's
  // text.
  for (const NGOffsetMappingUnit& unit :
       mapping->GetMappingUnitsForNode(text_node)) {
    const auto slice_start = unit.TextContentStart();
    const auto slice_end = unit.TextContentEnd();
    result_.EmitText(
        StringView(mapping->GetText(), slice_start, slice_end - slice_start));
  }
}
// The "inner text collection steps".
void ElementInnerTextCollector::ProcessNode(const Node& node) {
// 1. Let items be the result of running the inner text collection steps with
// each child node of node in tree order, and then concatenating the results
// to a single list.
// 2. If the node is display locked, then we should not process it or its
// children, since they are not visible or accessible via innerText.
if (auto* element = DynamicTo<Element>(node)) {
auto* context = element->GetDisplayLockContext();
if (context && context->IsLocked())
return;
}
// 3. If node's computed value of 'visibility' is not 'visible', then return
// items.
const ComputedStyle* style = node.GetComputedStyle();
if (style && style->Visibility() != EVisibility::kVisible)
return ProcessChildren(node);
// 4. If node is not being rendered, then return items. For the purpose of
// this step, the following elements must act as described if the computed
// value of the 'display' property is not 'none':
// Note: items can be non-empty due to 'display:contents'.
if (!IsBeingRendered(node)) {
// "display:contents" also reaches here since it doesn't have a CSS box.
return ProcessChildren(node);
}
// * select elements have an associated non-replaced inline CSS box whose
// child boxes include only those of optgroup and option element child
// nodes;
// * optgroup elements have an associated non-replaced block-level CSS box
// whose child boxes include only those of option element child nodes; and
// * option element have an associated non-replaced block-level CSS box whose
// child boxes are as normal for non-replaced block-level CSS boxes.
if (IsHTMLSelectElement(node))
return ProcessSelectElement(ToHTMLSelectElement(node));
if (IsHTMLOptionElement(node)) {
// Since child nodes of OPTION are not rendered, we use dedicated function.
// e.g. <div>ab<option>12</div>cd</div>innerText == "ab\n12\ncd"
// Note: "label" attribute doesn't affect value of innerText.
return ProcessOptionElement(ToHTMLOptionElement(node));
}
// 5. If node is a Text node, then for each CSS text box produced by node.
auto* text_node = DynamicTo<Text>(node);
if (text_node)
return ProcessTextNode(*text_node);
// 6. If node is a br element, then append a string containing a single U+000A
// LINE FEED (LF) character to items.
if (IsHTMLBRElement(node)) {
ProcessChildren(node);
result_.EmitNewline();
return;
}
// 7. If node's computed value of 'display' is 'table-cell', and node's CSS
// box is not the last 'table-cell' box of its enclosing 'table-row' box, then
// append a string containing a single U+0009 CHARACTER TABULATION (tab)
// character to items.
const LayoutObject& layout_object = *node.GetLayoutObject();
if (style->Display() == EDisplay::kTableCell) {
ProcessChildren(node);
if (layout_object.IsTableCell() &&
ToLayoutTableCell(layout_object).NextCell())
result_.EmitTab();
return;
}
// 8. If node's computed value of 'display' is 'table-row', and node's CSS box
// is not the last 'table-row' box of the nearest ancestor 'table' box, then
// append a string containing a single U+000A LINE FEED (LF) character to
// items.
if (style->Display() == EDisplay::kTableRow) {
ProcessChildren(node);
if (layout_object.IsTableRow() &&
ShouldEmitNewlineForTableRow(ToLayoutTableRow(layout_object)))
result_.EmitNewline();
return;
}
// 9. If node is a p element, then append 2 (a required line break count) at
// the beginning and end of items.
if (IsHTMLParagraphElement(node)) {
// Note: <p style="display:contents>foo</p> doesn't generate layout object
// for P.
ProcessChildrenWithRequiredLineBreaks(node, 2);
return;
}
// 10. If node's used value of 'display' is block-level or 'table-caption',
// then append 1 (a required line break count) at the beginning and end of
// items.
if (IsDisplayBlockLevel(node))
return ProcessChildrenWithRequiredLineBreaks(node, 1);
ProcessChildren(node);
}
// Emits the text of |option_element| with a required line break count of 1
// on both sides.
void ElementInnerTextCollector::ProcessOptionElement(
    const HTMLOptionElement& option_element) {
  constexpr int kLineBreakCount = 1;
  result_.EmitRequiredLineBreak(kLineBreakCount);
  result_.EmitText(option_element.text());
  result_.EmitRequiredLineBreak(kLineBreakCount);
}
// Emits the contents of |select_element|: OPTION children directly, and
// OPTGROUP children as a group of their OPTION children wrapped in required
// line breaks. Any other child is skipped.
void ElementInnerTextCollector::ProcessSelectElement(
    const HTMLSelectElement& select_element) {
  for (const Node& child : NodeTraversal::ChildrenOf(select_element)) {
    if (IsHTMLOptionElement(child)) {
      ProcessOptionElement(ToHTMLOptionElement(child));
    } else if (IsHTMLOptGroupElement(child)) {
      // Note: We should emit newline for OPTGROUP even if it has no OPTION.
      // e.g. <div>a<select><optgroup></select>b</div>.innerText == "a\nb"
      result_.EmitRequiredLineBreak(1);
      for (const Node& group_child : NodeTraversal::ChildrenOf(child)) {
        if (!IsHTMLOptionElement(group_child))
          continue;
        ProcessOptionElement(ToHTMLOptionElement(group_child));
      }
      result_.EmitRequiredLineBreak(1);
    }
  }
}
void ElementInnerTextCollector::ProcessTextNode(const Text& node) {
if (!node.GetLayoutObject())
return;
const LayoutText& layout_text = *node.GetLayoutObject();
if (LayoutText* first_letter_part = layout_text.GetFirstLetterPart()) {
if (layout_text.TextLength() == 0 ||
NGOffsetMapping::GetInlineFormattingContextOf(layout_text) !=
NGOffsetMapping::GetInlineFormattingContextOf(*first_letter_part)) {
// "::first-letter" with "float" reach here.
ProcessLayoutText(*first_letter_part, node);
}
}
ProcessLayoutText(layout_text, node);
}
// ----
// Appends the single UTF-16 code unit |code_point| after writing out any
// pending required line breaks.
void ElementInnerTextCollector::Result::EmitChar16(UChar code_point) {
  FlushRequiredLineBreak();
  // Flushing leaves no pending required line break behind.
  DCHECK_EQ(required_line_break_count_, 0);
  builder_.Append(code_point);
}
// Appends one newline character after writing out any pending required line
// breaks.
void ElementInnerTextCollector::Result::EmitNewline() {
  FlushRequiredLineBreak();
  builder_.Append(kNewlineCharacter);
}
// Records a "required line break count" item of |count| (0, 1 or 2). Nothing
// is written to the buffer here; the pending count is turned into LF
// characters by |FlushRequiredLineBreak()|.
void ElementInnerTextCollector::Result::EmitRequiredLineBreak(int count) {
  DCHECK_GE(count, 0);
  DCHECK_LE(count, 2);
  if (count == 0)
    return;

  // 4. Remove any runs of consecutive required line break count items at the
  // start or end of results.
  // A run at the start is dropped by ignoring items while the buffer is empty.
  if (builder_.IsEmpty()) {
    DCHECK_EQ(required_line_break_count_, 0);
    return;
  }

  // 5. Replace each remaining run of consecutive required line break count
  // items with a string consisting of as many U+000A LINE FEED (LF) characters
  // as the maximum of the values in the required line break count items.
  if (count > required_line_break_count_)
    required_line_break_count_ = count;
}
// Appends one tabulation character after writing out any pending required
// line breaks.
void ElementInnerTextCollector::Result::EmitTab() {
  FlushRequiredLineBreak();
  builder_.Append(kTabulationCharacter);
}
// Appends |text| after writing out any pending required line breaks. Empty
// |text| is ignored entirely, so it does not flush pending line breaks.
void ElementInnerTextCollector::Result::EmitText(const StringView& text) {
  if (text.IsEmpty()) {
    return;
  }

  FlushRequiredLineBreak();
  // Flushing leaves no pending required line break behind.
  DCHECK_EQ(required_line_break_count_, 0);
  builder_.Append(text);
}
// Returns what has been written to the buffer so far. Pending required line
// breaks are deliberately not flushed here, so a trailing run of them never
// shows up in the result.
String ElementInnerTextCollector::Result::Finish() {
  String collected_text = builder_.ToString();
  return collected_text;
}
// Writes the pending required line break count (0, 1 or 2) as that many LF
// characters and clears the pending count.
void ElementInnerTextCollector::Result::FlushRequiredLineBreak() {
  DCHECK_GE(required_line_break_count_, 0);
  DCHECK_LE(required_line_break_count_, 2);

  // The literal holds two LFs; the pending count selects how many of them are
  // appended.
  const int pending_line_breaks = required_line_break_count_;
  required_line_break_count_ = 0;
  builder_.Append("\n\n", pending_line_breaks);
}
} // anonymous namespace
// Implements the innerText getter for this element.
String Element::innerText() {
  // Style and layout have to be up to date first, because
  // |ElementInnerTextCollector| reads the layout tree (line boxes) of this
  // element.
  GetDocument().UpdateStyleAndLayoutForNode(this);

  ElementInnerTextCollector collector;
  return collector.RunOn(*this);
}
} // namespace blink