blob: d028b88daa352e00969b45da8af755208d9fae6d [file] [log] [blame]
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/optimization_guide/content/browser/page_content_proto_util.h"
#include <vector>
#include "base/notreached.h"
#include "components/optimization_guide/proto/features/common_quality_data.pb.h"
#include "third_party/blink/public/mojom/content_extraction/ai_page_content.mojom.h"
#include "url/gurl.h"
namespace optimization_guide {
namespace {
// Translates a blink AIPageContentAttributeType into the corresponding
// optimization_guide proto ContentAttributeType.
//
// Every case returns directly. The switch has no `default` label; a value that
// matches none of the cases falls out of the switch into NOTREACHED().
optimization_guide::proto::ContentAttributeType ConvertAttributeType(
    blink::mojom::AIPageContentAttributeType type) {
  switch (type) {
    // Root / frame / generic container.
    case blink::mojom::AIPageContentAttributeType::kRoot:
      return optimization_guide::proto::CONTENT_ATTRIBUTE_ROOT;
    case blink::mojom::AIPageContentAttributeType::kIframe:
      return optimization_guide::proto::CONTENT_ATTRIBUTE_IFRAME;
    case blink::mojom::AIPageContentAttributeType::kContainer:
      return optimization_guide::proto::CONTENT_ATTRIBUTE_CONTAINER;

    // Text, paragraph, heading and anchor.
    case blink::mojom::AIPageContentAttributeType::kText:
      return optimization_guide::proto::CONTENT_ATTRIBUTE_TEXT;
    case blink::mojom::AIPageContentAttributeType::kParagraph:
      return optimization_guide::proto::CONTENT_ATTRIBUTE_PARAGRAPH;
    case blink::mojom::AIPageContentAttributeType::kHeading:
      return optimization_guide::proto::CONTENT_ATTRIBUTE_HEADING;
    case blink::mojom::AIPageContentAttributeType::kAnchor:
      return optimization_guide::proto::CONTENT_ATTRIBUTE_ANCHOR;

    // Image and form.
    case blink::mojom::AIPageContentAttributeType::kImage:
      return optimization_guide::proto::CONTENT_ATTRIBUTE_IMAGE;
    case blink::mojom::AIPageContentAttributeType::kForm:
      return optimization_guide::proto::CONTENT_ATTRIBUTE_FORM;

    // Table, table row and table cell.
    case blink::mojom::AIPageContentAttributeType::kTable:
      return optimization_guide::proto::CONTENT_ATTRIBUTE_TABLE;
    case blink::mojom::AIPageContentAttributeType::kTableRow:
      return optimization_guide::proto::CONTENT_ATTRIBUTE_TABLE_ROW;
    case blink::mojom::AIPageContentAttributeType::kTableCell:
      return optimization_guide::proto::CONTENT_ATTRIBUTE_TABLE_CELL;

    // Ordered list, unordered list and list item.
    case blink::mojom::AIPageContentAttributeType::kOrderedList:
      return optimization_guide::proto::CONTENT_ATTRIBUTE_ORDERED_LIST;
    case blink::mojom::AIPageContentAttributeType::kUnorderedList:
      return optimization_guide::proto::CONTENT_ATTRIBUTE_UNORDERED_LIST;
    case blink::mojom::AIPageContentAttributeType::kListItem:
      return optimization_guide::proto::CONTENT_ATTRIBUTE_LIST_ITEM;
  }
  NOTREACHED();
}
// Translates a blink AIPageContentAnnotatedRole into the corresponding
// optimization_guide proto AnnotatedRole.
//
// Every case returns directly; a value that matches none of the cases falls
// out of the switch into NOTREACHED().
optimization_guide::proto::AnnotatedRole ConvertAnnotatedRole(
    blink::mojom::AIPageContentAnnotatedRole role) {
  switch (role) {
    case blink::mojom::AIPageContentAnnotatedRole::kHeader:
      return optimization_guide::proto::ANNOTATED_ROLE_HEADER;
    case blink::mojom::AIPageContentAnnotatedRole::kFooter:
      return optimization_guide::proto::ANNOTATED_ROLE_FOOTER;
    case blink::mojom::AIPageContentAnnotatedRole::kNav:
      return optimization_guide::proto::ANNOTATED_ROLE_NAV;
    case blink::mojom::AIPageContentAnnotatedRole::kSearch:
      return optimization_guide::proto::ANNOTATED_ROLE_SEARCH;
    case blink::mojom::AIPageContentAnnotatedRole::kMain:
      return optimization_guide::proto::ANNOTATED_ROLE_MAIN;
    case blink::mojom::AIPageContentAnnotatedRole::kArticle:
      return optimization_guide::proto::ANNOTATED_ROLE_ARTICLE;
    case blink::mojom::AIPageContentAnnotatedRole::kSection:
      return optimization_guide::proto::ANNOTATED_ROLE_SECTION;
    case blink::mojom::AIPageContentAnnotatedRole::kAside:
      return optimization_guide::proto::ANNOTATED_ROLE_ASIDE;
  }
  NOTREACHED();
}
// Copies the four components of `mojom_rect` (x, y, width, height) into
// `proto_rect`. `proto_rect` is dereferenced unconditionally.
void ConvertRect(const gfx::Rect& mojom_rect,
                 optimization_guide::proto::BoundingRect* proto_rect) {
  auto& out = *proto_rect;

  // Extent.
  out.set_width(mojom_rect.width());
  out.set_height(mojom_rect.height());

  // Origin.
  out.set_x(mojom_rect.x());
  out.set_y(mojom_rect.y());
}
// Copies geometry information from `mojom_geometry` into `proto_geometry`:
// the fixed/sticky flag, the two scroll-overflow flags, and the outer and
// visible bounding boxes (each converted with ConvertRect).
void ConvertGeometry(const blink::mojom::AIPageContentGeometry& mojom_geometry,
                     optimization_guide::proto::Geometry* proto_geometry) {
  // Scalar flags.
  proto_geometry->set_is_fixed_or_sticky_position(
      mojom_geometry.is_fixed_or_sticky_position);
  proto_geometry->set_scrolls_overflow_x(mojom_geometry.scrolls_overflow_x);
  proto_geometry->set_scrolls_overflow_y(mojom_geometry.scrolls_overflow_y);

  // Bounding boxes.
  auto* outer_box = proto_geometry->mutable_outer_bounding_box();
  ConvertRect(mojom_geometry.outer_bounding_box, outer_box);

  auto* visible_box = proto_geometry->mutable_visible_bounding_box();
  ConvertRect(mojom_geometry.visible_bounding_box, visible_box);
}
// Translates a blink AIPageContentTextSize into the corresponding
// optimization_guide proto TextSize. Note that kM maps to
// TEXT_SIZE_M_DEFAULT.
//
// Every case returns directly; a value that matches none of the cases falls
// out of the switch into NOTREACHED().
optimization_guide::proto::TextSize ConvertTextSize(
    blink::mojom::AIPageContentTextSize text_size) {
  switch (text_size) {
    case blink::mojom::AIPageContentTextSize::kXL:
      return optimization_guide::proto::TextSize::TEXT_SIZE_XL;
    case blink::mojom::AIPageContentTextSize::kL:
      return optimization_guide::proto::TextSize::TEXT_SIZE_L;
    case blink::mojom::AIPageContentTextSize::kM:
      return optimization_guide::proto::TextSize::TEXT_SIZE_M_DEFAULT;
    case blink::mojom::AIPageContentTextSize::kS:
      return optimization_guide::proto::TextSize::TEXT_SIZE_S;
    case blink::mojom::AIPageContentTextSize::kXS:
      return optimization_guide::proto::TextSize::TEXT_SIZE_XS;
  }
  NOTREACHED();
}
// Copies the text content and text style (size and emphasis flag) from
// `mojom_text_info` into `proto_text_info`.
//
// NOTE(review): `mojom_text_info.text_style` is dereferenced without a null
// check; this presumably relies on the field being non-nullable in the mojom
// definition -- confirm.
void ConvertTextInfo(const blink::mojom::AIPageContentTextInfo& mojom_text_info,
                     optimization_guide::proto::TextInfo* proto_text_info) {
  proto_text_info->set_text_content(mojom_text_info.text_content);

  auto* proto_style = proto_text_info->mutable_text_style();
  const auto& mojom_style = *mojom_text_info.text_style;
  proto_style->set_text_size(ConvertTextSize(mojom_style.text_size));
  proto_style->set_has_emphasis(mojom_style.has_emphasis);
}
// Copies the optional fields of `mojom_image_info` into `proto_image_info`.
// A field that is not set on the mojom side is left untouched on the proto:
//   * image_caption -> image_caption
//   * source_origin -> source_url (the spec of the origin's URL)
void ConvertImageInfo(
    const blink::mojom::AIPageContentImageInfo& mojom_image_info,
    optimization_guide::proto::ImageInfo* proto_image_info) {
  if (const auto& caption = mojom_image_info.image_caption) {
    proto_image_info->set_image_caption(*caption);
  }

  if (const auto& origin = mojom_image_info.source_origin) {
    const GURL origin_url = origin->GetURL();
    proto_image_info->set_source_url(origin_url.spec());
  }
}
// Translates a blink AIPageContentAnchorRel into the corresponding
// optimization_guide proto AnchorRel.
//
// Every case returns directly; a value that matches none of the cases falls
// out of the switch into NOTREACHED().
optimization_guide::proto::AnchorRel ConvertAnchorRel(
    blink::mojom::AIPageContentAnchorRel rel) {
  switch (rel) {
    case blink::mojom::AIPageContentAnchorRel::kRelationUnknown:
      return optimization_guide::proto::ANCHOR_REL_UNKNOWN;
    case blink::mojom::AIPageContentAnchorRel::kRelationOpener:
      return optimization_guide::proto::ANCHOR_REL_OPENER;
    case blink::mojom::AIPageContentAnchorRel::kRelationNoOpener:
      return optimization_guide::proto::ANCHOR_REL_NO_OPENER;
    case blink::mojom::AIPageContentAnchorRel::kRelationNoReferrer:
      return optimization_guide::proto::ANCHOR_REL_NO_REFERRER;
    case blink::mojom::AIPageContentAnchorRel::kRelationPrivacyPolicy:
      return optimization_guide::proto::ANCHOR_REL_PRIVACY_POLICY;
    case blink::mojom::AIPageContentAnchorRel::kRelationTermsOfService:
      return optimization_guide::proto::ANCHOR_REL_TERMS_OF_SERVICE;
  }
  NOTREACHED();
}
// Copies anchor information from `mojom_anchor_data` into
// `proto_anchor_data`: the URL spec and every `rel` value, each converted
// with ConvertAnchorRel and appended in source order.
void ConvertAnchorData(
    const blink::mojom::AIPageContentAnchorData& mojom_anchor_data,
    optimization_guide::proto::AnchorData* proto_anchor_data) {
  for (const auto mojom_rel : mojom_anchor_data.rel) {
    proto_anchor_data->add_rel(ConvertAnchorRel(mojom_rel));
  }
  proto_anchor_data->set_url(mojom_anchor_data.url.spec());
}
// Placeholder for converting form data from `mojom_form_data` into
// `proto_form_data`. The body is currently empty: neither parameter is read
// or written, so the proto message is left exactly as the caller passed it.
void ConvertFormData(const blink::mojom::AIPageContentFormData& mojom_form_data,
optimization_guide::proto::FormData* proto_form_data) {
// TODO(crbug.com/381879263): Add fields for form data.
}
// Copies `table_name` from `mojom_table_data` into `proto_table_data` when
// it is set; otherwise leaves the proto untouched.
void ConvertTableData(
    const blink::mojom::AIPageContentTableData& mojom_table_data,
    optimization_guide::proto::TableData* proto_table_data) {
  if (const auto& name = mojom_table_data.table_name) {
    proto_table_data->set_table_name(*name);
  }
}
// Sets the row type on `proto_table_row_data` from
// `mojom_table_row_data.row_type` (header, body or footer). A row type that
// matches none of the cases leaves the proto untouched.
//
// The mojom struct is taken by const reference, like every other converter in
// this file, so that calling this function does not copy it.
void ConvertTableRowData(
    const blink::mojom::AIPageContentTableRowData& mojom_table_row_data,
    optimization_guide::proto::TableRowData* proto_table_row_data) {
  switch (mojom_table_row_data.row_type) {
    case blink::mojom::AIPageContentTableRowType::kHeader:
      proto_table_row_data->set_type(
          optimization_guide::proto::TableRowType::TABLE_ROW_TYPE_HEADER);
      break;
    case blink::mojom::AIPageContentTableRowType::kBody:
      proto_table_row_data->set_type(
          optimization_guide::proto::TableRowType::TABLE_ROW_TYPE_BODY);
      break;
    case blink::mojom::AIPageContentTableRowType::kFooter:
      proto_table_row_data->set_type(
          optimization_guide::proto::TableRowType::TABLE_ROW_TYPE_FOOTER);
      break;
  }
}
// Fills `proto_attributes` from `mojom_attributes`.
//
// Always copied: the DOM node ids, the common ancestor DOM node id (if set),
// the attribute type and the geometry (if set).
//
// At most one type-specific payload is converted: the first one that is set,
// checked in the order text_info, image_info, anchor_data, form_data,
// table_data, table_row_data. That payload must agree with `attribute_type`
// (e.g. text_info requires kText); later payloads are not inspected.
//
// Returns false if the selected payload does not match `attribute_type`. In
// that case the fields listed under "always copied" have already been
// written, but the annotated roles have not. Returns true otherwise, after
// appending all annotated roles.
bool ConvertAttributes(
    const blink::mojom::AIPageContentAttributes& mojom_attributes,
    optimization_guide::proto::ContentAttributes* proto_attributes) {
  for (const auto node_id : mojom_attributes.dom_node_ids) {
    proto_attributes->add_dom_node_ids(node_id);
  }

  if (const auto& common_ancestor =
          mojom_attributes.common_ancestor_dom_node_id) {
    proto_attributes->set_common_ancestor_dom_node_id(*common_ancestor);
  }

  const auto attribute_type = mojom_attributes.attribute_type;
  proto_attributes->set_attribute_type(ConvertAttributeType(attribute_type));

  if (const auto& geometry = mojom_attributes.geometry) {
    ConvertGeometry(*geometry, proto_attributes->mutable_geometry());
  }

  // Converts the first payload that is present. Yields false when that
  // payload disagrees with the node's attribute type, true otherwise
  // (including when no payload is present at all).
  const bool payload_ok = [&]() -> bool {
    if (mojom_attributes.text_info) {
      if (attribute_type == blink::mojom::AIPageContentAttributeType::kText) {
        ConvertTextInfo(*mojom_attributes.text_info,
                        proto_attributes->mutable_text_data());
        return true;
      }
      return false;
    }

    if (mojom_attributes.image_info) {
      if (attribute_type == blink::mojom::AIPageContentAttributeType::kImage) {
        ConvertImageInfo(*mojom_attributes.image_info,
                         proto_attributes->mutable_image_data());
        return true;
      }
      return false;
    }

    if (mojom_attributes.anchor_data) {
      if (attribute_type ==
          blink::mojom::AIPageContentAttributeType::kAnchor) {
        ConvertAnchorData(*mojom_attributes.anchor_data,
                          proto_attributes->mutable_anchor_data());
        return true;
      }
      return false;
    }

    if (mojom_attributes.form_data) {
      if (attribute_type == blink::mojom::AIPageContentAttributeType::kForm) {
        ConvertFormData(*mojom_attributes.form_data,
                        proto_attributes->mutable_form_data());
        return true;
      }
      return false;
    }

    if (mojom_attributes.table_data) {
      if (attribute_type == blink::mojom::AIPageContentAttributeType::kTable) {
        ConvertTableData(*mojom_attributes.table_data,
                         proto_attributes->mutable_table_data());
        return true;
      }
      return false;
    }

    if (mojom_attributes.table_row_data) {
      if (attribute_type ==
          blink::mojom::AIPageContentAttributeType::kTableRow) {
        ConvertTableRowData(*mojom_attributes.table_row_data,
                            proto_attributes->mutable_table_row_data());
        return true;
      }
      return false;
    }

    return true;
  }();
  if (!payload_ok) {
    return false;
  }

  for (const auto& mojom_role : mojom_attributes.annotated_roles) {
    proto_attributes->add_annotated_roles(ConvertAnnotatedRole(mojom_role));
  }
  return true;
}
// Fills `proto_iframe_data` for an iframe node: `url` is the serialized
// `render_frame_info.source_origin`, and `likely_ad_frame` is copied from
// `iframe_data`.
void ConvertIframeData(
    const RenderFrameInfo& render_frame_info,
    const blink::mojom::AIPageContentIframeData& iframe_data,
    optimization_guide::proto::IframeData* proto_iframe_data) {
  const auto& frame_origin = render_frame_info.source_origin;
  proto_iframe_data->set_likely_ad_frame(iframe_data.likely_ad_frame);
  proto_iframe_data->set_url(frame_origin.Serialize());
}
// Recursively converts `mojom_node` and its subtree into `proto_node`.
//
// Parameters:
//   source_frame_token    - token of the frame `mojom_node` came from; its
//                           `child_id` is passed to `get_render_frame_info`
//                           when an iframe node is resolved.
//   mojom_node            - node to convert; `content_attributes` is
//                           dereferenced unconditionally.
//   page_content_map      - content keyed by global frame token; consulted
//                           for iframes whose frame token is a
//                           blink::RemoteFrameToken.
//   get_render_frame_info - callback run with (child_id, frame_token) for
//                           iframe nodes; an empty result fails conversion.
//   proto_node            - output node; may be partially populated when
//                           false is returned.
//
// Returns false when the attributes fail to convert, when an iframe node has
// no iframe_data, when the frame info lookup yields nothing, when a remote
// frame node carries children, or when converting any descendant fails.
bool ConvertNode(content::GlobalRenderFrameHostToken source_frame_token,
const blink::mojom::AIPageContentNode& mojom_node,
const AIPageContentMap& page_content_map,
GetRenderFrameInfo get_render_frame_info,
optimization_guide::proto::ContentNode* proto_node) {
const auto& mojom_attributes = *mojom_node.content_attributes;
if (!ConvertAttributes(mojom_attributes,
proto_node->mutable_content_attributes())) {
return false;
}
// Stays empty unless this node is an iframe whose frame info was resolved.
std::optional<RenderFrameInfo> render_frame_info;
if (mojom_attributes.attribute_type ==
blink::mojom::AIPageContentAttributeType::kIframe) {
// An iframe node without iframe_data is treated as invalid input.
if (!mojom_attributes.iframe_data) {
return false;
}
const auto& iframe_data = *mojom_attributes.iframe_data;
const auto frame_token = iframe_data.frame_token;
// The frame may have been torn down or crashed before we got a response.
render_frame_info =
get_render_frame_info.Run(source_frame_token.child_id, frame_token);
if (!render_frame_info) {
return false;
}
if (frame_token.Is<blink::RemoteFrameToken>()) {
// RemoteFrame should have no child nodes since the content is out of
// process.
if (!mojom_node.children_nodes.empty()) {
return false;
}
// The remote frame's content, if any, is stored in the map under its own
// global frame token.
auto it = page_content_map.find(render_frame_info->global_frame_token);
// No content is available for the remote frame: report success with no
// children attached.
// NOTE(review): this early return skips the ConvertIframeData() call below,
// so iframe_data stays unset on this node -- confirm that is intended.
if (it == page_content_map.end()) {
return true;
}
const auto& frame_page_content = *it->second;
// Attach the remote frame's root node as the single child of this iframe
// node, converting it with the remote frame as the source frame.
auto* proto_child_frame_node = proto_node->add_children_nodes();
if (!ConvertNode(render_frame_info->global_frame_token,
*frame_page_content.root_node, page_content_map,
get_render_frame_info, proto_child_frame_node)) {
return false;
}
}
auto* proto_iframe_data =
proto_node->mutable_content_attributes()->mutable_iframe_data();
ConvertIframeData(*render_frame_info, iframe_data, proto_iframe_data);
}
// Children of an iframe node are converted with the iframe's own frame as
// their source frame; children of any other node inherit the caller's frame.
const auto source_frame_for_children =
render_frame_info ? render_frame_info->global_frame_token
: source_frame_token;
// For a remote-frame iframe this loop does nothing, since children_nodes was
// verified to be empty above.
for (const auto& mojom_child : mojom_node.children_nodes) {
auto* proto_child = proto_node->add_children_nodes();
if (!ConvertNode(source_frame_for_children, *mojom_child, page_content_map,
get_render_frame_info, proto_child)) {
return false;
}
}
return true;
}
} // namespace
// Converts the page content rooted at the main frame into `proto`.
//
// Parameters:
//   main_frame_token      - key of the main frame's entry in
//                           `page_content_map`.
//   page_content_map      - content keyed by global frame token.
//   get_render_frame_info - callback forwarded to ConvertNode for resolving
//                           iframe nodes.
//   proto                 - output message; its root node is filled from the
//                           main frame's root node.
//
// Returns false if the main frame has no entry in `page_content_map` or if
// converting the node tree fails; in the latter case `proto` may hold a
// partially converted tree. On success the version is set to
// ANNOTATED_PAGE_CONTENT_VERSION_1_0 and true is returned.
bool ConvertAIPageContentToProto(
    content::GlobalRenderFrameHostToken main_frame_token,
    const AIPageContentMap& page_content_map,
    GetRenderFrameInfo get_render_frame_info,
    optimization_guide::proto::AnnotatedPageContent* proto) {
  const auto main_frame_entry = page_content_map.find(main_frame_token);
  if (main_frame_entry == page_content_map.end()) {
    return false;
  }

  const auto& main_frame_root = *main_frame_entry->second->root_node;
  const bool converted =
      ConvertNode(main_frame_token, main_frame_root, page_content_map,
                  get_render_frame_info, proto->mutable_root_node());
  if (converted) {
    // The version is only stamped once the whole tree converted cleanly.
    proto->set_version(
        optimization_guide::proto::ANNOTATED_PAGE_CONTENT_VERSION_1_0);
  }
  return converted;
}
} // namespace optimization_guide