| // Copyright 2024 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "components/optimization_guide/content/browser/page_content_proto_util.h" |
| |
| #include <vector> |
| |
| #include "base/notreached.h" |
| #include "components/optimization_guide/proto/features/common_quality_data.pb.h" |
| #include "third_party/blink/public/mojom/content_extraction/ai_page_content.mojom.h" |
| #include "url/gurl.h" |
| |
| namespace optimization_guide { |
| |
| namespace { |
| optimization_guide::proto::features::ContentAttributeType ConvertAttributeType( |
| blink::mojom::AIPageContentAttributeType type) { |
| switch (type) { |
| case blink::mojom::AIPageContentAttributeType::kRoot: |
| return optimization_guide::proto::features::CONTENT_ATTRIBUTE_ROOT; |
| case blink::mojom::AIPageContentAttributeType::kContainer: |
| return optimization_guide::proto::features::CONTENT_ATTRIBUTE_CONTAINER; |
| case blink::mojom::AIPageContentAttributeType::kIframe: |
| return optimization_guide::proto::features::CONTENT_ATTRIBUTE_IFRAME; |
| case blink::mojom::AIPageContentAttributeType::kParagraph: |
| return optimization_guide::proto::features::CONTENT_ATTRIBUTE_PARAGRAPH; |
| case blink::mojom::AIPageContentAttributeType::kHeading: |
| return optimization_guide::proto::features::CONTENT_ATTRIBUTE_HEADING; |
| case blink::mojom::AIPageContentAttributeType::kOrderedList: |
| return optimization_guide::proto::features:: |
| CONTENT_ATTRIBUTE_ORDERED_LIST; |
| case blink::mojom::AIPageContentAttributeType::kUnorderedList: |
| return optimization_guide::proto::features:: |
| CONTENT_ATTRIBUTE_UNORDERED_LIST; |
| case blink::mojom::AIPageContentAttributeType::kAnchor: |
| // TODO(crbug.com/382083796): Add this type to the proto. |
| return optimization_guide::proto::features::CONTENT_ATTRIBUTE_UNKNOWN; |
| case blink::mojom::AIPageContentAttributeType::kForm: |
| return optimization_guide::proto::features::CONTENT_ATTRIBUTE_FORM; |
| case blink::mojom::AIPageContentAttributeType::kFigure: |
| case blink::mojom::AIPageContentAttributeType::kHeader: |
| case blink::mojom::AIPageContentAttributeType::kNav: |
| case blink::mojom::AIPageContentAttributeType::kSearch: |
| case blink::mojom::AIPageContentAttributeType::kMain: |
| case blink::mojom::AIPageContentAttributeType::kArticle: |
| case blink::mojom::AIPageContentAttributeType::kSection: |
| case blink::mojom::AIPageContentAttributeType::kAside: |
| case blink::mojom::AIPageContentAttributeType::kFooter: |
| // TODO(crbug.com/382083796): Add this type to the proto. |
| return optimization_guide::proto::features::CONTENT_ATTRIBUTE_UNKNOWN; |
| case blink::mojom::AIPageContentAttributeType::kTable: |
| return optimization_guide::proto::features::CONTENT_ATTRIBUTE_TABLE; |
| case blink::mojom::AIPageContentAttributeType::kTableCell: |
| // TODO(crbug.com/382083796): Add this type to the proto. |
| return optimization_guide::proto::features::CONTENT_ATTRIBUTE_UNKNOWN; |
| } |
| |
| NOTREACHED(); |
| } |
| |
| void ConvertRect( |
| const gfx::Rect& mojom_rect, |
| optimization_guide::proto::features::BoundingRect* proto_rect) { |
| proto_rect->set_x(mojom_rect.x()); |
| proto_rect->set_y(mojom_rect.y()); |
| proto_rect->set_width(mojom_rect.width()); |
| proto_rect->set_height(mojom_rect.height()); |
| } |
| |
| void ConvertGeometry( |
| const blink::mojom::AIPageContentGeometry& mojom_geometry, |
| optimization_guide::proto::features::Geometry* proto_geometry) { |
| ConvertRect(mojom_geometry.outer_bounding_box, |
| proto_geometry->mutable_outer_bounding_box()); |
| ConvertRect(mojom_geometry.visible_bounding_box, |
| proto_geometry->mutable_visible_bounding_box()); |
| } |
| |
| void ConvertTextInfo( |
| const std::vector<blink::mojom::AIPageContentTextInfoPtr>& mojom_text_info, |
| google::protobuf::RepeatedPtrField< |
| optimization_guide::proto::features::TextInfo>* proto_text_info) { |
| for (const auto& mojom_text : mojom_text_info) { |
| auto* proto_text = proto_text_info->Add(); |
| proto_text->set_text_content(mojom_text->text_content); |
| |
| auto* bounding_box = proto_text->mutable_text_bounding_box(); |
| bounding_box->set_x(mojom_text->text_bounding_box.x()); |
| bounding_box->set_y(mojom_text->text_bounding_box.y()); |
| bounding_box->set_width(mojom_text->text_bounding_box.width()); |
| bounding_box->set_height(mojom_text->text_bounding_box.height()); |
| } |
| } |
| |
| void ConvertImageInfo( |
| const std::vector<blink::mojom::AIPageContentImageInfoPtr>& |
| mojom_image_info, |
| google::protobuf::RepeatedPtrField< |
| optimization_guide::proto::features::ImageInfo>* proto_image_info) { |
| for (const auto& mojom_image : mojom_image_info) { |
| auto* proto_image = proto_image_info->Add(); |
| if (mojom_image->image_caption) { |
| proto_image->set_image_caption(*mojom_image->image_caption); |
| } |
| |
| auto* bounding_box = proto_image->mutable_image_bounding_box(); |
| bounding_box->set_x(mojom_image->image_bounding_box.x()); |
| bounding_box->set_y(mojom_image->image_bounding_box.y()); |
| bounding_box->set_width(mojom_image->image_bounding_box.width()); |
| bounding_box->set_height(mojom_image->image_bounding_box.height()); |
| |
| if (mojom_image->source_origin) { |
| proto_image->set_source_url(mojom_image->source_origin->GetURL().spec()); |
| } |
| } |
| } |
| |
| void ConvertAttributes( |
| const blink::mojom::AIPageContentAttributes& mojom_attributes, |
| optimization_guide::proto::features::ContentAttributes* proto_attributes) { |
| for (const auto& dom_node_id : mojom_attributes.dom_node_ids) { |
| proto_attributes->add_dom_node_ids(dom_node_id); |
| } |
| |
| if (mojom_attributes.common_ancestor_dom_node_id.has_value()) { |
| proto_attributes->set_common_ancestor_dom_node_id( |
| mojom_attributes.common_ancestor_dom_node_id.value()); |
| } |
| |
| proto_attributes->set_attribute_type( |
| ConvertAttributeType(mojom_attributes.attribute_type)); |
| |
| if (mojom_attributes.geometry) { |
| ConvertGeometry(*mojom_attributes.geometry, |
| proto_attributes->mutable_geometry()); |
| } |
| |
| ConvertTextInfo(mojom_attributes.text_info, |
| proto_attributes->mutable_text_info()); |
| ConvertImageInfo(mojom_attributes.image_info, |
| proto_attributes->mutable_image_info()); |
| } |
| |
| void ConvertIframeData( |
| const RenderFrameInfo& render_frame_info, |
| const blink::mojom::AIPageContentIframeData& iframe_data, |
| optimization_guide::proto::features::IframeData* proto_iframe_data) { |
| proto_iframe_data->set_url(render_frame_info.source_origin.Serialize()); |
| proto_iframe_data->set_likely_ad_frame(iframe_data.likely_ad_frame); |
| } |
| |
| bool ConvertNode(content::GlobalRenderFrameHostToken source_frame_token, |
| const blink::mojom::AIPageContentNode& mojom_node, |
| const AIPageContentMap& page_content_map, |
| GetRenderFrameInfo get_render_frame_info, |
| optimization_guide::proto::features::ContentNode* proto_node) { |
| const auto& mojom_attributes = *mojom_node.content_attributes; |
| ConvertAttributes(mojom_attributes, proto_node->mutable_content_attributes()); |
| |
| std::optional<RenderFrameInfo> render_frame_info; |
| if (mojom_attributes.attribute_type == |
| blink::mojom::AIPageContentAttributeType::kIframe) { |
| if (!mojom_attributes.iframe_data) { |
| return false; |
| } |
| |
| const auto& iframe_data = *mojom_attributes.iframe_data; |
| const auto frame_token = iframe_data.frame_token; |
| |
| // The frame may have been torn down or crashed before we got a response. |
| render_frame_info = |
| get_render_frame_info.Run(source_frame_token.child_id, frame_token); |
| if (!render_frame_info) { |
| return false; |
| } |
| |
| if (frame_token.Is<blink::RemoteFrameToken>()) { |
| // RemoteFrame should have no child nodes since the content is out of |
| // process. |
| if (!mojom_node.children_nodes.empty()) { |
| return false; |
| } |
| |
| auto it = page_content_map.find(render_frame_info->global_frame_token); |
| if (it == page_content_map.end()) { |
| return true; |
| } |
| |
| const auto& frame_page_content = *it->second; |
| auto* proto_child_frame_node = proto_node->add_children_nodes(); |
| if (!ConvertNode(render_frame_info->global_frame_token, |
| *frame_page_content.root_node, page_content_map, |
| get_render_frame_info, proto_child_frame_node)) { |
| return false; |
| } |
| } |
| |
| auto* proto_iframe_data = |
| proto_node->mutable_content_attributes()->mutable_iframe_data(); |
| ConvertIframeData(*render_frame_info, iframe_data, proto_iframe_data); |
| } |
| |
| const auto source_frame_for_children = |
| render_frame_info ? render_frame_info->global_frame_token |
| : source_frame_token; |
| for (const auto& mojom_child : mojom_node.children_nodes) { |
| auto* proto_child = proto_node->add_children_nodes(); |
| if (!ConvertNode(source_frame_for_children, *mojom_child, page_content_map, |
| get_render_frame_info, proto_child)) { |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| } // namespace |
| |
| bool ConvertAIPageContentToProto( |
| content::GlobalRenderFrameHostToken main_frame_token, |
| const AIPageContentMap& page_content_map, |
| GetRenderFrameInfo get_render_frame_info, |
| optimization_guide::proto::features::AnnotatedPageContent* proto) { |
| auto it = page_content_map.find(main_frame_token); |
| if (it == page_content_map.end()) { |
| return false; |
| } |
| |
| const auto& main_frame_page_content = *it->second; |
| if (!ConvertNode(main_frame_token, *main_frame_page_content.root_node, |
| page_content_map, get_render_frame_info, |
| proto->mutable_root_node())) { |
| return false; |
| } |
| |
| proto->set_version( |
| optimization_guide::proto::features::ANNOTATED_PAGE_CONTENT_VERSION_1_0); |
| return true; |
| } |
| |
| } // namespace optimization_guide |