// blob: 027f6598a2faa9e213c53190e4bd42cc5a0ad8f9 [file] [log] [blame]
// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_OPTIMIZATION_GUIDE_CONTENT_BROWSER_PAGE_TEXT_DUMP_RESULT_H_
#define COMPONENTS_OPTIMIZATION_GUIDE_CONTENT_BROWSER_PAGE_TEXT_DUMP_RESULT_H_
#include <stddef.h>
#include <iosfwd>
#include <optional>
#include <set>
#include <string>
#include <tuple>
#include "components/optimization_guide/content/mojom/page_text_service.mojom.h"
#include "content/public/browser/global_routing_id.h"
namespace optimization_guide {
// Holds the text dump from a single renderer together with its associated
// metadata. An instance goes through two phases:
//  * Preliminary - the frame metadata is known, but the text dump has not
//    been received yet, so |contents()| is unset.
//  * Completed - the text dump has been received and |contents()| is set.
class FrameTextDumpResult {
 public:
  ~FrameTextDumpResult();
  FrameTextDumpResult(const FrameTextDumpResult&);

  // Creates a preliminary instance with the given metadata.
  static FrameTextDumpResult Initialize(mojom::TextDumpEvent event,
                                        content::GlobalRenderFrameHostId rfh_id,
                                        bool amp_frame,
                                        int unique_navigation_id);

  // Returns a copy of |this| that is completed with |contents|. This is only
  // expected to be called once, and only on preliminary instances.
  FrameTextDumpResult CompleteWithContents(
      const std::u16string& contents) const;

  // Whether the class instance is completed yet.
  bool IsCompleted() const;

  // The text dump contents. Set only for completed instances. Note that the
  // string must be treated as untrusted data.
  const std::optional<std::u16string>& contents() const { return contents_; }

  // The text dump contents, decoded to UTF-8 as a best effort. Set only for
  // completed instances. Note that the string must be treated as untrusted
  // data.
  std::optional<std::string> utf8_contents() const;

  // The event at which the text dump is taken. Set for both preliminary and
  // completed instances.
  mojom::TextDumpEvent event() const { return event_; }

  // The unique identifier for the content::RenderFrameHost that the text dump
  // was taken in. Set for both preliminary and completed instances.
  content::GlobalRenderFrameHostId rfh_id() const { return rfh_id_; }

  // The unique id of the visible navigation for this frame dump, taken from
  // the visible NavigationEntry.
  int unique_navigation_id() const { return unique_navigation_id_; }

  // Whether the frame the text dump was taken in is an AMP frame. Set for both
  // preliminary and completed instances.
  bool amp_frame() const { return amp_frame_; }

  // Ordering used when instances are stored in a sorted container such as
  // std::set. Keys are compared in this order, and the first key that differs
  // decides the result:
  //  1. AMP frames first - When there are AMP frames on a page, it is expected
  //     that they will contain the most content.
  //  2. Longer contents first - Most consumers will only be interested in some
  //     of the page text. In this case, ensure that the biggest blob of text
  //     comes first since that is most likely to be the main content on the
  //     page. Unset contents count as length 0.
  //  3. Later events first - The later in the page's lifetime a text dump is
  //     taken, the more likely that it is complete.
  //  4. Remaining members (frame id, contents, navigation id) in ascending
  //     order. These carry no meaning of their own; they are compared only so
  //     that two instances are equivalent under this ordering exactly when
  //     operator== says they are equal.
  bool operator<(const FrameTextDumpResult& rhs) const {
    if (amp_frame() != rhs.amp_frame()) {
      // The flags differ, so |this| sorts first iff it is the AMP frame.
      return amp_frame();
    }

    size_t lhs_size = contents() ? contents()->size() : 0;
    size_t rhs_size = rhs.contents() ? rhs.contents()->size() : 0;
    if (lhs_size != rhs_size) {
      // Note the reverse ordering to put longer contents first.
      return lhs_size > rhs_size;
    }

    if (event() != rhs.event()) {
      // Note the reverse ordering to put later events first.
      return event() > rhs.event();
    }

    return std::tie(rfh_id_, contents_, unique_navigation_id_) <
           std::tie(rhs.rfh_id_, rhs.contents_, rhs.unique_navigation_id_);
  }

  // Member-wise equality over every field.
  bool operator==(const FrameTextDumpResult& other) const {
    return std::tie(event_, contents_, rfh_id_, amp_frame_,
                    unique_navigation_id_) ==
           std::tie(other.event_, other.contents_, other.rfh_id_,
                    other.amp_frame_, other.unique_navigation_id_);
  }

 private:
  // Private so that instances can only be created through Initialize() or by
  // copying.
  FrameTextDumpResult();

  // Value-initialized like the other scalar members so that it is never
  // indeterminate when read by the comparison operators. The real value is
  // presumably assigned by Initialize() from its |event| argument.
  mojom::TextDumpEvent event_{};
  std::optional<std::u16string> contents_;
  content::GlobalRenderFrameHostId rfh_id_;
  bool amp_frame_ = false;
  int unique_navigation_id_ = -1;
};
// A collection of zero or more FrameTextDumpResults that all come from the
// same page load.
class PageTextDumpResult {
 public:
  PageTextDumpResult();
  PageTextDumpResult(const PageTextDumpResult&);
  ~PageTextDumpResult();

  // Adds |frame_result| to the frame text dumps held by |this|.
  void AddFrameTextDumpResult(const FrameTextDumpResult& frame_result);

  // The concatenation of all AMP frames, or nullopt if no AMP frames are
  // present. The returned string must be treated as untrusted data.
  std::optional<std::string> GetAMPTextContent() const;

  // The concatenation of the mainframe, or nullopt if not present. The
  // returned string must be treated as untrusted data.
  std::optional<std::string> GetMainFrameTextContent() const;

  // The concatenation of all frames, AMP or main, or nullopt if |empty()|.
  // The returned string must be treated as untrusted data.
  std::optional<std::string> GetAllFramesTextContent() const;

  // True when no frame text dump is held.
  bool empty() const { return frame_results_.empty(); }

  // Read-only view of the held frame text dumps, in the order defined by
  // FrameTextDumpResult::operator<.
  const std::set<FrameTextDumpResult>& frame_results() const {
    return frame_results_;
  }

  // Two page results are equal when their sets of frame results are equal.
  bool operator==(const PageTextDumpResult& other) const {
    return frame_results() == other.frame_results();
  }

 private:
  std::set<FrameTextDumpResult> frame_results_;
};
// Stream-output overloads for the two result types declared in this header;
// their definitions live outside this file. Useful for debugging.
std::ostream& operator<<(std::ostream& os, const FrameTextDumpResult& frame);
std::ostream& operator<<(std::ostream& os, const PageTextDumpResult& page);
} // namespace optimization_guide
#endif // COMPONENTS_OPTIMIZATION_GUIDE_CONTENT_BROWSER_PAGE_TEXT_DUMP_RESULT_H_