| // Copyright 2021 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include "components/continuous_search/common/public/mojom/continuous_search.mojom.h" |
| #include "components/continuous_search/renderer/search_result_extractor_impl.h" |
| #include "content/public/renderer/render_frame.h" |
| #include "content/public/renderer/render_view.h" |
| #include "content/public/test/render_view_test.h" |
| #include "third_party/blink/public/web/web_document.h" |
| #include "third_party/blink/public/web/web_local_frame.h" |
| |
| namespace continuous_search { |
| |
| // Test fixture that loads HTML into the main render frame and runs |
| // SearchResultExtractorImpl against it. |
| class SearchResultExtractorImplRenderViewTest : public content::RenderViewTest { |
| public: |
| SearchResultExtractorImplRenderViewTest() = default; |
| ~SearchResultExtractorImplRenderViewTest() override = default; |
| |
| // Loads the contents of `html`, asks the extractor for the groups listed in |
| // `result_types` and waits for its callback. Caller should provide the |
| // `expected_status` and `expected_results` which are used to verify the |
| // extraction behaved as intended. Note that |
| // `expected_results->document_url` will be overwritten with the document url |
| // once the provided `html` is loaded. |
| void LoadHtmlAndExpectExtractedOutput( |
| base::StringPiece html, |
| const std::vector<mojom::ResultType>& result_types, |
| mojom::SearchResultExtractor::Status expected_status, |
| mojom::CategoryResultsPtr expected_results) { |
| // LoadHTML() is handed a C string, but a StringPiece is not guaranteed to |
| // be NUL-terminated. Copy it so a terminator is always present. |
| const std::string html_copy(html); |
| LoadHTML(html_copy.c_str()); |
| expected_results->document_url = |
| GURL(GetMainRenderFrame()->GetWebFrame()->GetDocument().Url()); |
| base::RunLoop loop; |
| // Both outputs are written by the callback below before it quits `loop`. |
| mojom::SearchResultExtractor::Status out_status; |
| mojom::CategoryResultsPtr out_results; |
| { |
| auto* extractor = SearchResultExtractorImpl::Create(GetMainRenderFrame()); |
| // Fatal assertion: `extractor` is dereferenced on the next statement, so |
| // continuing with a null pointer would crash instead of failing cleanly. |
| ASSERT_NE(extractor, nullptr); |
| extractor->ExtractCurrentSearchResults( |
| result_types, base::BindOnce( |
| [](base::OnceClosure quit, |
| mojom::SearchResultExtractor::Status* out_status, |
| mojom::CategoryResultsPtr* out_results, |
| mojom::SearchResultExtractor::Status status, |
| mojom::CategoryResultsPtr results) { |
| *out_status = status; |
| *out_results = std::move(results); |
| std::move(quit).Run(); |
| }, |
| loop.QuitClosure(), base::Unretained(&out_status), |
| base::Unretained(&out_results))); |
| loop.Run(); |
| } |
| EXPECT_EQ(expected_status, out_status); |
| EXPECT_TRUE(expected_results.Equals(out_results)); |
| } |
| }; |
| |
| // Only ads are requested. One result per "mnr-c" card under id="tads" is |
| // expected; the second anchor of the first card ("Skipped") must not appear. |
| TEST_F(SearchResultExtractorImplRenderViewTest, TestExtractAdsOnly) { |
| auto hello_ad = mojom::SearchResult::New(); |
| hello_ad->title = u"Hello"; |
| hello_ad->link = GURL("https://www.example.com/"); |
| |
| auto world_ad = mojom::SearchResult::New(); |
| world_ad->title = u"World"; |
| world_ad->link = GURL("https://www.example1.com/"); |
| |
| auto ads = mojom::ResultGroup::New(); |
| ads->type = mojom::ResultType::kAds; |
| ads->results.push_back(std::move(hello_ad)); |
| ads->results.push_back(std::move(world_ad)); |
| |
| auto expected = mojom::CategoryResults::New(); |
| expected->groups.push_back(std::move(ads)); |
| |
| const std::vector<mojom::ResultType> requested_types = { |
| mojom::ResultType::kAds}; |
| LoadHtmlAndExpectExtractedOutput( |
| R"(<!doctype html> |
| <body> |
| <div id="tads"> |
| <div class="mnr-c foo"> |
| <a href="https://www.example.com/"> |
| <div role="heading"> |
| <span>Hello</span> |
| </div> |
| </a> |
| <a href="https://www.skipped_url.com/"> |
| <div role="heading"> |
| <span>Skipped</span> |
| </div> |
| </a> |
| </div> |
| <div class="mnr-c bar"> |
| <a href="https://www.example1.com/"> |
| <div role="heading"> |
| <span>World</span> |
| </div> |
| </a> |
| </div> |
| </div> |
| </body>)", |
| requested_types, mojom::SearchResultExtractor::Status::kSuccess, |
| std::move(expected)); |
| } |
| |
| TEST_F(SearchResultExtractorImplRenderViewTest, TestExtractNoAds) { |
| // Ads are optional: requesting only ads on a page without any still reports |
| // success, with no groups in the output. |
| constexpr char kHtml[] = R"(<!doctype html> |
| <body> |
| <div> |
| </div> |
| </body>)"; |
| const std::vector<mojom::ResultType> requested_types = { |
| mojom::ResultType::kAds}; |
| auto empty_results = mojom::CategoryResults::New(); |
| LoadHtmlAndExpectExtractedOutput( |
| kHtml, requested_types, mojom::SearchResultExtractor::Status::kSuccess, |
| std::move(empty_results)); |
| } |
| |
| // Ads and organic results are requested together. The expected output lists |
| // the ads group first, then the search results group. The "alpha" card under |
| // id="rso" is not an "mnr-c" card and must not appear. |
| TEST_F(SearchResultExtractorImplRenderViewTest, TestExtractAdsAndResults) { |
| // Expected ads group. |
| auto sponsored = mojom::SearchResult::New(); |
| sponsored->title = u"Hello"; |
| sponsored->link = GURL("https://www.example.com/"); |
| |
| auto ads_group = mojom::ResultGroup::New(); |
| ads_group->type = mojom::ResultType::kAds; |
| ads_group->results.push_back(std::move(sponsored)); |
| |
| // Expected search results group. |
| auto foo_result = mojom::SearchResult::New(); |
| foo_result->title = u"Foo"; |
| foo_result->link = GURL("https://www.foo.com/"); |
| |
| auto bar_result = mojom::SearchResult::New(); |
| bar_result->title = u"Bar"; |
| bar_result->link = GURL("https://www.bar.com/"); |
| |
| auto organic_group = mojom::ResultGroup::New(); |
| organic_group->type = mojom::ResultType::kSearchResults; |
| organic_group->results.push_back(std::move(foo_result)); |
| organic_group->results.push_back(std::move(bar_result)); |
| |
| auto expected = mojom::CategoryResults::New(); |
| expected->category_type = mojom::Category::kOrganic; |
| expected->groups.push_back(std::move(ads_group)); |
| expected->groups.push_back(std::move(organic_group)); |
| |
| const std::vector<mojom::ResultType> requested_types = { |
| mojom::ResultType::kAds, mojom::ResultType::kSearchResults}; |
| LoadHtmlAndExpectExtractedOutput( |
| R"(<!doctype html> |
| <body> |
| <div> |
| <div></div> |
| <div id="tads"> |
| <div> |
| <div class="mnr-c foo"> |
| <a href="https://www.example.com/"> |
| <div></div> |
| <div role="heading"> |
| <div>Hello</div> |
| </div> |
| </a> |
| </div> |
| </div> |
| </div> |
| <div id="rso"> |
| <div class="mnr-c"> |
| <div></div> |
| <div> |
| <a href="https://www.foo.com/"> |
| <div role="heading">Foo </div> |
| </a> |
| </div> |
| </div> |
| <div class="mnr-c"> |
| <div></div> |
| <div> |
| <a href="https://www.bar.com/"> |
| <div role="heading">Bar |
| </div> |
| </a> |
| </div> |
| </div> |
| <div class="alpha"> |
| <div></div> |
| <div> |
| <a href="https://www.beta.com/"> |
| <div role="heading">Beta</div> |
| </a> |
| </div> |
| </div> |
| </div> |
| </div> |
| </body>)", |
| requested_types, mojom::SearchResultExtractor::Status::kSuccess, |
| std::move(expected)); |
| } |
| |
| // Only organic results are requested. The page also contains an ad under |
| // id="tads", but no ads group is expected in the output. |
| TEST_F(SearchResultExtractorImplRenderViewTest, TestExtractResultsOnly) { |
| auto foo_result = mojom::SearchResult::New(); |
| foo_result->title = u"Foo"; |
| foo_result->link = GURL("https://www.foo.com/"); |
| |
| auto bar_result = mojom::SearchResult::New(); |
| bar_result->title = u"Bar"; |
| bar_result->link = GURL("https://www.bar.com/"); |
| |
| auto organic_group = mojom::ResultGroup::New(); |
| organic_group->type = mojom::ResultType::kSearchResults; |
| organic_group->results.push_back(std::move(foo_result)); |
| organic_group->results.push_back(std::move(bar_result)); |
| |
| auto expected = mojom::CategoryResults::New(); |
| expected->category_type = mojom::Category::kOrganic; |
| expected->groups.push_back(std::move(organic_group)); |
| |
| const std::vector<mojom::ResultType> requested_types = { |
| mojom::ResultType::kSearchResults}; |
| LoadHtmlAndExpectExtractedOutput( |
| R"(<!doctype html> |
| <body> |
| <div> |
| <div></div> |
| <div id="tads"> |
| <div> |
| <div class="mnr-c foo"> |
| <a href="https://www.example.com/"> |
| <div></div> |
| <div role="heading"> |
| <div>Hello</div> |
| </div> |
| </a> |
| </div> |
| </div> |
| </div> |
| <div id="rso"> |
| <div class="mnr-c"> |
| <div></div> |
| <div> |
| <a href="https://www.foo.com/"> |
| <div role="heading">Foo </div> |
| </a> |
| </div> |
| </div> |
| <div class="mnr-c"> |
| <div></div> |
| <div> |
| <a href="https://www.bar.com/"> |
| <div role="heading">Bar |
| </div> |
| </a> |
| </div> |
| </div> |
| <div class="alpha"> |
| <div></div> |
| <div> |
| <a href="https://www.beta.com/"> |
| <div role="heading">Beta</div> |
| </a> |
| </div> |
| </div> |
| </div> |
| </div> |
| </body>)", |
| requested_types, mojom::SearchResultExtractor::Status::kSuccess, |
| std::move(expected)); |
| } |
| |
| // Related searches are requested. The two anchors whose title div carries the |
| // "s75CSd" class are expected; the last anchor has no such div ("Skipped") and |
| // must not appear. |
| TEST_F(SearchResultExtractorImplRenderViewTest, TestExtractRelatedSearches) { |
| auto first_related = mojom::SearchResult::New(); |
| first_related->title = u"Related 1"; |
| first_related->link = GURL("https://www.example1.com/"); |
| |
| auto second_related = mojom::SearchResult::New(); |
| second_related->title = u"Related 2"; |
| second_related->link = GURL("https://www.example2.com/"); |
| |
| auto related_group = mojom::ResultGroup::New(); |
| related_group->type = mojom::ResultType::kRelatedSearches; |
| related_group->results.push_back(std::move(first_related)); |
| related_group->results.push_back(std::move(second_related)); |
| |
| auto expected = mojom::CategoryResults::New(); |
| expected->category_type = mojom::Category::kOrganic; |
| expected->groups.push_back(std::move(related_group)); |
| |
| const std::vector<mojom::ResultType> requested_types = { |
| mojom::ResultType::kRelatedSearches}; |
| LoadHtmlAndExpectExtractedOutput( |
| R"(<!doctype html> |
| <body> |
| <div id="w3bYAd"> |
| <div class="foo"> |
| <a href="https://www.example1.com/" class="k8XOCe bar"> |
| <div class="s75CSd bar"> |
| <span>Related 1</span> |
| </div> |
| </a> |
| <a href="https://www.example2.com/" class="k8XOCe baz"> |
| <div class="s75CSd baz"> |
| <span>Related 2</span> |
| </div> |
| </a> |
| </div> |
| <div class="mnr-c bar"> |
| <a href="https://www.example1.com/" class="k8XOCe buz"> |
| <div> |
| <span>Skipped</span> |
| </div> |
| </a> |
| </div> |
| </div> |
| </body>)", |
| requested_types, mojom::SearchResultExtractor::Status::kSuccess, |
| std::move(expected)); |
| } |
| |
| // The tests below this line are intended to test the branching of the |
| // extractor. The goal is to ensure there are no scenarios where the extraction |
| // might crash/fail if an almost correct result is presented. |
| |
| TEST_F(SearchResultExtractorImplRenderViewTest, |
| TestExtractRelatedSearchesNoId) { |
| // The anchor is otherwise well formed, but nothing carries id="w3bYAd". |
| constexpr char kHtml[] = R"(<!doctype html> |
| <body> |
| <div> |
| <a href="https://www.example1.com/" class="k8XOCe bar"> |
| <div class="s75CSd bar"> |
| <span>Related 1</span> |
| </div> |
| </a> |
| </div> |
| </body>)"; |
| const std::vector<mojom::ResultType> requested_types = { |
| mojom::ResultType::kRelatedSearches}; |
| auto empty_results = mojom::CategoryResults::New(); |
| LoadHtmlAndExpectExtractedOutput( |
| kHtml, requested_types, mojom::SearchResultExtractor::Status::kNoResults, |
| std::move(empty_results)); |
| } |
| |
| TEST_F(SearchResultExtractorImplRenderViewTest, |
| TestExtractRelatedSearchesNoAnchors) { |
| // The id="w3bYAd" container exists but holds no anchors. |
| constexpr char kHtml[] = R"(<!doctype html> |
| <body> |
| <div id="w3bYAd"> |
| </div> |
| </body>)"; |
| const std::vector<mojom::ResultType> requested_types = { |
| mojom::ResultType::kRelatedSearches}; |
| auto empty_results = mojom::CategoryResults::New(); |
| LoadHtmlAndExpectExtractedOutput( |
| kHtml, requested_types, mojom::SearchResultExtractor::Status::kNoResults, |
| std::move(empty_results)); |
| } |
| |
| TEST_F(SearchResultExtractorImplRenderViewTest, |
| TestExtractRelatedSearchesNoAnchorClass) { |
| // The anchor is missing the "k8XOCe" class. |
| constexpr char kHtml[] = R"(<!doctype html> |
| <body> |
| <div id="w3bYAd"> |
| <a href="https://www.example1.com/" class="bar"> |
| <div class="s75CSd bar"> |
| <span>Related 1</span> |
| </div> |
| </a> |
| </div> |
| </body>)"; |
| const std::vector<mojom::ResultType> requested_types = { |
| mojom::ResultType::kRelatedSearches}; |
| auto empty_results = mojom::CategoryResults::New(); |
| LoadHtmlAndExpectExtractedOutput( |
| kHtml, requested_types, mojom::SearchResultExtractor::Status::kNoResults, |
| std::move(empty_results)); |
| } |
| |
| TEST_F(SearchResultExtractorImplRenderViewTest, |
| TestExtractRelatedSearchesNoTitleClass) { |
| // The title div is missing the "s75CSd" class. |
| constexpr char kHtml[] = R"(<!doctype html> |
| <body> |
| <div id="w3bYAd"> |
| <a href="https://www.example1.com/" class="k8XOCe bar"> |
| <div class="bar"> |
| <span>Related 1</span> |
| </div> |
| </a> |
| </div> |
| </body>)"; |
| const std::vector<mojom::ResultType> requested_types = { |
| mojom::ResultType::kRelatedSearches}; |
| auto empty_results = mojom::CategoryResults::New(); |
| LoadHtmlAndExpectExtractedOutput( |
| kHtml, requested_types, mojom::SearchResultExtractor::Status::kNoResults, |
| std::move(empty_results)); |
| } |
| |
| TEST_F(SearchResultExtractorImplRenderViewTest, TestExtractResultsNoRso) { |
| // No id="rso": the "mnr-c" card is well formed, but its container has no id. |
| LoadHtmlAndExpectExtractedOutput( |
| R"(<!doctype html> |
| <body> |
| <div> |
| <div class="mnr-c"> |
| <a href="https://www.foo.com/"> |
| <div role="heading">Foo</div> |
| </a> |
| </div> |
| </div> |
| </body>)", |
| {mojom::ResultType::kSearchResults}, |
| mojom::SearchResultExtractor::Status::kNoResults, |
| mojom::CategoryResults::New()); |
| } |
| |
| TEST_F(SearchResultExtractorImplRenderViewTest, TestExtractResultsNoDivs) { |
| // The id="rso" container holds an anchor directly, with no div inside it. |
| constexpr char kHtml[] = R"(<!doctype html> |
| <body> |
| <div id="rso"> |
| <a href="https://www.foo.com/"> |
| <span role="heading">Foo</span> |
| </a> |
| </div> |
| </body>)"; |
| const std::vector<mojom::ResultType> requested_types = { |
| mojom::ResultType::kSearchResults}; |
| auto empty_results = mojom::CategoryResults::New(); |
| LoadHtmlAndExpectExtractedOutput( |
| kHtml, requested_types, mojom::SearchResultExtractor::Status::kNoResults, |
| std::move(empty_results)); |
| } |
| |
| TEST_F(SearchResultExtractorImplRenderViewTest, |
| TestExtractResultsNoMnrCardNoClass) { |
| // The div inside id="rso" has no class attribute at all. |
| constexpr char kHtml[] = R"(<!doctype html> |
| <body> |
| <div id="rso"> |
| <div> |
| <a href="https://www.foo.com/"> |
| <div role="heading">Foo</div> |
| </a> |
| </div> |
| </div> |
| </body>)"; |
| const std::vector<mojom::ResultType> requested_types = { |
| mojom::ResultType::kSearchResults}; |
| auto empty_results = mojom::CategoryResults::New(); |
| LoadHtmlAndExpectExtractedOutput( |
| kHtml, requested_types, mojom::SearchResultExtractor::Status::kNoResults, |
| std::move(empty_results)); |
| } |
| |
| TEST_F(SearchResultExtractorImplRenderViewTest, |
| TestExtractNoMnrCardNotFirstClass) { |
| // "mnr-c" is present, but only as the second class in the list. |
| constexpr char kHtml[] = R"(<!doctype html> |
| <body> |
| <div id="rso"> |
| <div class="foo mnr-c"> |
| <a href="https://www.foo.com/"> |
| <div role="heading">Foo</div> |
| </a> |
| </div> |
| </div> |
| </body>)"; |
| const std::vector<mojom::ResultType> requested_types = { |
| mojom::ResultType::kSearchResults}; |
| auto empty_results = mojom::CategoryResults::New(); |
| LoadHtmlAndExpectExtractedOutput( |
| kHtml, requested_types, mojom::SearchResultExtractor::Status::kNoResults, |
| std::move(empty_results)); |
| } |
| |
| TEST_F(SearchResultExtractorImplRenderViewTest, |
| TestExtractResultsNoLinkNoAnchor) { |
| // The "mnr-c" card holds a div with an href instead of an anchor. |
| constexpr char kHtml[] = R"(<!doctype html> |
| <body> |
| <div id="rso"> |
| <div class="mnr-c"> |
| <div href="https://www.foo.com/"> |
| <div role="heading">Foo</div> |
| </div> |
| </div> |
| </div> |
| </body>)"; |
| const std::vector<mojom::ResultType> requested_types = { |
| mojom::ResultType::kSearchResults}; |
| auto empty_results = mojom::CategoryResults::New(); |
| LoadHtmlAndExpectExtractedOutput( |
| kHtml, requested_types, mojom::SearchResultExtractor::Status::kNoResults, |
| std::move(empty_results)); |
| } |
| |
| TEST_F(SearchResultExtractorImplRenderViewTest, |
| TestExtractResultsNoLinkNoHref) { |
| // The anchor has no href attribute. |
| constexpr char kHtml[] = R"(<!doctype html> |
| <body> |
| <div id="rso"> |
| <div class="mnr-c"> |
| <a> |
| <div role="heading">Foo</div> |
| </a> |
| </div> |
| </div> |
| </body>)"; |
| const std::vector<mojom::ResultType> requested_types = { |
| mojom::ResultType::kSearchResults}; |
| auto empty_results = mojom::CategoryResults::New(); |
| LoadHtmlAndExpectExtractedOutput( |
| kHtml, requested_types, mojom::SearchResultExtractor::Status::kNoResults, |
| std::move(empty_results)); |
| } |
| |
| TEST_F(SearchResultExtractorImplRenderViewTest, |
| TestExtractResultsNoLinkEmptyHref) { |
| // The anchor's href attribute is present but empty. |
| constexpr char kHtml[] = R"(<!doctype html> |
| <body> |
| <div id="rso"> |
| <div class="mnr-c"> |
| <a href=""> |
| <div role="heading">Foo</div> |
| </a> |
| </div> |
| </div> |
| </body>)"; |
| const std::vector<mojom::ResultType> requested_types = { |
| mojom::ResultType::kSearchResults}; |
| auto empty_results = mojom::CategoryResults::New(); |
| LoadHtmlAndExpectExtractedOutput( |
| kHtml, requested_types, mojom::SearchResultExtractor::Status::kNoResults, |
| std::move(empty_results)); |
| } |
| |
| TEST_F(SearchResultExtractorImplRenderViewTest, |
| TestExtractResultsNoLinkWrongScheme) { |
| // The href uses a scheme other than http/https (mailto:). |
| constexpr char kHtml[] = R"(<!doctype html> |
| <body> |
| <div id="rso"> |
| <div class="mnr-c"> |
| <a href="mailto:foo@example.com"> |
| <div role="heading">Foo</div> |
| </a> |
| </div> |
| </div> |
| </body>)"; |
| const std::vector<mojom::ResultType> requested_types = { |
| mojom::ResultType::kSearchResults}; |
| auto empty_results = mojom::CategoryResults::New(); |
| LoadHtmlAndExpectExtractedOutput( |
| kHtml, requested_types, mojom::SearchResultExtractor::Status::kNoResults, |
| std::move(empty_results)); |
| } |
| |
| TEST_F(SearchResultExtractorImplRenderViewTest, |
| TestExtractResultsNoTitleNoDiv) { |
| // The anchor contains bare text and no inner div. |
| constexpr char kHtml[] = R"(<!doctype html> |
| <body> |
| <div id="rso"> |
| <div class="mnr-c"> |
| <a href="https://www.foo.com/"> |
| Foo |
| </a> |
| </div> |
| </div> |
| </body>)"; |
| const std::vector<mojom::ResultType> requested_types = { |
| mojom::ResultType::kSearchResults}; |
| auto empty_results = mojom::CategoryResults::New(); |
| LoadHtmlAndExpectExtractedOutput( |
| kHtml, requested_types, mojom::SearchResultExtractor::Status::kNoResults, |
| std::move(empty_results)); |
| } |
| |
| TEST_F(SearchResultExtractorImplRenderViewTest, |
| TestExtractResultsNoTitleNoRole) { |
| // The inner div carries no role attribute. |
| constexpr char kHtml[] = R"(<!doctype html> |
| <body> |
| <div id="rso"> |
| <div class="mnr-c"> |
| <a href="https://www.foo.com/"> |
| <div>Foo</div> |
| </a> |
| </div> |
| </div> |
| </body>)"; |
| const std::vector<mojom::ResultType> requested_types = { |
| mojom::ResultType::kSearchResults}; |
| auto empty_results = mojom::CategoryResults::New(); |
| LoadHtmlAndExpectExtractedOutput( |
| kHtml, requested_types, mojom::SearchResultExtractor::Status::kNoResults, |
| std::move(empty_results)); |
| } |
| |
| TEST_F(SearchResultExtractorImplRenderViewTest, |
| TestExtractResultsNoTitleNotDivHeading) { |
| // role="heading" is present, but on the anchor and a span rather than a div. |
| constexpr char kHtml[] = R"(<!doctype html> |
| <body> |
| <div id="rso"> |
| <div class="mnr-c"> |
| <a href="https://www.bar.com/" role="heading"> |
| <span role="heading">Bar</span> |
| </a> |
| </div> |
| </div> |
| </body>)"; |
| const std::vector<mojom::ResultType> requested_types = { |
| mojom::ResultType::kSearchResults}; |
| auto empty_results = mojom::CategoryResults::New(); |
| LoadHtmlAndExpectExtractedOutput( |
| kHtml, requested_types, mojom::SearchResultExtractor::Status::kNoResults, |
| std::move(empty_results)); |
| } |
| |
| TEST_F(SearchResultExtractorImplRenderViewTest, |
| TestExtractResultsNoTitleNoText) { |
| // The heading div exists but contains no text. |
| constexpr char kHtml[] = R"(<!doctype html> |
| <body> |
| <div id="rso"> |
| <div class="mnr-c"> |
| <a href="https://www.baz.com/"> |
| <div role="heading"></div> |
| </a> |
| </div> |
| </div> |
| </body>)"; |
| const std::vector<mojom::ResultType> requested_types = { |
| mojom::ResultType::kSearchResults}; |
| auto empty_results = mojom::CategoryResults::New(); |
| LoadHtmlAndExpectExtractedOutput( |
| kHtml, requested_types, mojom::SearchResultExtractor::Status::kNoResults, |
| std::move(empty_results)); |
| } |
| |
| } // namespace continuous_search |