// blob: 939f6ec880100c648f3509d64c88fa68322a495a [file] [log] [blame] [edit]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Note that although this is not a "browser" test, it runs as part of
// browser_tests. This is because WebKit does not work properly if it is
// shutdown and re-initialized. Since browser_tests runs each test in a
// new process, this avoids the problem.
#include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h"
#include "base/bind.h"
#include "base/callback.h"
#include "base/compiler_specific.h"
#include "base/memory/weak_ptr.h"
#include "base/message_loop.h"
#include "base/time.h"
#include "chrome/renderer/safe_browsing/features.h"
#include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
#include "chrome/renderer/safe_browsing/test_utils.h"
#include "content/public/test/render_view_fake_resources_test.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebScriptSource.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/platform/WebString.h"
using ::testing::DoAll;
using ::testing::Invoke;
using ::testing::Return;
namespace safe_browsing {
// Test fixture that runs a PhishingDOMFeatureExtractor against the RenderView
// provided by RenderViewFakeResourcesTest.  The extractor is constructed with
// a mock clock (|clock_|) so that each test can script the time values the
// extractor observes.
class PhishingDOMFeatureExtractorTest
    : public content::RenderViewFakeResourcesTest {
 public:
  // Helper for the SubframeRemoval test that posts a message to remove
  // the iframe "frame1" from the document.  The SubframeRemoval test body
  // hands this method to gmock's Invoke(), and the posted task is bound
  // through |weak_factory_|.
  void ScheduleRemoveIframe() {
    message_loop_.PostTask(
        FROM_HERE,
        base::Bind(&PhishingDOMFeatureExtractorTest::RemoveIframe,
                   weak_factory_.GetWeakPtr()));
  }

 protected:
  // |success_| is given a defined value up front so that it is never read
  // while indeterminate, even if a future helper inspects it before the
  // first ExtractFeatures() call.
  PhishingDOMFeatureExtractorTest()
      : content::RenderViewFakeResourcesTest(),
        success_(false),
        ALLOW_THIS_IN_INITIALIZER_LIST(weak_factory_(this)) {}

  virtual ~PhishingDOMFeatureExtractorTest() {}

  virtual void SetUp() {
    // Set up WebKit and the RenderView.
    content::RenderViewFakeResourcesTest::SetUp();
    // The extractor needs the view, so it can only be created after the base
    // class has finished its own SetUp().
    extractor_.reset(new PhishingDOMFeatureExtractor(view(), &clock_));
  }

  virtual void TearDown() {
    content::RenderViewFakeResourcesTest::TearDown();
  }

  // Runs the DOMFeatureExtractor on the RenderView, waiting for the
  // completion callback.  |features| receives the extracted features.
  // Returns the success boolean from the callback.
  bool ExtractFeatures(FeatureMap* features) {
    // Reset so a stale value from a previous extraction is never returned.
    success_ = false;
    extractor_->ExtractFeatures(
        features,
        base::Bind(&PhishingDOMFeatureExtractorTest::ExtractionDone,
                   base::Unretained(this)));
    // Spins until ExtractionDone() quits the loop.
    message_loop_.Run();
    return success_;
  }

  // Completion callback for feature extraction.  Records |success| and
  // unblocks the message loop that ExtractFeatures() is running.
  void ExtractionDone(bool success) {
    success_ = success;
    message_loop_.Quit();
  }

  // Does the actual work of removing the iframe "frame1" from the document.
  void RemoveIframe() {
    WebKit::WebFrame* main_frame = GetMainFrame();
    ASSERT_TRUE(main_frame);
    main_frame->executeScript(
        WebKit::WebString(
            "document.body.removeChild(document.getElementById('frame1'));"));
  }

  MockFeatureExtractorClock clock_;
  scoped_ptr<PhishingDOMFeatureExtractor> extractor_;
  bool success_;  // holds the success value from ExtractFeatures
  base::WeakPtrFactory<PhishingDOMFeatureExtractorTest> weak_factory_;
};
// Covers the form-related features: presence of forms, the fraction of form
// actions that point at another domain, and the kinds of <input> elements
// found on the page.  Four variants of the same URL are loaded in turn.
TEST_F(PhishingDOMFeatureExtractorTest, FormFeatures) {
  const char kPageUrl[] = "http://host.com/";

  // Extraction timing is not under test, so the mock clock always reports the
  // same instant.
  EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));

  FeatureMap extracted;
  FeatureMap expected;

  // Variant 1: several forms, a text input and a checkbox.  Only the form
  // posting to other.com is off-domain.
  // NOTE(review): 0.25 suggests the action-less <form> is left out of the
  // denominator (1 of 4) -- confirm against the extractor.
  responses_[kPageUrl] =
      "<html><head><body>"
      "<form action=\"query\"><input type=text><input type=checkbox></form>"
      "<form action=\"http://cgi.host.com/submit\"></form>"
      "<form action=\"http://other.com/\"></form>"
      "<form action=\"query\"></form>"
      "<form></form></body></html>";
  LoadURL(kPageUrl);
  ASSERT_TRUE(ExtractFeatures(&extracted));
  expected.AddBooleanFeature(features::kPageHasForms);
  expected.AddRealFeature(features::kPageActionOtherDomainFreq, 0.25);
  expected.AddBooleanFeature(features::kPageHasTextInputs);
  expected.AddBooleanFeature(features::kPageHasCheckInputs);
  ExpectFeatureMapsAreEqual(extracted, expected);

  // Variant 2: radio and password inputs outside of any form.
  responses_[kPageUrl] =
      "<html><head><body>"
      "<input type=\"radio\"><input type=password></body></html>";
  extracted.Clear();
  LoadURL(kPageUrl);
  ASSERT_TRUE(ExtractFeatures(&extracted));
  expected.Clear();
  expected.AddBooleanFeature(features::kPageHasRadioInputs);
  expected.AddBooleanFeature(features::kPageHasPswdInputs);
  ExpectFeatureMapsAreEqual(extracted, expected);

  // Variant 3: an <input> with no type attribute is expected to be reported
  // as a text input.
  responses_[kPageUrl] =
      "<html><head><body><input></body></html>";
  extracted.Clear();
  LoadURL(kPageUrl);
  ASSERT_TRUE(ExtractFeatures(&extracted));
  expected.Clear();
  expected.AddBooleanFeature(features::kPageHasTextInputs);
  ExpectFeatureMapsAreEqual(extracted, expected);

  // Variant 4: an <input> with an unrecognized type is likewise expected to
  // be reported as a text input.
  responses_[kPageUrl] =
      "<html><head><body><input type=\"invalid\"></body></html>";
  extracted.Clear();
  LoadURL(kPageUrl);
  ASSERT_TRUE(ExtractFeatures(&extracted));
  expected.Clear();
  expected.AddBooleanFeature(features::kPageHasTextInputs);
  ExpectFeatureMapsAreEqual(extracted, expected);
}
// Covers the link-related features: the fraction of links that leave the
// page's domain, the fraction that use https, and the per-domain feature that
// is emitted for each external link target.
TEST_F(PhishingDOMFeatureExtractorTest, LinkFeatures) {
  // This test doesn't exercise the extraction timing.
  EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));

  // An http page with one same-domain link, one named anchor (no href) and
  // one external link.  The closing </html> tag is written out in full so
  // that the fixture is well-formed markup.
  responses_["http://www.host.com/"] =
      "<html><head><body>"
      "<a href=\"http://www2.host.com/abc\">link</a>"
      "<a name=page_anchor></a>"
      "<a href=\"http://www.chromium.org/\">chromium</a>"
      "</body></html>";

  FeatureMap expected_features;
  expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.5);
  expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.0);
  expected_features.AddBooleanFeature(features::kPageLinkDomain +
                                      std::string("chromium.org"));

  FeatureMap extracted_features;
  LoadURL("http://www.host.com/");
  ASSERT_TRUE(ExtractFeatures(&extracted_features));
  ExpectFeatureMapsAreEqual(extracted_features, expected_features);

  // An https page.  The expected values below treat the relative link and the
  // explicit https link as secure (2 of 4), and only the chromium.org link as
  // external (1 of 4).
  responses_.clear();
  responses_["https://www.host.com/"] =
      "<html><head><body>"
      "<a href=\"login\">this is secure</a>"
      "<a href=\"http://host.com\">not secure</a>"
      "<a href=\"https://www2.host.com/login\">also secure</a>"
      "<a href=\"http://chromium.org/\">also not secure</a>"
      "</body></html>";

  expected_features.Clear();
  expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25);
  expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.5);
  expected_features.AddBooleanFeature(features::kPageLinkDomain +
                                      std::string("chromium.org"));

  extracted_features.Clear();
  LoadURL("https://www.host.com/");
  ASSERT_TRUE(ExtractFeatures(&extracted_features));
  ExpectFeatureMapsAreEqual(extracted_features, expected_features);
}
// Covers the script-count threshold features and the fraction of images that
// are served from another domain.
TEST_F(PhishingDOMFeatureExtractorTest, ScriptAndImageFeatures) {
  const char kPageUrl[] = "http://host.com/";

  // Extraction timing is not under test, so the mock clock always reports the
  // same instant.
  EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));

  FeatureMap extracted;
  FeatureMap expected;

  // Two <script> tags: only the "more than one" threshold is expected.
  responses_[kPageUrl] =
      "<html><head><script></script><script></script></head></html>";
  LoadURL(kPageUrl);
  ASSERT_TRUE(ExtractFeatures(&extracted));
  expected.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
  ExpectFeatureMapsAreEqual(extracted, expected);

  // Seven <script> tags plus two images, one of which lives on host2.com:
  // both script thresholds are expected, and half of the images are
  // off-domain.
  responses_[kPageUrl] =
      "<html><head><script></script><script></script><script></script>"
      "<script></script><script></script><script></script><script></script>"
      "</head><body><img src=\"blah.gif\">"
      "<img src=\"http://host2.com/blah.gif\"></body></html>";
  extracted.Clear();
  LoadURL(kPageUrl);
  ASSERT_TRUE(ExtractFeatures(&extracted));
  expected.Clear();
  expected.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
  expected.AddBooleanFeature(features::kPageNumScriptTagsGTSix);
  expected.AddRealFeature(features::kPageImgOtherDomainFreq, 0.5);
  ExpectFeatureMapsAreEqual(extracted, expected);
}
// Verifies that features are collected from the top-level document and from
// every (possibly nested) iframe, and that they are merged into one map.
TEST_F(PhishingDOMFeatureExtractorTest, SubFrames) {
  // This test doesn't exercise the extraction timing.
  EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));

  // Test that features are aggregated across all frames.
  // Top-level page: a text input, one relative link and two iframes.
  responses_["http://host.com/"] =
      "<html><body><input type=text><a href=\"info.html\">link</a>"
      "<iframe src=\"http://host2.com/\"></iframe>"
      "<iframe src=\"http://host3.com/\"></iframe>"
      "</body></html>";
  // First iframe: two forms, a same-domain link and a nested iframe.  The
  // document is closed with </body></html> so the fixture is well-formed.
  responses_["http://host2.com/"] =
      "<html><head><script></script><body>"
      "<form action=\"http://host4.com/\"><input type=checkbox></form>"
      "<form action=\"http://host2.com/submit\"></form>"
      "<a href=\"http://www.host2.com/home\">link</a>"
      "<iframe src=\"nested.html\"></iframe>"
      "</body></html>";
  // Nested iframe inside host2.com: a password input, one external https
  // link and one relative link.
  responses_["http://host2.com/nested.html"] =
      "<html><body><input type=password>"
      "<a href=\"https://host4.com/\">link</a>"
      "<a href=\"relative\">another</a>"
      "</body></html>";
  // Second iframe: a script and an image hosted on host.com.
  responses_["http://host3.com/"] =
      "<html><head><script></script><body>"
      "<img src=\"http://host.com/123.png\">"
      "</body></html>";

  FeatureMap expected_features;
  expected_features.AddBooleanFeature(features::kPageHasForms);
  // Form action domains are compared to the URL of the document they're in,
  // not the URL of the toplevel page.  So http://host2.com/ has two form
  // actions, one of which is external.
  expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.5);
  expected_features.AddBooleanFeature(features::kPageHasTextInputs);
  expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
  expected_features.AddBooleanFeature(features::kPageHasCheckInputs);
  // Four links across all frames; only the https://host4.com/ link is both
  // external and secure.
  expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25);
  expected_features.AddBooleanFeature(features::kPageLinkDomain +
                                      std::string("host4.com"));
  expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.25);
  // One <script> in each of host2.com and host3.com adds up to two.
  expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
  // The only image is in the host3.com frame but is served from host.com.
  expected_features.AddRealFeature(features::kPageImgOtherDomainFreq, 1.0);

  FeatureMap extracted_features;
  LoadURL("http://host.com/");
  ASSERT_TRUE(ExtractFeatures(&extracted_features));
  ExpectFeatureMapsAreEqual(extracted_features, expected_features);
}
// Drives the extractor through several chunks of work by scripting the mock
// clock, first slowly enough that extraction completes and then fast enough
// that the total time budget is exceeded and extraction fails.
TEST_F(PhishingDOMFeatureExtractorTest, Continuation) {
  using base::TimeDelta;

  // The page below contains 50 elements in total.  The two forms sit at
  // opposite ends of the body, so the external-forms feature only comes out
  // right if the extractor walks the whole document.
  // Note: the explicit empty HEAD matters -- WebKit synthesizes a HEAD when
  // none is present, which would make the element counts confusing.
  std::string page_html = "<html><head></head><body>"
      "<form action=\"ondomain\"></form>";
  for (int remaining = 45; remaining > 0; --remaining)
    page_html.append("<p>");
  page_html.append("<form action=\"http://host2.com/\"></form></body></html>");
  responses_["http://host.com/"] = page_html;

  // First pass: the clock moves 6 ms per 10 elements processed and 10 ms
  // between chunks.  This relies on kClockCheckGranularity = 10 and
  // kMaxTimePerChunkMs = 10.
  const base::TimeTicks start = base::TimeTicks::Now();
  EXPECT_CALL(clock_, Now())
      // Start of extraction.
      .WillOnce(Return(start))
      // Start of the first chunk.
      .WillOnce(Return(start))
      // After elements 1-10.
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(6)))
      // After elements 11-20: past the per-chunk limit, so a continuation
      // task gets posted.
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(12)))
      // Start of the second chunk.
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(22)))
      // Iteration resumed within the second chunk.
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(24)))
      // After elements 21-30.
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(30)))
      // After elements 31-40: another continuation task gets posted.
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(36)))
      // Start of the third chunk.
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(46)))
      // Iteration resumed within the third chunk.
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(48)))
      // After elements 41-50.
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(54)))
      // Last reading, used for the histograms.
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(56)));

  FeatureMap extracted;
  LoadURL("http://host.com/");
  ASSERT_TRUE(ExtractFeatures(&extracted));

  FeatureMap expected;
  expected.AddBooleanFeature(features::kPageHasForms);
  expected.AddRealFeature(features::kPageActionOtherDomainFreq, 0.5);
  ExpectFeatureMapsAreEqual(extracted, expected);

  // Drop the first set of expectations so they cannot leak into the second
  // pass.
  ::testing::Mock::VerifyAndClearExpectations(&clock_);

  // Second pass over the same, still-loaded page: the clock now jumps far
  // enough that the overall time budget is blown, so extraction must report
  // failure.  This relies on kMaxTotalTimeMs = 500.
  EXPECT_CALL(clock_, Now())
      // Start of extraction.
      .WillOnce(Return(start))
      // Start of the first chunk.
      .WillOnce(Return(start))
      // After elements 1-10.
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(300)))
      // Start of the second chunk.
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(350)))
      // Iteration resumed within the second chunk.
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(360)))
      // After elements 11-20: beyond the total limit.
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(600)))
      // Last reading, used for the histograms.
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(620)));

  extracted.Clear();
  EXPECT_FALSE(ExtractFeatures(&extracted));
}
// Pauses the extractor while it is positioned inside an iframe, removes that
// iframe from the document before the continuation runs, and checks the
// outcome.  As currently implemented, extraction is expected to finish
// walking the removed iframe's document.
TEST_F(PhishingDOMFeatureExtractorTest, SubframeRemoval) {
  using base::TimeDelta;

  // The top-level page embeds "frame1" ahead of an empty form; the iframe's
  // document holds a password input.
  responses_["http://host.com/"] =
      "<html><head></head><body>"
      "<iframe src=\"frame.html\" id=\"frame1\"></iframe>"
      "<form></form></body></html>";
  responses_["http://host.com/frame.html"] =
      "<html><body><p><p><p><input type=password></body></html>";

  const base::TimeTicks start = base::TimeTicks::Now();
  EXPECT_CALL(clock_, Now())
      // Start of extraction.
      .WillOnce(Return(start))
      // Start of the first chunk.
      .WillOnce(Return(start))
      // After the first 10 elements.  The reported time is late enough to
      // halt the chunk; as a side effect, queue the iframe removal so that it
      // runs as soon as the extractor yields to the message loop.
      .WillOnce(DoAll(
          Invoke(this, &PhishingDOMFeatureExtractorTest::ScheduleRemoveIframe),
          Return(start + TimeDelta::FromMilliseconds(21))))
      // Start of the second chunk.
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(25)))
      // Iteration resumed within the second chunk.
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(27)))
      // Last reading, used for the histograms.
      .WillOnce(Return(start + TimeDelta::FromMilliseconds(33)));

  FeatureMap extracted;
  LoadURL("http://host.com/");
  ASSERT_TRUE(ExtractFeatures(&extracted));

  // Both the top-level form and the password input from the removed iframe
  // are expected to be present.
  FeatureMap expected;
  expected.AddBooleanFeature(features::kPageHasForms);
  expected.AddBooleanFeature(features::kPageHasPswdInputs);
  ExpectFeatureMapsAreEqual(extracted, expected);
}
} // namespace safe_browsing