blob: 671e2bbdc91523cdfa6e7e9902204fe0e3042cfc [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/renderer/safe_browsing/threat_dom_details.h"
#include <algorithm>
#include <map>
#include <unordered_set>
#include "base/compiler_specific.h"
#include "base/metrics/field_trial_params.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_split.h"
#include "base/strings/stringprintf.h"
#include "components/safe_browsing/common/safebrowsing_messages.h"
#include "content/public/renderer/render_frame.h"
#include "third_party/WebKit/public/platform/WebString.h"
#include "third_party/WebKit/public/web/WebDocument.h"
#include "third_party/WebKit/public/web/WebElement.h"
#include "third_party/WebKit/public/web/WebElementCollection.h"
#include "third_party/WebKit/public/web/WebFrame.h"
#include "third_party/WebKit/public/web/WebLocalFrame.h"
namespace safe_browsing {
// A map for keeping track of the identity of DOM Elements, used to generate
// unique IDs for each element and to look up element IDs by parent Element, so
// that proper parent/child relationships can be maintained.
// The key is a WebNode from the DOM, which is basically a pointer so can be
// copied into the map when inserting new elements.
// The values are indices into the resource vector, and are used to retrieve
// the IPC message nodes generated by ThreatDOMDetails.
using ElementToNodeMap = std::map<blink::WebNode, int>;
// This Feature specifies which non-resource HTML Elements to collect based on
// their tag and attributes. It's a single param containing a comma-separated
// list of pairs. For example: "tag1,id,tag1,height,tag2,foo" - this will
// collect elements with tag "tag1" that have attribute "id" or "height" set,
// and elements of tag "tag2" if they have attribute "foo" set. All tag names
// and attributes should be lower case. A list with an odd number of entries
// cannot be split into pairs and is ignored entirely by
// ParseTagAndAttributeParams(). The feature is disabled by default.
const base::Feature kThreatDomDetailsTagAndAttributeFeature{
"ThreatDomDetailsTagAttributes", base::FEATURE_DISABLED_BY_DEFAULT};
// The name of the param containing the tags and attributes list. It is looked
// up on the feature above when parsing the list.
const char kTagAndAttributeParamName[] = "tag_attribute_csv";
namespace {
// Predicate used to search |tag_and_attributes_list_| by tag_name. Construct
// it with the tag to look for and hand it to an algorithm such as
// std::find_if; it matches every TagAndAttributesItem whose |tag_name| equals
// that tag (exact, case-sensitive string comparison).
class TagNameIs {
 public:
  explicit TagNameIs(const std::string& tag) : tag_(tag) {}

  // Returns true when |tag_and_attribute| describes the tag this predicate was
  // built for. Declared const because matching never modifies the predicate,
  // which also allows it to be invoked through a const reference.
  bool operator()(const TagAndAttributesItem& tag_and_attribute) const {
    return tag_ == tag_and_attribute.tag_name;
  }

 private:
  // Owned copy of the tag name to match, so the predicate stays valid even if
  // the string it was constructed from goes away.
  std::string tag_;
};
void ParseTagAndAttributeParams(
std::vector<TagAndAttributesItem>* tag_and_attributes_list) {
DCHECK(tag_and_attributes_list);
if (!base::FeatureList::IsEnabled(kThreatDomDetailsTagAndAttributeFeature)) {
return;
}
tag_and_attributes_list->clear();
const std::string& tag_attribute_csv_param =
base::GetFieldTrialParamValueByFeature(
kThreatDomDetailsTagAndAttributeFeature, kTagAndAttributeParamName);
if (tag_attribute_csv_param.empty()) {
return;
}
std::vector<std::string> split =
base::SplitString(tag_attribute_csv_param, ",", base::TRIM_WHITESPACE,
base::SPLIT_WANT_NONEMPTY);
// If we don't have the right number of pairs in the csv then don't bother
// parsing further.
if (split.size() % 2 != 0) {
return;
}
for (size_t i = 0; i < split.size(); i += 2) {
const std::string& tag_name = split[i];
const std::string& attribute = split[i + 1];
auto item_iter =
std::find_if(tag_and_attributes_list->begin(),
tag_and_attributes_list->end(), TagNameIs(tag_name));
if (item_iter == tag_and_attributes_list->end()) {
TagAndAttributesItem item;
item.tag_name = tag_name;
item.attributes.push_back(attribute);
tag_and_attributes_list->push_back(item);
} else {
item_iter->attributes.push_back(attribute);
}
}
std::sort(tag_and_attributes_list->begin(), tag_and_attributes_list->end(),
[](const TagAndAttributesItem& a, const TagAndAttributesItem& b) {
return a.tag_name < b.tag_name;
});
}
// Returns a pointer to the entry of |resources| that was recorded for
// |element|. |element| must already be present in |element_to_node_map|; the
// mapped value is used as the index into |resources|. The returned pointer
// refers to storage owned by |resources|.
SafeBrowsingHostMsg_ThreatDOMDetails_Node* GetNodeForElement(
    const blink::WebNode& element,
    const safe_browsing::ElementToNodeMap& element_to_node_map,
    std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources) {
  DCHECK(element_to_node_map.count(element) != 0);
  const int index_in_resources = element_to_node_map.at(element);
  SafeBrowsingHostMsg_ThreatDOMDetails_Node& node =
      resources->at(index_in_resources);
  return &node;
}
// Records one DOM |element| as a new node appended to |resources|.
//
// The element's "src" attribute is resolved against its document to obtain
// the node's URL; when that URL is non-empty and valid it is also added to the
// children of |summary_node|. The new node gets an ID derived from the current
// size of |element_to_node_map|, is linked to its nearest already-recorded
// ancestor (if any), and finally |element_to_node_map| is updated so the
// element maps to its index in |resources|.
void HandleElement(
    const blink::WebElement& element,
    SafeBrowsingHostMsg_ThreatDOMDetails_Node* summary_node,
    std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources,
    safe_browsing::ElementToNodeMap* element_to_node_map) {
  // Resolve the (possibly relative) link found in the "src" attribute.
  const GURL child_url =
      GURL(element.document().completeURL(element.getAttribute("src")));
  if (!child_url.is_empty() && child_url.is_valid())
    summary_node->children.push_back(child_url);

  SafeBrowsingHostMsg_ThreatDOMDetails_Node child_node;
  child_node.url = child_url;
  child_node.tag_name = element.tagName().utf8();
  child_node.parent = summary_node->url;

  // IDs are handed out sequentially, starting at 1, based on how many elements
  // have been recorded so far.
  const int child_id = element_to_node_map->size() + 1;
  child_node.node_id = child_id;

  // The direct parent of this element may not have been recorded, so climb
  // the ancestor chain until one that is present in |element_to_node_map| is
  // found, then wire up the parent/child IDs in both directions.
  for (blink::WebNode ancestor = element.parentNode(); !ancestor.isNull();
       ancestor = ancestor.parentNode()) {
    if (element_to_node_map->count(ancestor) == 0)
      continue;

    SafeBrowsingHostMsg_ThreatDOMDetails_Node* recorded_ancestor =
        GetNodeForElement(ancestor, *element_to_node_map, resources);
    child_node.parent_node_id = recorded_ancestor->node_id;
    recorded_ancestor->child_node_ids.push_back(child_id);
    // TODO(lpz): Consider also updating the URL-level parent/child mapping
    // here. Eg: child_node.parent=parent_node.url, and
    // parent_node.children.push_back(child_url).
    break;
  }

  // Store the node, then remember the slot it landed in so that later
  // descendants can find it through |element_to_node_map|.
  resources->push_back(child_node);
  const int inserted_index = resources->size() - 1;
  (*element_to_node_map)[element] = inserted_index;
}
// Decides whether |element| should be collected.
//
// iframe, frame, embed and script elements are always collected when they
// carry a "src" attribute. Any other element (or one of those without "src")
// is collected only if |tag_and_attributes_list| has an entry for its
// lower-cased tag name and the element has at least one of the attributes
// listed in that entry.
bool ShouldHandleElement(
    const blink::WebElement& element,
    const std::vector<TagAndAttributesItem>& tag_and_attributes_list) {
  const bool is_resource_tag =
      element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame") ||
      element.hasHTMLTagName("embed") || element.hasHTMLTagName("script");
  if (is_resource_tag && element.hasAttribute("src"))
    return true;

  const std::string lowered_tag =
      base::ToLowerASCII(element.tagName().ascii());
  const auto entry =
      std::find_if(tag_and_attributes_list.begin(),
                   tag_and_attributes_list.end(), TagNameIs(lowered_tag));
  if (entry == tag_and_attributes_list.end())
    return false;

  // The tag is of interest; collect it if any configured attribute is set.
  return std::any_of(
      entry->attributes.begin(), entry->attributes.end(),
      [&element](const std::string& attribute_name) {
        return element.hasAttribute(
            blink::WebString::fromASCII(attribute_name));
      });
}
} // namespace
// Out-of-line special member functions for TagAndAttributesItem.

// Default construction performs no work beyond default-initializing members.
TagAndAttributesItem::TagAndAttributesItem() = default;

// Copies the tag name and its associated attribute list from |item|.
TagAndAttributesItem::TagAndAttributesItem(const TagAndAttributesItem& item)
    : tag_name(item.tag_name), attributes(item.attributes) {}

TagAndAttributesItem::~TagAndAttributesItem() = default;
// An upper limit on the number of nodes we collect. ExtractResources() stops
// walking the document once this many nodes have been gathered; the frame's
// own summary node is appended in addition to them.
uint32_t ThreatDOMDetails::kMaxNodes = 500;
// static
// Factory for ThreatDOMDetails. The constructor is private and instances are
// obtained through this public static method so that the class can be stubbed
// out for binary-size reduction purposes. The returned object is allocated
// with new.
ThreatDOMDetails* ThreatDOMDetails::Create(content::RenderFrame* render_frame) {
  ThreatDOMDetails* const details = new ThreatDOMDetails(render_frame);
  return details;
}
// Forwards |render_frame| to the content::RenderFrameObserver base class and
// populates |tag_and_attributes_list_| by calling
// ParseTagAndAttributeParams(), which reads the tag/attribute feature param.
ThreatDOMDetails::ThreatDOMDetails(content::RenderFrame* render_frame)
: content::RenderFrameObserver(render_frame) {
ParseTagAndAttributeParams(&tag_and_attributes_list_);
}
// Nothing to release explicitly; members clean up after themselves.
ThreatDOMDetails::~ThreatDOMDetails() = default;
// Dispatches an incoming IPC |message|. SafeBrowsingMsg_GetThreatDOMDetails is
// routed to OnGetThreatDOMDetails(). Returns true if the message was handled
// here and false for any other message type.
bool ThreatDOMDetails::OnMessageReceived(const IPC::Message& message) {
// Assume the message is ours until the map below says otherwise.
bool handled = true;
IPC_BEGIN_MESSAGE_MAP(ThreatDOMDetails, message)
IPC_MESSAGE_HANDLER(SafeBrowsingMsg_GetThreatDOMDetails,
OnGetThreatDOMDetails)
IPC_MESSAGE_UNHANDLED(handled = false)
IPC_END_MESSAGE_MAP()
return handled;
}
// Handler for SafeBrowsingMsg_GetThreatDOMDetails: gathers the DOM details of
// this frame via ExtractResources() and sends them back in a
// SafeBrowsingHostMsg_ThreatDOMDetails message addressed with routing_id().
void ThreatDOMDetails::OnGetThreatDOMDetails() {
  std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node> collected_nodes;
  ExtractResources(&collected_nodes);

  // Report what was collected to the browser.
  auto* reply =
      new SafeBrowsingHostMsg_ThreatDOMDetails(routing_id(), collected_nodes);
  Send(reply);
}
// Walks the document of this observer's frame and appends one node to
// |resources| for every element accepted by ShouldHandleElement(), followed by
// a final summary node for the frame itself (its URL plus the child URLs
// accumulated by HandleElement()).
//
// Nothing is appended when the render frame has no WebFrame. When the frame's
// document handle is null only an empty summary node is appended. Collection
// stops as soon as |resources| holds kMaxNodes entries; the summary node is
// still appended after that, so the vector can hold kMaxNodes + 1 entries.
void ThreatDOMDetails::ExtractResources(
    std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources) {
  blink::WebFrame* frame = render_frame()->GetWebFrame();
  if (!frame)
    return;

  SafeBrowsingHostMsg_ThreatDOMDetails_Node details_node;
  blink::WebDocument document = frame->document();
  if (document.isNull()) {
    // Nothing in this frame. A null document handle must not be queried for
    // its URL, so the summary node is reported with its default (empty) URL.
    resources->push_back(details_node);
    return;
  }
  details_node.url = GURL(document.url());

  ElementToNodeMap element_to_node_map;
  blink::WebElementCollection elements = document.all();
  for (blink::WebElement element = elements.firstItem(); !element.isNull();
       element = elements.nextItem()) {
    if (!ShouldHandleElement(element, tag_and_attributes_list_))
      continue;

    HandleElement(element, &details_node, resources, &element_to_node_map);
    if (resources->size() >= kMaxNodes) {
      // We have reached kMaxNodes, stop collecting early.
      break;
    }
  }

  // The summary node always goes last, whether or not the walk was cut short.
  resources->push_back(details_node);
}
// Destroys this object; instances are heap-allocated in Create().
// NOTE(review): presumably invoked through content::RenderFrameObserver when
// the observed frame goes away - confirm against the base class.
void ThreatDOMDetails::OnDestruct() {
delete this;
}
} // namespace safe_browsing