 // components/safe_browsing/browser/threat_details.cc - chromium/src - Git at Google

 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 //
 // Implementation of the ThreatDetails class.

 #include "components/safe_browsing/browser/threat_details.h"

 #include <stddef.h>
 #include <stdint.h>
 #include <unordered_set>
 #include <utility>
 #include <vector>

 #include "base/bind.h"
 #include "base/lazy_instance.h"
 #include "base/metrics/histogram_macros.h"
 #include "base/stl_util.h"
 #include "base/strings/string_util.h"
 #include "base/task/post_task.h"
 #include "components/history/core/browser/history_service.h"
 #include "components/safe_browsing/base_ui_manager.h"
 #include "components/safe_browsing/browser/referrer_chain_provider.h"
 #include "components/safe_browsing/browser/threat_details_cache.h"
 #include "components/safe_browsing/browser/threat_details_history.h"
 #include "components/safe_browsing/db/hit_report.h"
 #include "components/safe_browsing/features.h"
 #include "components/safe_browsing/web_ui/safe_browsing_ui.h"
 #include "content/public/browser/browser_task_traits.h"
 #include "content/public/browser/browser_thread.h"
 #include "content/public/browser/navigation_controller.h"
 #include "content/public/browser/navigation_entry.h"
 #include "content/public/browser/render_frame_host.h"
 #include "content/public/browser/render_process_host.h"
 #include "content/public/browser/web_contents.h"
 #include "services/network/public/cpp/shared_url_loader_factory.h"
 #include "services/service_manager/public/cpp/interface_provider.h"

 using content::BrowserThread;
 using content::NavigationEntry;
 using content::RenderFrameHost;
 using content::WebContents;

 // Upper bound on the number of DOM nodes accepted from a single renderer
 // reply. Keep in sync with kMaxNodes in components/safe_browsing/renderer/
 // threat_dom_details.cc
 static const uint32_t kMaxDomNodes = 500;

 namespace safe_browsing {

 // static
 // Factory consulted by NewThreatDetails(). It starts out null;
 // NewThreatDetails() installs the default factory if nothing else has set one.
 ThreatDetailsFactory* ThreatDetails::factory_ = nullptr;

 namespace {

 // An element ID indicating that an HTML Element has no parent. Real element
 // IDs are used as vector indices in this file, so a negative value cannot
 // collide with one.
 const int kElementIdNoParent = -1;

 // The number of user gestures to trace back for the referrer chain. Passed to
 // the ReferrerChainProvider when the referrer chain is filled in.
 const int kThreatDetailsUserGestureLimit = 2;

 using StringSet = std::unordered_set<std::string>;

 // LazyInstance traits that pre-populate the set of HTTPS header names which
 // may be kept in a report. The same set is consulted for request headers and
 // for response headers. Every entry must be lower-case, because lookups
 // lower-case the header name before probing the set.
 struct WhitelistedHttpsHeadersTraits
     : base::internal::DestructorAtExitLazyInstanceTraits<StringSet> {
   static StringSet* New(void* instance) {
     using BaseTraits =
         base::internal::DestructorAtExitLazyInstanceTraits<StringSet>;
     const char* const kAllowedHeaders[] = {
         "google-creative-id", "google-lineitem-id", "referer",
         "content-type",       "content-length",     "date",
         "server",             "cache-control",      "pragma",
         "expires"};

     // Let the base traits construct the (empty) set, then fill it in.
     StringSet* whitelist = BaseTraits::New(instance);
     for (const char* header_name : kAllowedHeaders)
       whitelist->emplace(header_name);
     return whitelist;
   }
 };
 base::LazyInstance<StringSet, WhitelistedHttpsHeadersTraits>
     g_https_headers_whitelist = LAZY_INSTANCE_INITIALIZER;

 // Maps an SBThreatType onto the ReportType value stored in a
 // ClientSafeBrowsingReportRequest. Threat types for which no report should
 // ever be sent hit NOTREACHED() and map to UNKNOWN. The switch intentionally
 // has no default label so that every enumerator is listed explicitly.
 ClientSafeBrowsingReportRequest::ReportType GetReportTypeFromSBThreatType(
     SBThreatType threat_type) {
   using Report = ClientSafeBrowsingReportRequest;
   switch (threat_type) {
     case SB_THREAT_TYPE_URL_PHISHING:
       return Report::URL_PHISHING;
     case SB_THREAT_TYPE_URL_MALWARE:
       return Report::URL_MALWARE;
     case SB_THREAT_TYPE_URL_UNWANTED:
       return Report::URL_UNWANTED;
     case SB_THREAT_TYPE_URL_CLIENT_SIDE_PHISHING:
       return Report::URL_CLIENT_SIDE_PHISHING;
     case SB_THREAT_TYPE_URL_CLIENT_SIDE_MALWARE:
       return Report::URL_CLIENT_SIDE_MALWARE;
     case SB_THREAT_TYPE_AD_SAMPLE:
       return Report::AD_SAMPLE;
     // Both password-reuse flavours share a single report type.
     case SB_THREAT_TYPE_SIGN_IN_PASSWORD_REUSE:
     case SB_THREAT_TYPE_ENTERPRISE_PASSWORD_REUSE:
       return Report::URL_PASSWORD_PROTECTION_PHISHING;
     case SB_THREAT_TYPE_SUSPICIOUS_SITE:
       return Report::URL_SUSPICIOUS;
     case SB_THREAT_TYPE_BILLING:
       return Report::BILLING;
     case SB_THREAT_TYPE_APK_DOWNLOAD:
       return Report::APK_DOWNLOAD;
     // None of the remaining types are expected to reach this function.
     case SB_THREAT_TYPE_UNUSED:
     case SB_THREAT_TYPE_SAFE:
     case SB_THREAT_TYPE_URL_BINARY_MALWARE:
     case SB_THREAT_TYPE_EXTENSION:
     case SB_THREAT_TYPE_BLACKLISTED_RESOURCE:
     case SB_THREAT_TYPE_API_ABUSE:
     case SB_THREAT_TYPE_SUBRESOURCE_FILTER:
     case SB_THREAT_TYPE_CSD_WHITELIST:
     case DEPRECATED_SB_THREAT_TYPE_URL_PASSWORD_PROTECTION_PHISHING:
       // Gated by SafeBrowsingBlockingPage::ShouldReportThreatDetails.
       NOTREACHED() << "We should not send report for threat type: "
                    << threat_type;
       return Report::UNKNOWN;
   }
 }

 // Strips potentially sensitive data from an HTTPS |resource| in place.
 //
 // The request and response sub-messages are cleared and then rebuilt from a
 // private copy of the original: only headers whose lower-cased name is in
 // |g_https_headers_whitelist| are carried over, together with the body digest
 // and body length of each side and the response's remote IP. Fields of
 // |resource| outside of request/response are not touched.
 void ClearHttpsResource(ClientSafeBrowsingReportRequest::Resource* resource) {
   // Moves every whitelisted header of |source| into |destination|. Works for
   // both the request and the response message, which expose the same header
   // accessors.
   const auto move_allowed_headers = [](auto* source, auto* destination) {
     for (int index = 0; index < source->headers_size(); ++index) {
       ClientSafeBrowsingReportRequest::HTTPHeader* header =
           source->mutable_headers(index);
       const bool allowed = g_https_headers_whitelist.Get().count(
                                base::ToLowerASCII(header->name())) > 0;
       if (allowed)
         destination->add_headers()->Swap(header);
     }
   };

   // Snapshot of the untouched resource; everything retained is taken from it.
   ClientSafeBrowsingReportRequest::Resource snapshot(*resource);

   // Request: start from scratch, then restore the allowed pieces.
   resource->clear_request();
   move_allowed_headers(snapshot.mutable_request(),
                        resource->mutable_request());
   resource->mutable_request()->mutable_bodydigest()->swap(
       *snapshot.mutable_request()->mutable_bodydigest());
   resource->mutable_request()->set_bodylength(
       snapshot.request().bodylength());

   // Response: same treatment, plus the remote IP.
   resource->clear_response();
   move_allowed_headers(snapshot.mutable_response(),
                        resource->mutable_response());
   resource->mutable_response()->mutable_bodydigest()->swap(
       *snapshot.mutable_response()->mutable_bodydigest());
   resource->mutable_response()->set_bodylength(
       snapshot.response().bodylength());
   resource->mutable_response()->mutable_remote_ip()->swap(
       *snapshot.mutable_response()->mutable_remote_ip());
 }

 // Builds the map key that identifies a DOM element: the frame tree node ID and
 // the element's node ID within that frame, joined by a dash.
 std::string GetElementKey(const int frame_tree_node_id,
                           const int element_node_id) {
   std::string element_key =
       base::StringPrintf("%d-%d", frame_tree_node_id, element_node_id);
   return element_key;
 }

 using CSBRR = safe_browsing::ClientSafeBrowsingReportRequest;

 // Translates the ThreatSource that flagged a resource into the URL API type
 // recorded in the report's client properties. Sources without a dedicated API
 // type map to SAFE_BROWSING_URL_API_TYPE_UNSPECIFIED.
 CSBRR::SafeBrowsingUrlApiType GetUrlApiTypeForThreatSource(
     safe_browsing::ThreatSource source) {
   CSBRR::SafeBrowsingUrlApiType api_type =
       CSBRR::SAFE_BROWSING_URL_API_TYPE_UNSPECIFIED;
   switch (source) {
     case ThreatSource::DATA_SAVER:
       api_type = CSBRR::FLYWHEEL;
       break;
     case ThreatSource::LOCAL_PVER3:
       api_type = CSBRR::PVER3_NATIVE;
       break;
     case ThreatSource::LOCAL_PVER4:
       api_type = CSBRR::PVER4_NATIVE;
       break;
     case ThreatSource::REMOTE:
       api_type = CSBRR::ANDROID_SAFETYNET;
       break;
     // These sources keep the unspecified default.
     case ThreatSource::UNKNOWN:
     case ThreatSource::CLIENT_SIDE_DETECTION:
     case ThreatSource::PASSWORD_PROTECTION_SERVICE:
       break;
   }
   return api_type;
 }

 // Trims |elements| (and the |resources| they reference) down to the parts of
 // the DOM surrounding the elements identified by |target_ids|.
 //
 // For every target the function keeps its immediate parent and everything
 // below that parent, which covers the target itself, its siblings and all of
 // their descendants. Every other element is erased. A resource is erased only
 // when it is referenced by an erased element and by no kept element.
 //
 // Special cases:
 //  * |target_ids| empty: both maps are cleared entirely.
 //  * Any target without a parent: nothing is trimmed, because keeping that
 //    target's siblings would effectively span the whole report.
 //
 // Element and resource IDs are used as vector indices, so they are expected
 // to lie in [0, size) of their respective maps.
 void TrimElements(const std::set<int>& target_ids,
                   ElementMap* elements,
                   ResourceMap* resources) {
   if (target_ids.empty()) {
     elements->clear();
     resources->clear();
     return;
   }

   // First pass: index the elements by ID and build a reverse mapping from
   // each element ID to the ID of its parent. Entries left at
   // |kElementIdNoParent| identify elements that no one lists as a child.
   std::vector<HTMLElement*> elements_by_id(elements->size());
   std::vector<int> element_id_to_parent_id(elements->size(),
                                            kElementIdNoParent);
   for (const auto& element_pair : *elements) {
     HTMLElement* element = element_pair.second.get();
     elements_by_id[element->id()] = element;

     for (int child_id : element->child_ids()) {
       element_id_to_parent_id[child_id] = element->id();
     }
   }

   // Same idea for resources: map each resource ID back to its URL, which is
   // the key needed to erase it from |resources| later.
   std::vector<std::string> resource_id_to_url(resources->size());
   for (const auto& resource_pair : *resources) {
     resource_id_to_url[resource_pair.second->id()] = resource_pair.first;
   }

   // Seed the traversal with the parent of every target. |kept_element_ids|
   // doubles as the visited set, so each element is queued at most once even
   // when targets share a parent or one parent is nested inside another.
   std::vector<int> pending_element_ids;
   std::unordered_set<int> kept_element_ids;
   for (int target_id : target_ids) {
     const int parent_id = element_id_to_parent_id[target_id];
     if (parent_id == kElementIdNoParent) {
       // A parentless target means the kept region would span the whole
       // report, so skip trimming altogether. Nothing has been modified yet.
       return;
     }
     if (kept_element_ids.insert(parent_id).second) {
       pending_element_ids.push_back(parent_id);
     }
   }

   // Breadth-first walk below the seeded parents. |pending_element_ids| grows
   // while it is being scanned and stops growing at the leaves. Resources
   // attached to kept elements are remembered so that resources shared
   // between kept and trimmed elements survive.
   std::unordered_set<int> kept_resource_ids;
   for (size_t index = 0; index < pending_element_ids.size(); ++index) {
     const HTMLElement& element = *elements_by_id[pending_element_ids[index]];
     if (element.has_resource_id()) {
       kept_resource_ids.insert(element.resource_id());
     }
     for (int child_id : element.child_ids()) {
       if (kept_element_ids.insert(child_id).second) {
         pending_element_ids.push_back(child_id);
       }
     }
   }

   // Finally erase every element that was not reached, along with any
   // resource that only trimmed elements refer to.
   for (auto element_iter = elements->begin();
        element_iter != elements->end();) {
     const HTMLElement& element = *element_iter->second;
     if (kept_element_ids.count(element.id()) > 0) {
       ++element_iter;
       continue;
     }

     if (element.has_resource_id() &&
         kept_resource_ids.count(element.resource_id()) == 0) {
       resources->erase(resource_id_to_url[element.resource_id()]);
     }
     element_iter = elements->erase(element_iter);
   }
 }
 }  // namespace

 // Default ThreatDetailsFactory, used when no other factory has been installed.
 // It is only instantiated through the LazyInstance declared below, hence the
 // private constructor and the friend declaration.
 class ThreatDetailsFactoryImpl : public ThreatDetailsFactory {
  public:
   // Builds a ThreatDetails from the given arguments and immediately starts
   // its data collection before handing ownership to the caller.
   std::unique_ptr<ThreatDetails> CreateThreatDetails(
       BaseUIManager* ui_manager,
       WebContents* web_contents,
       const security_interstitials::UnsafeResource& unsafe_resource,
       scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory,
       history::HistoryService* history_service,
       ReferrerChainProvider* referrer_chain_provider,
       bool trim_to_ad_tags,
       ThreatDetailsDoneCallback done_callback) override {
     // The ThreatDetails constructor is protected, which rules out
     // make_unique, and presubmit rejects spelling out
     // std::unique_ptr<ThreatDetails>(new ThreatDetails(...)). WrapUnique
     // satisfies both constraints.
     std::unique_ptr<ThreatDetails> details = base::WrapUnique(
         new ThreatDetails(ui_manager, web_contents, unsafe_resource,
                           url_loader_factory, history_service,
                           referrer_chain_provider, trim_to_ad_tags,
                           done_callback));
     details->StartCollection();
     return details;
   }

  private:
   friend struct base::LazyInstanceTraitsBase<ThreatDetailsFactoryImpl>;

   ThreatDetailsFactoryImpl() = default;

   DISALLOW_COPY_AND_ASSIGN(ThreatDetailsFactoryImpl);
 };

 // The one instance of the default factory, created on first use.
 static base::LazyInstance<ThreatDetailsFactoryImpl>::DestructorAtExit
     g_threat_details_factory_impl = LAZY_INSTANCE_INITIALIZER;

 // Creates a ThreatDetails for the given tab by delegating to the currently
 // installed factory. All arguments are forwarded to the factory unchanged.
 /* static */
 std::unique_ptr<ThreatDetails> ThreatDetails::NewThreatDetails(
     BaseUIManager* ui_manager,
     WebContents* web_contents,
     const UnsafeResource& resource,
     scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory,
     history::HistoryService* history_service,
     ReferrerChainProvider* referrer_chain_provider,
     bool trim_to_ad_tags,
     ThreatDetailsDoneCallback done_callback) {
   // Tests install their own factory before calling this method; in every
   // other case fall back to the default implementation on first use.
   if (factory_ == nullptr) {
     factory_ = g_threat_details_factory_impl.Pointer();
   }

   return factory_->CreateThreatDetails(ui_manager, web_contents, resource,
                                        url_loader_factory, history_service,
                                        referrer_chain_provider,
                                        trim_to_ad_tags, done_callback);
 }

 // Creates a ThreatDetails for the given tab. Runs in the UI thread.
 //
 // Stores the collaborators, resets all progress flags and creates the cache
 // and redirect collectors. Collection itself does not start here; see
 // StartCollection().
 ThreatDetails::ThreatDetails(
     BaseUIManager* ui_manager,
     content::WebContents* web_contents,
     const UnsafeResource& resource,
     scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory,
     history::HistoryService* history_service,
     ReferrerChainProvider* referrer_chain_provider,
     bool trim_to_ad_tags,
     ThreatDetailsDoneCallback done_callback)
     : content::WebContentsObserver(web_contents),
       url_loader_factory_(url_loader_factory),
       ui_manager_(ui_manager),
       resource_(resource),
       referrer_chain_provider_(referrer_chain_provider),
       cache_result_(false),
       did_proceed_(false),
       num_visits_(0),
       ambiguous_dom_(false),
       trim_to_ad_tags_(trim_to_ad_tags),
       cache_collector_(new ThreatDetailsCacheCollector),
       done_callback_(done_callback),
       all_done_expected_(false),
       is_all_done_(false),
       weak_factory_(this) {
   // |history_service| may be null, in which case the redirects collector is
   // handed an empty weak pointer.
   base::WeakPtr<history::HistoryService> weak_history_service;
   if (history_service) {
     weak_history_service = history_service->AsWeakPtr();
   }
   redirects_collector_ =
       new ThreatDetailsRedirectsCollector(weak_history_service);
 }

 // Default constructor: every flag starts out false and the visit counter at
 // zero. Members that are not listed here (collectors, callback, UI manager,
 // ...) are left to their own default initialization.
 // NOTE(review): presumably intended for tests or subclasses - confirm.
 //
 // TODO(lpz): Consider making this constructor delegate to the parameterized one
 // above.
 ThreatDetails::ThreatDetails()
     : cache_result_(false),
       did_proceed_(false),
       num_visits_(0),
       ambiguous_dom_(false),
       trim_to_ad_tags_(false),
       all_done_expected_(false),
       is_all_done_(false),
       weak_factory_(this) {}

 // Checks (in DCHECK builds) that AllDone() ran if and only if
 // FinishCollection() was called: |all_done_expected_| is set by
 // FinishCollection() and |is_all_done_| by AllDone().
 ThreatDetails::~ThreatDetails() {
   DCHECK(all_done_expected_ == is_all_done_);
 }

 // Returns true if |url| may appear in a report, which currently means its
 // scheme is either "http" or "https".
 bool ThreatDetails::IsReportableUrl(const GURL& url) const {
   // TODO(panayiotis): also skip internal urls.
   if (url.SchemeIs("http"))
     return true;
   return url.SchemeIs("https");
 }

 // Returns the Resource stored in |resources_| for |url|, creating it first if
 // there is none yet. A newly created resource records the URL's spec and is
 // given an ID equal to the number of entries |resources_| held before it was
 // added.
 ClientSafeBrowsingReportRequest::Resource* ThreatDetails::FindOrCreateResource(
     const GURL& url) {
   // operator[] inserts an empty slot when |url| is unknown, so the map size
   // below already counts the new entry.
   auto& slot = resources_[url.spec()];
   if (slot)
     return slot.get();

   const int new_id = resources_.size() - 1;
   slot = std::make_unique<ClientSafeBrowsingReportRequest::Resource>();
   slot->set_url(url.spec());
   slot->set_id(new_id);
   return slot.get();
 }

 // Returns the HTMLElement stored in |elements_| under |element_key|, creating
 // it first if there is none yet. A newly created element is given an ID equal
 // to the number of entries |elements_| held before it was added.
 HTMLElement* ThreatDetails::FindOrCreateElement(
     const std::string& element_key) {
   // operator[] inserts an empty slot for an unknown key, so the map size
   // below already counts the new entry.
   auto& slot = elements_[element_key];
   if (slot)
     return slot.get();

   const int new_id = elements_.size() - 1;
   slot = std::make_unique<HTMLElement>();
   slot->set_id(new_id);
   return slot.get();
 }

 // Records |url| in |resources_| and links it to its relatives.
 //
 //  * |tagname|, when non-empty, is stored on the resource.
 //  * |parent|, when non-empty and reportable, gets a resource of its own and
 //    becomes the parent of |url|'s resource.
 //  * |children|, when non-null, each get a resource of their own whose ID is
 //    appended to |url|'s child list unless it is already listed.
 //
 // Returns the resource for |url|, or nullptr if |url| is invalid or not
 // reportable (in which case nothing is recorded).
 ClientSafeBrowsingReportRequest::Resource* ThreatDetails::AddUrl(
     const GURL& url,
     const GURL& parent,
     const std::string& tagname,
     const std::vector<GURL>* children) {
   if (!(url.is_valid() && IsReportableUrl(url)))
     return nullptr;

   ClientSafeBrowsingReportRequest::Resource* const resource =
       FindOrCreateResource(url);
   if (!tagname.empty())
     resource->set_tag_name(tagname);

   const bool has_reportable_parent =
       !parent.is_empty() && IsReportableUrl(parent);
   if (has_reportable_parent) {
     // Make sure the parent has a resource, then point at it.
     resource->set_parent_id(FindOrCreateResource(parent)->id());
   }

   if (!children)
     return resource;

   for (const GURL& child_url : *children) {
     // TODO(lpz): Should this first check if the child URL is reportable
     // before creating the resource?
     const int child_id = FindOrCreateResource(child_url)->id();
     const auto& known_children = resource->child_ids();
     const bool already_listed =
         std::find(known_children.begin(), known_children.end(), child_id) !=
         known_children.end();
     if (!already_listed)
       resource->add_child_ids(child_id);
   }
   return resource;
 }

 // Records one DOM element reported by a renderer.
 //
 // The element is keyed by (|frame_tree_node_id|, |element_node_id|). Its tag
 // is stored upper-cased, its attributes are copied over, and |resource| (when
 // non-null) is attached by ID. If |parent_element_node_id| is zero the
 // element is remembered as a top-level element of its frame; otherwise it is
 // added to the child list of its parent, provided the parent is already
 // known. This is expected to be called once per element.
 void ThreatDetails::AddDomElement(
     const int frame_tree_node_id,
     const int element_node_id,
     const std::string& tagname,
     const int parent_element_node_id,
     const std::vector<mojom::AttributeNameValuePtr> attributes,
     const ClientSafeBrowsingReportRequest::Resource* resource) {
   HTMLElement* const new_element =
       FindOrCreateElement(GetElementKey(frame_tree_node_id, element_node_id));

   // Basic metadata: tag name and attributes.
   const std::string upper_tag = base::ToUpperASCII(tagname);
   if (!upper_tag.empty())
     new_element->set_tag(upper_tag);

   for (const mojom::AttributeNameValuePtr& attribute : attributes) {
     HTMLElement::Attribute* stored_attribute = new_element->add_attribute();
     stored_attribute->set_name(std::move(attribute->name));
     stored_attribute->set_value(std::move(attribute->value));

     // Elements carrying this attribute represent ads. Remember their IDs so
     // the report can later be trimmed down to just those parts.
     if (trim_to_ad_tags_ &&
         stored_attribute->name() == "data-google-query-id") {
       trimmed_dom_element_ids_.insert(new_element->id());
     }
   }

   if (resource)
     new_element->set_resource_id(resource->id());

   if (parent_element_node_id == 0) {
     // No parent within this frame: the element sits at the top of the frame
     // identified by |frame_tree_node_id|. If that frame is an iframe, a later
     // pass attaches the element to the iframe element that hosts it.
     frame_tree_id_to_children_map_[frame_tree_node_id].insert(
         new_element->id());
     return;
   }

   // The parent lives in the same frame, so it can be looked up directly. If
   // it has not been recorded there is nothing to link to.
   const auto parent_it = elements_.find(
       GetElementKey(frame_tree_node_id, parent_element_node_id));
   if (parent_it == elements_.end())
     return;
   HTMLElement* const parent_element = parent_it->second.get();
   if (!parent_element)
     return;

   // Register as a child of the parent, without duplicating child IDs.
   const auto& siblings = parent_element->child_ids();
   const bool already_listed =
       std::find(siblings.begin(), siblings.end(), new_element->id()) !=
       siblings.end();
   if (!already_listed)
     parent_element->add_child_ids(new_element->id());
 }

 void ThreatDetails::StartCollection() {
   DVLOG(1) << "Starting to compute threat details.";
   report_.reset(new ClientSafeBrowsingReportRequest());

   if (IsReportableUrl(resource_.url)) {
     report_->set_url(resource_.url.spec());
     report_->set_type(GetReportTypeFromSBThreatType(resource_.threat_type));
   }

   GURL referrer_url;
   NavigationEntry* nav_entry = resource_.GetNavigationEntryForResource();
   if (nav_entry) {
     GURL page_url = nav_entry->GetURL();
     if (IsReportableUrl(page_url))
       report_->set_page_url(page_url.spec());

     referrer_url = nav_entry->GetReferrer().url;
     if (IsReportableUrl(referrer_url))
       report_->set_referrer_url(referrer_url.spec());

     // Add the nodes, starting from the page url.
     AddUrl(page_url, GURL(), std::string(), nullptr);
   }

   // Add the resource_url and its original url, if non-empty and different.
   if (!resource_.original_url.is_empty() &&
       resource_.url != resource_.original_url) {
     // Add original_url, as the parent of resource_url.
     AddUrl(resource_.original_url, GURL(), std::string(), nullptr);
     AddUrl(resource_.url, resource_.original_url, std::string(), nullptr);
   } else {
     AddUrl(resource_.url, GURL(), std::string(), nullptr);
   }

   // Add the redirect urls, if non-empty. The redirect urls do not include the
   // original url, but include the unsafe url which is the last one of the
   // redirect urls chain
   GURL parent_url;
   // Set the original url as the parent of the first redirect url if it's not
   // empty.
   if (!resource_.original_url.is_empty())
     parent_url = resource_.original_url;

   // Set the previous redirect url as the parent of the next one
   for (size_t i = 0; i < resource_.redirect_urls.size(); ++i) {
     AddUrl(resource_.redirect_urls[i], parent_url, std::string(), nullptr);
     parent_url = resource_.redirect_urls[i];
   }

   // Add the referrer url.
   if (!referrer_url.is_empty())
     AddUrl(referrer_url, GURL(), std::string(), nullptr);

   if (!resource_.IsMainPageLoadBlocked()) {
     // Get URLs of frames, scripts etc from the DOM.
     // OnReceivedThreatDOMDetails will be called when the renderer replies.
     // TODO(mattm): In theory, if the user proceeds through the warning DOM
     // detail collection could be started once the page loads.
     web_contents()->ForEachFrame(base::BindRepeating(
         &ThreatDetails::RequestThreatDOMDetails, GetWeakPtr()));
   }
 }

 // Asks the renderer behind |frame| for its DOM details. |frame| is recorded as
 // pending so the reply can later be matched against frames that still exist.
 // The reply is delivered to OnReceivedThreatDOMDetails().
 void ThreatDetails::RequestThreatDOMDetails(content::RenderFrameHost* frame) {
   mojom::ThreatReporterPtr reporter;
   frame->GetRemoteInterfaces()->GetInterface(&reporter);

   // Grab the raw interface pointer before |reporter| is moved into the reply
   // callback below; the callback takes over ownership of the connection.
   mojom::ThreatReporter* const reporter_interface = reporter.get();

   pending_render_frame_hosts_.push_back(frame);
   reporter_interface->GetThreatDOMDetails(
       base::BindOnce(&ThreatDetails::OnReceivedThreatDOMDetails, GetWeakPtr(),
                      std::move(reporter), frame));
 }

 // Reply handler for the DOM details requested from |sender|.
 //
 // Replies from frames that are no longer pending are dropped. Otherwise the
 // nodes in |params| that host a child frame are resolved to that frame's
 // FrameTreeNode ID, and everything is handed to AddDOMDetails(). A child
 // frame that cannot be resolved marks the DOM as ambiguous.
 void ThreatDetails::OnReceivedThreatDOMDetails(
     mojom::ThreatReporterPtr threat_reporter,
     content::RenderFrameHost* sender,
     std::vector<mojom::ThreatDOMDetailsNodePtr> params) {
   // If the RenderFrameHost went away between sending the IPC and this
   // callback running, |sender| is no longer in the pending list and must not
   // be dereferenced.
   const auto pending_it =
       std::find(pending_render_frame_hosts_.begin(),
                 pending_render_frame_hosts_.end(), sender);
   if (pending_it == pending_render_frame_hosts_.end())
     return;
   pending_render_frame_hosts_.erase(pending_it);

   // Look up the FrameTreeNode ID of every child frame among the DOM nodes.
   const int process_id = sender->GetProcess()->GetID();
   const int frame_tree_node_id = sender->GetFrameTreeNodeId();
   KeyToFrameTreeIdMap child_frame_tree_map;
   for (const mojom::ThreatDOMDetailsNodePtr& node : params) {
     // A routing ID of zero means the node does not host a child frame.
     if (node->child_frame_routing_id == 0)
       continue;

     const int child_frame_tree_node_id =
         RenderFrameHost::GetFrameTreeNodeIdForRoutingId(
             process_id, node->child_frame_routing_id);
     if (child_frame_tree_node_id == RenderFrameHost::kNoFrameTreeNodeId) {
       ambiguous_dom_ = true;
       continue;
     }
     child_frame_tree_map[GetElementKey(frame_tree_node_id, node->node_id)] =
         child_frame_tree_node_id;
   }

   AddDOMDetails(frame_tree_node_id, std::move(params), child_frame_tree_map);
 }

 // Merges the DOM nodes reported for the frame |frame_tree_node_id| into
 // |resources_| and |elements_|, and folds |child_frame_tree_map| into the
 // combined iframe mapping. Must run on the UI thread.
 //
 // Nothing is changed once history or cache collection has started, or when
 // |params| is empty. At most |kMaxDomNodes| nodes are processed.
 void ThreatDetails::AddDOMDetails(
     const int frame_tree_node_id,
     std::vector<mojom::ThreatDOMDetailsNodePtr> params,
     const KeyToFrameTreeIdMap& child_frame_tree_map) {
   DCHECK_CURRENTLY_ON(BrowserThread::UI);
   DVLOG(1) << "Nodes from the DOM: " << params.size();

   // Bail out if either collector is already running (modifying state now
   // would invalidate the iterators they hold), or if there is nothing to
   // process.
   if (redirects_collector_->HasStarted() || cache_collector_->HasStarted() ||
       params.empty()) {
     return;
   }

   // Copy FrameTreeNode IDs for the child frames into the combined mapping.
   iframe_key_to_frame_tree_id_map_.insert(child_frame_tree_map.begin(),
                                           child_frame_tree_map.end());

   // The renderer could be sending bogus messages, so cap the number of nodes
   // that are accepted.
   const size_t node_limit = std::min<size_t>(params.size(), kMaxDomNodes);
   for (size_t index = 0; index < node_limit; ++index) {
     mojom::ThreatDOMDetailsNode& node = *params[index];
     DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent;

     // Nodes with a URL contribute to |resources_|.
     ClientSafeBrowsingReportRequest::Resource* resource = nullptr;
     if (!node.url.is_empty()) {
       resource = AddUrl(node.url, node.parent, node.tag_name, &(node.children));
     }

     // Nodes with a tag name contribute to |elements_|. The summary node has
     // no tag name and is therefore kept out of the DOM.
     if (node.tag_name.empty())
       continue;
     AddDomElement(frame_tree_node_id, node.node_id, node.tag_name,
                   node.parent_node_id, std::move(node.attributes), resource);
   }
 }

 // Called from the SB Service on the IO thread, after the user has
 // closed the tab, or clicked proceed or goback.  Since the user needs
 // to take an action, we expect this to be called after
 // OnReceivedThreatDOMDetails in most cases. If not, we don't include
 // the DOM data in our report.
 void ThreatDetails::FinishCollection(bool did_proceed, int num_visit) {
   DCHECK_CURRENTLY_ON(BrowserThread::UI);

   all_done_expected_ = true;

   // Do a second pass over the elements and update iframe elements to have
   // references to their children. Children may have been received from a
   // different renderer than the iframe element.
   for (auto& element_pair : elements_) {
     const std::string& element_key = element_pair.first;
     HTMLElement* element = element_pair.second.get();
     if (base::ContainsKey(iframe_key_to_frame_tree_id_map_, element_key)) {
       int frame_tree_id_of_iframe_renderer =
           iframe_key_to_frame_tree_id_map_[element_key];
       const std::unordered_set<int>& child_ids =
           frame_tree_id_to_children_map_[frame_tree_id_of_iframe_renderer];
       for (const int child_id : child_ids) {
         element->add_child_ids(child_id);
       }
     }
   }

   did_proceed_ = did_proceed;
   num_visits_ = num_visit;
   std::vector<GURL> urls;
   for (ResourceMap::const_iterator it = resources_.begin();
        it != resources_.end(); ++it) {
     urls.push_back(GURL(it->first));
   }
   redirects_collector_->StartHistoryCollection(
       urls,
       base::Bind(&ThreatDetails::OnRedirectionCollectionReady, GetWeakPtr()));
 }

 void ThreatDetails::OnRedirectionCollectionReady() {
   DCHECK_CURRENTLY_ON(BrowserThread::UI);
   const std::vector<RedirectChain>& redirects =
       redirects_collector_->GetCollectedUrls();

   for (size_t i = 0; i < redirects.size(); ++i)
     AddRedirectUrlList(redirects[i]);

   // Call the cache collector
   cache_collector_->StartCacheCollection(
       url_loader_factory_, &resources_, &cache_result_,
       base::Bind(&ThreatDetails::OnCacheCollectionReady, GetWeakPtr()));
 }

 // Adds one redirect chain to the collected URLs. Each URL in |urls| except the
 // last is added with the URL that follows it as its parent. Chains with fewer
 // than two URLs contain no such pair and are ignored. Must run on the UI
 // thread.
 void ThreatDetails::AddRedirectUrlList(const std::vector<GURL>& urls) {
   DCHECK_CURRENTLY_ON(BrowserThread::UI);
   // Compare |i + 1| against size() rather than |i| against size() - 1: the
   // latter wraps around for an empty vector (size() is unsigned) and would
   // index far out of bounds.
   for (size_t i = 0; i + 1 < urls.size(); ++i) {
     AddUrl(urls[i], urls[i + 1], std::string(), nullptr);
   }
 }

 void ThreatDetails::OnCacheCollectionReady() {
   DVLOG(1) << "OnCacheCollectionReady.";

   // All URLs have been collected, trim the report if necessary.
   if (trim_to_ad_tags_) {
     TrimElements(trimmed_dom_element_ids_, &elements_, &resources_);
     // If trimming the report removed all the elements then don't bother
     // sending it.
     if (elements_.empty()) {
       AllDone();
       return;
     }
   }

   // Add all the urls in our |resources_| maps to the |report_| protocol buffer.
   for (auto& resource_pair : resources_) {
     ClientSafeBrowsingReportRequest::Resource* pb_resource =
         report_->add_resources();
     pb_resource->Swap(resource_pair.second.get());
     const GURL url(pb_resource->url());
     if (url.SchemeIs("https")) {
       // Sanitize the HTTPS resource by clearing out private data (like cookie
       // headers).
       DVLOG(1) << "Clearing out HTTPS resource: " << pb_resource->url();
       ClearHttpsResource(pb_resource);
       // Keep id, parent_id, child_ids, and tag_name.
     }
   }
   for (auto& element_pair : elements_) {
     report_->add_dom()->Swap(element_pair.second.get());
   }
   if (!elements_.empty()) {
     // TODO(lpz): Consider including the ambiguous_dom_ bit in the report
     // itself.
     UMA_HISTOGRAM_BOOLEAN("SafeBrowsing.ThreatReport.DomIsAmbiguous",
                           ambiguous_dom_);
   }

   report_->set_did_proceed(did_proceed_);
   // Only sets repeat_visit if num_visits_ >= 0.
   if (num_visits_ >= 0) {
     report_->set_repeat_visit(num_visits_ > 0);
   }
   report_->set_complete(cache_result_);

   report_->mutable_client_properties()->set_url_api_type(
       GetUrlApiTypeForThreatSource(resource_.threat_source));

   // Fill the referrer chain if applicable.
   MaybeFillReferrerChain();

   // Send the report, using the SafeBrowsingService.
   std::string serialized;
   if (!report_->SerializeToString(&serialized)) {
     DLOG(ERROR) << "Unable to serialize the threat report.";
     AllDone();
     return;
   }

   base::PostTaskWithTraits(
       FROM_HERE, {content::BrowserThread::UI},
       base::BindOnce(&WebUIInfoSingleton::AddToCSBRRsSent,
                      base::Unretained(WebUIInfoSingleton::GetInstance()),
                      std::move(report_)));

   ui_manager_->SendSerializedThreatDetails(serialized);

   AllDone();
 }

 void ThreatDetails::MaybeFillReferrerChain() {
   if (!referrer_chain_provider_)
     return;

   if (!report_ ||
       (report_->type() != ClientSafeBrowsingReportRequest::URL_SUSPICIOUS &&
        report_->type() != ClientSafeBrowsingReportRequest::APK_DOWNLOAD)) {
     return;
   }

   referrer_chain_provider_->IdentifyReferrerChainByWebContents(
       web_contents(), kThreatDetailsUserGestureLimit,
       report_->mutable_referrer_chain());
 }

 void ThreatDetails::AllDone() {
   is_all_done_ = true;
   base::PostTaskWithTraits(
       FROM_HERE, {content::BrowserThread::UI},
       base::BindOnce(done_callback_, base::Unretained(web_contents())));
 }

 // Stops waiting for DOM details from |render_frame_host|: the frame is removed
 // from the pending list (if present), so a late reply from it is ignored.
 void ThreatDetails::FrameDeleted(RenderFrameHost* render_frame_host) {
   const auto pending_it =
       std::find(pending_render_frame_hosts_.begin(),
                 pending_render_frame_hosts_.end(), render_frame_host);
   if (pending_it == pending_render_frame_hosts_.end())
     return;
   pending_render_frame_hosts_.erase(pending_it);
 }

 // Treats a replaced frame host like a deleted one: |old_host| is dropped from
 // the pending list. |new_host| is not used.
 void ThreatDetails::RenderFrameHostChanged(RenderFrameHost* old_host,
                                            RenderFrameHost* new_host) {
   FrameDeleted(old_host);
 }

 // Returns a weak pointer to this object, vended by |weak_factory_|. Used when
 // binding callbacks in this file so they do not hold a raw |this|.
 base::WeakPtr<ThreatDetails> ThreatDetails::GetWeakPtr() {
   return weak_factory_.GetWeakPtr();
 }

 }  // namespace safe_browsing