Save-Page-As-Complete-HTML: Even better handling of <object> elements.

crrev.com/1416113012 improved handling of <object> elements by
save-page-as-complete-html, but always treated them as resources.
The current CL tries to report them as subframes if they contain
a html document.

This CL also adjusts WebFrame::fromFrameOwnerElement and
WebLocalFrameImpl::fromFrameOwnerElement so that they truly work with
instances of HTMLFrameOwnerElement (as the name of these methods
suggests), rather than only with instances of HTMLFrameElementBase (a
subclass of HTMLFrameOwnerElement that only covers <frame> and <iframe>
elements and not <object> element or other frame-owning elements).

FWIW, I tracked down the comment in Web(Local)FrameImpl.cpp that questions
why we only check for <iframe> and <frame> to the following commit:
abarth@webkit.org fd5b347 2012-09-28 18:31:12 :
    // FIXME: Why do we check specifically for <iframe> and <frame> here?
    // Why can't we get the WebFrameImpl from an <object> element,
    // for example.

BUG=553478

Review URL: https://codereview.chromium.org/1442463002

Cr-Commit-Position: refs/heads/master@{#362165}
diff --git a/chrome/browser/download/save_page_browsertest.cc b/chrome/browser/download/save_page_browsertest.cc
index 993813c..85147fc 100644
--- a/chrome/browser/download/save_page_browsertest.cc
+++ b/chrome/browser/download/save_page_browsertest.cc
@@ -1010,12 +1010,14 @@
 
   // 4 = main frame + iframe + object w/ html doc + object w/ pdf doc
   // (svg and png objects do not get a separate frame)
-  int expected_number_of_frames = 4;
+  int expected_number_of_frames = 6;
 
   std::vector<std::string> expected_substrings{
       "frames-objects.htm: 8da13db4-a512-4d9b-b1c5-dc1c134234b9",
       "a.htm: 1b8aae2b-e164-462f-bd5b-98aa366205f2",
       "b.htm: 3a35f7fa-96a9-4487-9f18-4470263907fa",
+      "frames-nested.htm: 4388232f-8d45-4d2e-9807-721b381be153",
+      "frames-nested2.htm: 6d23dc47-f283-4977-96ec-66bcf72301a4",
   };
 
   GURL url(
diff --git a/chrome/test/data/save_page/frames-objects.htm b/chrome/test/data/save_page/frames-objects.htm
index 0f521f2..974bf8e 100644
--- a/chrome/test/data/save_page/frames-objects.htm
+++ b/chrome/test/data/save_page/frames-objects.htm
@@ -9,7 +9,7 @@
 
     Html object:
     <br>
-    <object data="b.htm" border="1">err_no_object_data</object>
+    <object data="frames-nested.htm" border="1">err_no_object_data</object>
     <br>
     <br>
 
diff --git a/content/renderer/savable_resources.cc b/content/renderer/savable_resources.cc
index 4c62c7cd..eb6c9da 100644
--- a/content/renderer/savable_resources.cc
+++ b/content/renderer/savable_resources.cc
@@ -34,41 +34,63 @@
 namespace content {
 namespace {
 
-// Get all savable resource links from current element. One element might
-// have more than one resource link. It is possible to have some links
-// in one CSS stylesheet.
+// Returns |true| if |web_frame| contains (or should be assumed to contain)
+// a html document.
+bool DoesFrameContainHtmlDocument(const WebFrame& web_frame,
+                                  const WebElement& element) {
+  if (web_frame.isWebLocalFrame()) {
+    WebDocument doc = web_frame.document();
+    return doc.isHTMLDocument() || doc.isXHTMLDocument();
+  }
+
+  // Cannot inspect contents of a remote frame, so we use a heuristic:
+  // Assume that <iframe> and <frame> elements contain a html document,
+  // and other elements (i.e. <object>) contain plugins or other resources.
+  // If the heuristic is wrong (i.e. the remote frame in <object> does
+  // contain an html document), then things will still work, but with the
+  // following caveats: 1) original frame content will be saved and 2) links
+  // in frame's html doc will not be rewritten to point to locally saved
+  // files.
+  return element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame");
+}
+
+// If present and valid, then push the link associated with |element|
+// into either SavableResourcesResult::subframes or
+// SavableResourcesResult::resources_list.
 void GetSavableResourceLinkForElement(
     const WebElement& element,
     const WebDocument& current_doc,
     SavableResourcesResult* result) {
-  if (element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame")) {
-    GURL complete_url = current_doc.completeURL(element.getAttribute("src"));
-    WebFrame* web_frame = WebFrame::fromFrameOwnerElement(element);
-
-    SavableSubframe subframe;
-    subframe.original_url = complete_url;
-    subframe.routing_id = GetRoutingIdForFrameOrProxy(web_frame);
-
-    result->subframes->push_back(subframe);
-    return;
-  }
-
   // Check whether the node has sub resource URL or not.
   WebString value = GetSubResourceLinkFromElement(element);
   if (value.isNull())
     return;
+
   // Get absolute URL.
-  GURL u = current_doc.completeURL(value);
-  // ignore invalid URL
-  if (!u.is_valid())
+  GURL element_url = current_doc.completeURL(value);
+
+  // See whether to report this element as a subframe.
+  WebFrame* web_frame = WebFrame::fromFrameOwnerElement(element);
+  if (web_frame && DoesFrameContainHtmlDocument(*web_frame, element)) {
+    SavableSubframe subframe;
+    subframe.original_url = element_url;
+    subframe.routing_id = GetRoutingIdForFrameOrProxy(web_frame);
+    result->subframes->push_back(subframe);
     return;
+  }
+
+  // Ignore invalid URL.
+  if (!element_url.is_valid())
+    return;
+
   // Ignore those URLs which are not standard protocols. Because FTP
   // protocol does no have cache mechanism, we will skip all
   // sub-resources if they use FTP protocol.
-  if (!u.SchemeIsHTTPOrHTTPS() && !u.SchemeIs(url::kFileScheme))
+  if (!element_url.SchemeIsHTTPOrHTTPS() &&
+      !element_url.SchemeIs(url::kFileScheme))
     return;
 
-  result->resources_list->push_back(u);
+  result->resources_list->push_back(element_url);
 }
 
 }  // namespace
@@ -112,6 +134,8 @@
 WebString GetSubResourceLinkFromElement(const WebElement& element) {
   const char* attribute_name = NULL;
   if (element.hasHTMLTagName("img") ||
+      element.hasHTMLTagName("frame") ||
+      element.hasHTMLTagName("iframe") ||
       element.hasHTMLTagName("script")) {
     attribute_name = "src";
   } else if (element.hasHTMLTagName("input")) {
@@ -130,10 +154,6 @@
              element.hasHTMLTagName("ins")) {
     attribute_name = "cite";
   } else if (element.hasHTMLTagName("object")) {
-    // TODO(lukasza): When <object> contains a html document, it should be
-    // reported as a subframe, not as a savable resource (reporting as a
-    // savable resource works, but will save original html contents, not
-    // current html contents of the frame).
     attribute_name = "data";
   } else if (element.hasHTMLTagName("link")) {
     // If the link element is not linked to css, ignore it.
diff --git a/third_party/WebKit/Source/web/WebFrame.cpp b/third_party/WebKit/Source/web/WebFrame.cpp
index 52f55905..727010d 100644
--- a/third_party/WebKit/Source/web/WebFrame.cpp
+++ b/third_party/WebKit/Source/web/WebFrame.cpp
@@ -266,9 +266,9 @@
 {
     Element* element = PassRefPtrWillBeRawPtr<Element>(webElement).get();
 
-    if (!isHTMLFrameElementBase(element))
+    if (!element->isFrameOwnerElement())
         return nullptr;
-    return fromFrame(toHTMLFrameElementBase(element)->contentFrame());
+    return fromFrame(toHTMLFrameOwnerElement(element)->contentFrame());
 }
 
 bool WebFrame::isLoading() const
diff --git a/third_party/WebKit/Source/web/WebLocalFrameImpl.cpp b/third_party/WebKit/Source/web/WebLocalFrameImpl.cpp
index a2c0cb6..8673e8b 100644
--- a/third_party/WebKit/Source/web/WebLocalFrameImpl.cpp
+++ b/third_party/WebKit/Source/web/WebLocalFrameImpl.cpp
@@ -1858,7 +1858,7 @@
 WebLocalFrameImpl* WebLocalFrameImpl::fromFrame(LocalFrame* frame)
 {
     if (!frame)
-        return 0;
+        return nullptr;
     return fromFrame(*frame);
 }
 
@@ -1866,22 +1866,21 @@
 {
     FrameLoaderClient* client = frame.loader().client();
     if (!client || !client->isFrameLoaderClientImpl())
-        return 0;
+        return nullptr;
     return toFrameLoaderClientImpl(client)->webFrame();
 }
 
 WebLocalFrameImpl* WebLocalFrameImpl::fromFrameOwnerElement(Element* element)
 {
-    // FIXME: Why do we check specifically for <iframe> and <frame> here? Why can't we get the WebLocalFrameImpl from an <object> element, for example.
-    if (!isHTMLFrameElementBase(element))
-        return 0;
-    return fromFrame(toLocalFrame(toHTMLFrameElementBase(element)->contentFrame()));
+    if (!element->isFrameOwnerElement())
+        return nullptr;
+    return fromFrame(toLocalFrame(toHTMLFrameOwnerElement(element)->contentFrame()));
 }
 
 WebViewImpl* WebLocalFrameImpl::viewImpl() const
 {
     if (!frame())
-        return 0;
+        return nullptr;
     return WebViewImpl::fromPage(frame()->page());
 }