third_party/WebKit/Source/web/WebFrameSerializerImpl.cpp - chromium/src - Git at Google

 /*
  * Copyright (C) 2009 Google Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
  *
  *     * Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
  *     * Neither the name of Google Inc. nor the names of its
  * contributors may be used to endorse or promote products derived from
  * this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 // How we handle the base tag better.
 // Current status:
 // At present, the normal way we handle the base tag is:
 // a) For those links which have corresponding local saved files, such as
 // savable CSS, JavaScript files, they will be written to relative URLs which
 // point to local saved file. Why those links can not be resolved as absolute
 // file URLs, because if they are resolved as absolute URLs, after moving the
 // file location from one directory to another directory, the file URLs will
 // be dead links.
 // b) For those links which have not corresponding local saved files, such as
 // links in A, AREA tags, they will be resolved as absolute URLs.
 // c) We comment out all base tags when serializing the DOM for the page.
 // FireFox also uses above way to handle base tag.
 //
 // Problem:
 // This way can not handle the following situation:
 // the base tag is written by JavaScript.
 // For example. The page "www.yahoo.com" use
 // "document.write('<base href="http://www.yahoo.com/"...');" to setup base URL
 // of page when loading page. So when saving page as completed-HTML, we assume
 // that we save "www.yahoo.com" to "c:\yahoo.htm". After then we load the saved
 // completed-HTML page, then the JavaScript will insert a base tag
 // <base href="http://www.yahoo.com/"...> to DOM, so all URLs which point to
 // local saved resource files will be resolved as
 // "http://www.yahoo.com/yahoo_files/...", which will cause all saved  resource
 // files can not be loaded correctly. Also the page will be rendered ugly since
 // all saved sub-resource files (such as CSS, JavaScript files) and sub-frame
 // files can not be fetched.
 // Now FireFox, IE and WebKit based Browser all have this problem.
 //
 // Solution:
 // My solution is that we comment old base tag and write new base tag:
 // <base href="." ...> after the previous commented base tag. In WebKit, it
 // always uses the latest "href" attribute of base tag to set document's base
 // URL. Based on this behavior, when we encounter a base tag, we comment it and
 // write a new base tag <base href="."> after the previous commented base tag.
 // The new added base tag can help engine to locate correct base URL for
 // correctly loading local saved resource files. Also I think we need to inherit
 // the base target value from document object when appending new base tag.
 // If there are multiple base tags in original document, we will comment all old
 // base tags and append new base tag after each old base tag because we do not
 // know those old base tags are original content or added by JavaScript. If
 // they are added by JavaScript, it means when loading saved page, the script(s)
 // will still insert base tag(s) to DOM, so the new added base tag(s) can
 // override the incorrect base URL and make sure we always load correct local
 // saved resource files.

 #include "web/WebFrameSerializerImpl.h"

 #include "core/HTMLNames.h"
 #include "core/dom/Document.h"
 #include "core/dom/DocumentType.h"
 #include "core/dom/Element.h"
 #include "core/editing/serializers/Serialization.h"
 #include "core/frame/FrameSerializer.h"
 #include "core/html/HTMLAllCollection.h"
 #include "core/html/HTMLElement.h"
 #include "core/html/HTMLFormElement.h"
 #include "core/html/HTMLFrameElementBase.h"
 #include "core/html/HTMLFrameOwnerElement.h"
 #include "core/html/HTMLHtmlElement.h"
 #include "core/html/HTMLMetaElement.h"
 #include "core/loader/DocumentLoader.h"
 #include "core/loader/FrameLoader.h"
 #include "public/platform/WebCString.h"
 #include "public/platform/WebVector.h"
 #include "web/WebLocalFrameImpl.h"
 #include "wtf/text/TextEncoding.h"

 namespace blink {

 // Soft cap on the length of the data buffer that temporarily holds generated
 // HTML content. The cap can be exceeded when a single very large contiguous
 // string is found in the HTML document.
 static const unsigned dataBufferCapacity = 64 * 1024;

 // Captures the per-document state used while serializing one frame: the
 // document URL, the output text encoding and the document itself, plus a set
 // of progress flags that all start out cleared.
 // |document| must be non-null: it is dereferenced here to cache
 // isHTMLDocument().
 WebFrameSerializerImpl::SerializeDomParam::SerializeDomParam(
     const KURL& url,
     const WTF::TextEncoding& textEncoding,
     Document* document)
     : url(url),
       textEncoding(textEncoding),
       document(document),
       isHTMLDocument(document->isHTMLDocument()),
       haveSeenDocType(false),
       haveAddedCharsetDeclaration(false),
       skipMetaElement(nullptr),
       haveAddedXMLProcessingDirective(false),
       haveAddedContentsBeforeEnd(false) {}

 // Produces the markup that must precede |element|'s open tag and reports via
 // |needSkip| whether the open tag itself has to be omitted.
 //
 // HTML documents:
 //   - a META element that declares a valid charset is remembered in
 //     |param->skipMetaElement| and skipped, because a replacement META is
 //     written right after the open tag of the HEAD element;
 //   - the HTML element is preceded by the doctype (emitted once) and by the
 //     MOTW declaration;
 //   - a BASE element is preceded by "<!--" so that it ends up commented out.
 // Other (XML) documents: the XML declaration and the doctype are each emitted
 // once.
 String WebFrameSerializerImpl::preActionBeforeSerializeOpenTag(
     const Element* element,
     SerializeDomParam* param,
     bool* needSkip) {
   *needSkip = false;
   StringBuilder prefix;

   if (!param->isHTMLDocument) {
     // Write the XML declaration the first time we get here.
     if (!param->haveAddedXMLProcessingDirective) {
       param->haveAddedXMLProcessingDirective = true;
       // Pick the encoding name: the declared XML encoding first, then the
       // document's encoding name, then UTF-8 as the last resort.
       String declaredEncoding = param->document->xmlEncoding();
       if (declaredEncoding.isEmpty())
         declaredEncoding = param->document->encodingName();
       if (declaredEncoding.isEmpty())
         declaredEncoding = UTF8Encoding().name();
       prefix.append("<?xml version=\"");
       prefix.append(param->document->xmlVersion());
       prefix.append("\" encoding=\"");
       prefix.append(declaredEncoding);
       if (param->document->xmlStandalone())
         prefix.append("\" standalone=\"yes");
       prefix.append("\"?>\n");
     }
     // Add the doc type declaration if the original document has it.
     if (!param->haveSeenDocType) {
       param->haveSeenDocType = true;
       prefix.append(createMarkup(param->document->doctype()));
     }
     return prefix.toString();
   }

   DCHECK(element);

   // A META tag that declares the charset is dropped: the correct declaration
   // is emitted after the open tag of the HEAD element instead.
   if (isHTMLMetaElement(element) &&
       toHTMLMetaElement(element)->computeEncoding().isValid()) {
     param->skipMetaElement = element;
     *needSkip = true;
     return prefix.toString();
   }

   if (isHTMLHtmlElement(*element)) {
     // Doc type declaration first, if it has not been written yet.
     if (!param->haveSeenDocType) {
       param->haveSeenDocType = true;
       prefix.append(createMarkup(param->document->doctype()));
     }
     // Then the MOTW declaration, right before the html tag.
     // See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx.
     prefix.append(
         WebFrameSerializer::generateMarkOfTheWebDeclaration(param->url));
     return prefix.toString();
   }

   // Open the comment that hides the original BASE tag.
   if (isHTMLBaseElement(*element))
     prefix.append("<!--");

   return prefix.toString();
 }

 // Returns the markup to insert right after |element|'s open tag. For the
 // first HEAD element of an HTML document this is a META charset declaration
 // for |param->textEncoding|; otherwise nothing is added.
 // |param->haveAddedContentsBeforeEnd| is reset on every call and set again
 // only when content was actually added.
 String WebFrameSerializerImpl::postActionAfterSerializeOpenTag(
     const Element* element,
     SerializeDomParam* param) {
   StringBuilder addedContents;
   param->haveAddedContentsBeforeEnd = false;

   const bool shouldInjectCharsetMeta = param->isHTMLDocument &&
                                        !param->haveAddedCharsetDeclaration &&
                                        isHTMLHeadElement(*element);
   if (shouldInjectCharsetMeta) {
     // WebKit only pre-parses the first 512 bytes of the document. If the
     // whole <HEAD> is larger and the META is at the end of the head part, the
     // document is not decoded correctly. So the charset META is written as
     // the first child of the head tag.
     // See http://bugs.webkit.org/show_bug.cgi?id=16621.
     // The original META tags that declare a charset are skipped in
     // preActionBeforeSerializeOpenTag.
     const String charsetName(param->textEncoding.name());
     addedContents.append(
         WebFrameSerializer::generateMetaCharsetDeclaration(charsetName));
     param->haveAddedCharsetDeclaration = true;
     param->haveAddedContentsBeforeEnd = true;
   }

   return addedContents.toString();
 }

 // Decides whether |element|'s end tag must be skipped; never adds markup, so
 // the returned string is always a default-constructed String.
 String WebFrameSerializerImpl::preActionBeforeSerializeEndTag(
     const Element* element,
     SerializeDomParam* param,
     bool* needSkip) {
   // Only HTML documents skip anything: the end tag of the charset-declaring
   // META whose open tag was skipped. There is no need to check the tag name,
   // since skipMetaElement is only ever set to such a META element.
   *needSkip = param->isHTMLDocument && param->skipMetaElement == element;
   return String();
 }

 // Gives |element| a chance to append extra markup once its end tag has been
 // serialized. Only BASE elements of HTML documents add anything: the comment
 // opened before the tag is closed and a new base tag declaration, carrying
 // the document's base target, is appended.
 String WebFrameSerializerImpl::postActionAfterSerializeEndTag(
     const Element* element,
     SerializeDomParam* param) {
   StringBuilder suffix;

   if (param->isHTMLDocument && isHTMLBaseElement(*element)) {
     // Finish commenting out the original BASE tag.
     suffix.append("-->");
     // Append a new base tag declaration.
     suffix.append(WebFrameSerializer::generateBaseTagDeclaration(
         param->document->baseTarget()));
   }

   return suffix.toString();
 }

 void WebFrameSerializerImpl::saveHTMLContentToBuffer(const String& result,
                                                      SerializeDomParam* param) {
   m_dataBuffer.append(result);
   encodeAndFlushBuffer(WebFrameSerializerClient::CurrentFrameIsNotFinished,
                        param, DoNotForceFlush);
 }

 // Encodes the buffered markup with |param->textEncoding| and hands it to the
 // client together with |status|. Does nothing unless the flush is forced or
 // the buffer has grown beyond dataBufferCapacity.
 void WebFrameSerializerImpl::encodeAndFlushBuffer(
     WebFrameSerializerClient::FrameSerializationStatus status,
     SerializeDomParam* param,
     FlushOption flushOption) {
   const bool mustFlush = flushOption == ForceFlush ||
                          m_dataBuffer.length() > dataBufferCapacity;
   if (!mustFlush)
     return;

   // Take the pending markup out of the buffer before encoding it.
   const String pendingMarkup = m_dataBuffer.toString();
   m_dataBuffer.clear();

   // Encode with the EntitiesForUnencodables handling.
   CString encodedBytes =
       param->textEncoding.encode(pendingMarkup, WTF::EntitiesForUnencodables);

   // Send the result to the client.
   m_client->didSerializeDataForFrame(WebCString(encodedBytes), status);
 }

 // Appends ` attrName="attrValue"` to |result|. The value is passed through
 // the HTML or the XML entity converter depending on |isHTMLDocument|.
 //
 // TODO(yosin): We should utilize |MarkupFormatter| here to share code,
 // especially escaping attribute values, done by |WebEntities| |m_htmlEntities|
 // and |m_xmlEntities|.
 void WebFrameSerializerImpl::appendAttribute(StringBuilder& result,
                                              bool isHTMLDocument,
                                              const String& attrName,
                                              const String& attrValue) {
   const String escapedValue =
       isHTMLDocument ? m_htmlEntities.convertEntitiesInString(attrValue)
                      : m_xmlEntities.convertEntitiesInString(attrValue);

   result.append(' ');
   result.append(attrName);
   result.append("=\"");
   result.append(escapedValue);
   result.append('"');
 }

 // Serializes the open tag of |element|, including all of its attributes, and
 // stores the result in the data buffer.
 //
 // Link attributes are rewritten: frame sources and links for which the
 // delegate supplies a replacement use that replacement, all other links are
 // resolved to absolute URLs. Values starting with "javascript:" are left
 // untouched. The closing '>' is only written when the element has children or
 // the post action added content; otherwise endTagToString() completes the
 // tag.
 void WebFrameSerializerImpl::openTagToString(Element* element,
                                              SerializeDomParam* param) {
   StringBuilder markup;

   // Pre action: may add leading markup or ask us to drop the tag entirely.
   bool skipElement = false;
   markup.append(preActionBeforeSerializeOpenTag(element, param, &skipElement));
   if (skipElement)
     return;

   // Start of the open tag.
   markup.append('<');
   markup.append(element->nodeName().lower());

   // Find out whether frame-specific link rewriting applies to this element.
   WebFrame* contentWebFrame =
       element->isFrameOwnerElement()
           ? WebFrame::fromFrame(
                 toHTMLFrameOwnerElement(element)->contentFrame())
           : nullptr;
   WebString frameLink;
   const bool shouldRewriteFrameSrc =
       contentWebFrame &&
       m_delegate->rewriteFrameSource(contentWebFrame, &frameLink);
   bool didRewriteFrameSrc = false;

   // Serialize every attribute of the element.
   for (const auto& attribute : element->attributes()) {
     const QualifiedName& name = attribute.name();

     // The srcdoc attribute is dropped when a src attribute is emitted instead
     // (for frames).
     if (shouldRewriteFrameSrc && name == HTMLNames::srcdocAttr)
       continue;

     String value = attribute.value();

     // Links are rewritten, except those starting with "javascript:".
     const bool isRewritableLink =
         element->hasLegalLinkAttribute(name) &&
         !value.startsWith("javascript:", TextCaseInsensitive);
     if (isRewritableLink) {
       // Get the absolute link.
       KURL absoluteURL = param->document->completeURL(value);

       // Prefer the frame link, then a local file supplied by the delegate,
       // and fall back to the absolute URL.
       WebString localURL;
       if (shouldRewriteFrameSrc) {
         value = frameLink;
         didRewriteFrameSrc = true;
       } else if (m_delegate->rewriteLink(absoluteURL, &localURL)) {
         value = localURL;
       } else {
         value = absoluteURL;
       }
     }

     appendAttribute(markup, param->isHTMLDocument, name.toString(), value);
   }

   // For frames where link rewriting was requested, make sure a src attribute
   // is written even if the original element did not have one (mainly needed
   // for iframes with srcdoc, but with no src attribute).
   if (shouldRewriteFrameSrc && !didRewriteFrameSrc &&
       isHTMLIFrameElement(element)) {
     appendAttribute(markup, param->isHTMLDocument,
                     HTMLNames::srcAttr.toString(), frameLink);
   }

   // Post action: may produce content to insert right after the open tag.
   const String injectedContents =
       postActionAfterSerializeOpenTag(element, param);

   // Complete the open tag only when something will follow inside the element.
   if (element->hasChildren() || param->haveAddedContentsBeforeEnd)
     markup.append('>');
   markup.append(injectedContents);

   // Save the result to the data buffer.
   saveHTMLContentToBuffer(markup.toString(), param);
 }

 // Serializes the end tag of |element| and stores the result in the data
 // buffer. When the open tag was left unterminated (no children and no added
 // content), this also finishes it: with '>' for HTML documents, followed by
 // an end tag unless the element reports ieForbidsInsertHTML(), or with " />"
 // for XML documents.
 void WebFrameSerializerImpl::endTagToString(Element* element,
                                             SerializeDomParam* param) {
   StringBuilder markup;

   // Pre action: may ask us to drop the end tag entirely.
   bool skipElement = false;
   markup.append(preActionBeforeSerializeEndTag(element, param, &skipElement));
   if (skipElement)
     return;

   auto appendCloseTag = [&markup, element]() {
     markup.append("</");
     markup.append(element->nodeName().lower());
     markup.append('>');
   };

   const bool openTagWasCompleted =
       element->hasChildren() || param->haveAddedContentsBeforeEnd;
   if (openTagWasCompleted) {
     appendCloseTag();
   } else if (!param->isHTMLDocument) {
     // For xml base document.
     markup.append(" />");
   } else {
     markup.append('>');
     // FIXME: This code is horribly wrong.  WebFrameSerializerImpl must die.
     const bool endTagForbidden =
         element->isHTMLElement() &&
         toHTMLElement(element)->ieForbidsInsertHTML();
     // Write the end tag when it is required.
     if (!endTagForbidden)
       appendCloseTag();
   }

   // Post action: may append markup after the end tag.
   markup.append(postActionAfterSerializeEndTag(element, param));

   // Save the result to the data buffer.
   saveHTMLContentToBuffer(markup.toString(), param);
 }

 // Serializes |node| and, for elements, its whole subtree into the data
 // buffer. Elements are written as open tag, children, end tag; every other
 // permitted node type is written with createMarkup(). Attribute, document and
 // document fragment nodes are not expected here.
 void WebFrameSerializerImpl::buildContentForNode(Node* node,
                                                  SerializeDomParam* param) {
   switch (node->getNodeType()) {
     case Node::kElementNode: {
       Element* element = toElement(node);
       // Open tag, then each child in document order, then the end tag.
       openTagToString(element, param);
       Node* child = node->firstChild();
       while (child) {
         buildContentForNode(child, param);
         child = child->nextSibling();
       }
       endTagToString(element, param);
       return;
     }
     case Node::kAttributeNode:
     case Node::kDocumentNode:
     case Node::kDocumentFragmentNode:
       // Should not exist.
       NOTREACHED();
       return;
     case Node::kDocumentTypeNode:
       // Document type node can be in DOM? Remember that it was seen, then
       // serialize it like any other non-element node below.
       param->haveSeenDocType = true;
       break;
     case Node::kTextNode:
     default:
       break;
   }

   // Text nodes and all remaining node types use the default markup.
   saveHTMLContentToBuffer(createMarkup(node), param);
 }

 // Binds the serializer to the frame to be serialized, the client that
 // receives the encoded output and the delegate that is consulted for link
 // rewriting. All three pointers are DCHECKed to be non-null.
 // NOTE(review): the bool passed to |m_htmlEntities| / |m_xmlEntities|
 // presumably selects XML entity handling -- confirm against WebEntities.
 WebFrameSerializerImpl::WebFrameSerializerImpl(
     WebLocalFrame* frame,
     WebFrameSerializerClient* client,
     WebFrameSerializer::LinkRewritingDelegate* delegate)
     : m_client(client),
       m_delegate(delegate),
       m_htmlEntities(false),
       m_xmlEntities(true) {
   // Must specify available webframe.
   DCHECK(frame);
   m_specifiedWebLocalFrameImpl = toWebLocalFrameImpl(frame);
   // Make sure we have non null client and delegate.
   DCHECK(client);
   DCHECK(delegate);

   // Nothing may be buffered before serialization starts.
   DCHECK(m_dataBuffer.isEmpty());
 }

 bool WebFrameSerializerImpl::serialize() {
   bool didSerialization = false;

   Document* document = m_specifiedWebLocalFrameImpl->frame()->document();
   const KURL& url = document->url();

   if (url.isValid()) {
     didSerialization = true;

     const WTF::TextEncoding& textEncoding =
         document->encoding().isValid() ? document->encoding() : UTF8Encoding();
     if (textEncoding.isNonByteBasedEncoding()) {
       const UChar byteOrderMark = 0xFEFF;
       m_dataBuffer.append(byteOrderMark);
     }

     SerializeDomParam param(url, textEncoding, document);

     Element* documentElement = document->documentElement();
     if (documentElement)
       buildContentForNode(documentElement, &param);

     encodeAndFlushBuffer(WebFrameSerializerClient::CurrentFrameIsFinished,
                          &param, ForceFlush);
   } else {
     // Report empty contents for invalid URLs.
     m_client->didSerializeDataForFrame(
         WebCString(), WebFrameSerializerClient::CurrentFrameIsFinished);
   }

   DCHECK(m_dataBuffer.isEmpty());
   return didSerialization;
 }

 }  // namespace blink
	/*
	* Copyright (C) 2009 Google Inc. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions are
	* met:
	*
	* * Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* * Redistributions in binary form must reproduce the above
	* copyright notice, this list of conditions and the following disclaimer
	* in the documentation and/or other materials provided with the
	* distribution.
	* * Neither the name of Google Inc. nor the names of its
	* contributors may be used to endorse or promote products derived from
	* this software without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/

	// How we handle the base tag better.
	// Current status:
	// At present, the normal way we handle the base tag is:
	// a) For those links which have corresponding local saved files, such as
	// savable CSS, JavaScript files, they will be written to relative URLs which
	// point to local saved file. Why those links can not be resolved as absolute
	// file URLs, because if they are resolved as absolute URLs, after moving the
	// file location from one directory to another directory, the file URLs will
	// be dead links.
	// b) For those links which have not corresponding local saved files, such as
	// links in A, AREA tags, they will be resolved as absolute URLs.
	// c) We comment out all base tags when serializing the DOM for the page.
	// FireFox also uses above way to handle base tag.
	//
	// Problem:
	// This way can not handle the following situation:
	// the base tag is written by JavaScript.
	// For example. The page "www.yahoo.com" use
	// "document.write('<base href="http://www.yahoo.com/"...');" to setup base URL
	// of page when loading page. So when saving page as completed-HTML, we assume
	// that we save "www.yahoo.com" to "c:\yahoo.htm". After then we load the saved
	// completed-HTML page, then the JavaScript will insert a base tag
	// <base href="http://www.yahoo.com/"...> to DOM, so all URLs which point to
	// local saved resource files will be resolved as
	// "http://www.yahoo.com/yahoo_files/...", which will cause all saved resource
	// files can not be loaded correctly. Also the page will be rendered ugly since
	// all saved sub-resource files (such as CSS, JavaScript files) and sub-frame
	// files can not be fetched.
	// Now FireFox, IE and WebKit based Browser all have this problem.
	//
	// Solution:
	// My solution is that we comment old base tag and write new base tag:
	// <base href="." ...> after the previous commented base tag. In WebKit, it
	// always uses the latest "href" attribute of base tag to set document's base
	// URL. Based on this behavior, when we encounter a base tag, we comment it and
	// write a new base tag <base href="."> after the previous commented base tag.
	// The new added base tag can help engine to locate correct base URL for
	// correctly loading local saved resource files. Also I think we need to inherit
	// the base target value from document object when appending new base tag.
	// If there are multiple base tags in original document, we will comment all old
	// base tags and append new base tag after each old base tag because we do not
	// know those old base tags are original content or added by JavaScript. If
	// they are added by JavaScript, it means when loading saved page, the script(s)
	// will still insert base tag(s) to DOM, so the new added base tag(s) can
	// override the incorrect base URL and make sure we always load correct local
	// saved resource files.

	#include "web/WebFrameSerializerImpl.h"

	#include "core/HTMLNames.h"
	#include "core/dom/Document.h"
	#include "core/dom/DocumentType.h"
	#include "core/dom/Element.h"
	#include "core/editing/serializers/Serialization.h"
	#include "core/frame/FrameSerializer.h"
	#include "core/html/HTMLAllCollection.h"
	#include "core/html/HTMLElement.h"
	#include "core/html/HTMLFormElement.h"
	#include "core/html/HTMLFrameElementBase.h"
	#include "core/html/HTMLFrameOwnerElement.h"
	#include "core/html/HTMLHtmlElement.h"
	#include "core/html/HTMLMetaElement.h"
	#include "core/loader/DocumentLoader.h"
	#include "core/loader/FrameLoader.h"
	#include "public/platform/WebCString.h"
	#include "public/platform/WebVector.h"
	#include "web/WebLocalFrameImpl.h"
	#include "wtf/text/TextEncoding.h"

	namespace blink {

	// Soft cap on the length of the data buffer that temporarily holds generated
	// HTML content. The cap can be exceeded when a single very large contiguous
	// string is found in the HTML document.
	static const unsigned dataBufferCapacity = 64 * 1024;

	// Captures the per-document state used while serializing one frame: the
	// document URL, the output text encoding and the document itself, plus a set
	// of progress flags that all start out cleared.
	// |document| must be non-null: it is dereferenced here to cache
	// isHTMLDocument().
	WebFrameSerializerImpl::SerializeDomParam::SerializeDomParam(
	const KURL& url,
	const WTF::TextEncoding& textEncoding,
	Document* document)
	: url(url),
	textEncoding(textEncoding),
	document(document),
	isHTMLDocument(document->isHTMLDocument()),
	haveSeenDocType(false),
	haveAddedCharsetDeclaration(false),
	skipMetaElement(nullptr),
	haveAddedXMLProcessingDirective(false),
	haveAddedContentsBeforeEnd(false) {}

	// Produces the markup that must precede |element|'s open tag and reports via
	// |needSkip| whether the open tag itself has to be omitted.
	//
	// HTML documents:
	//   - a META element that declares a valid charset is remembered in
	//     |param->skipMetaElement| and skipped, because a replacement META is
	//     written right after the open tag of the HEAD element;
	//   - the HTML element is preceded by the doctype (emitted once) and by the
	//     MOTW declaration;
	//   - a BASE element is preceded by "<!--" so that it ends up commented out.
	// Other (XML) documents: the XML declaration and the doctype are each emitted
	// once.
	String WebFrameSerializerImpl::preActionBeforeSerializeOpenTag(
	    const Element* element,
	    SerializeDomParam* param,
	    bool* needSkip) {
	  *needSkip = false;
	  StringBuilder prefix;

	  if (!param->isHTMLDocument) {
	    // Write the XML declaration the first time we get here.
	    if (!param->haveAddedXMLProcessingDirective) {
	      param->haveAddedXMLProcessingDirective = true;
	      // Pick the encoding name: the declared XML encoding first, then the
	      // document's encoding name, then UTF-8 as the last resort.
	      String declaredEncoding = param->document->xmlEncoding();
	      if (declaredEncoding.isEmpty())
	        declaredEncoding = param->document->encodingName();
	      if (declaredEncoding.isEmpty())
	        declaredEncoding = UTF8Encoding().name();
	      prefix.append("<?xml version=\"");
	      prefix.append(param->document->xmlVersion());
	      prefix.append("\" encoding=\"");
	      prefix.append(declaredEncoding);
	      if (param->document->xmlStandalone())
	        prefix.append("\" standalone=\"yes");
	      prefix.append("\"?>\n");
	    }
	    // Add the doc type declaration if the original document has it.
	    if (!param->haveSeenDocType) {
	      param->haveSeenDocType = true;
	      prefix.append(createMarkup(param->document->doctype()));
	    }
	    return prefix.toString();
	  }

	  DCHECK(element);

	  // A META tag that declares the charset is dropped: the correct declaration
	  // is emitted after the open tag of the HEAD element instead.
	  if (isHTMLMetaElement(element) &&
	      toHTMLMetaElement(element)->computeEncoding().isValid()) {
	    param->skipMetaElement = element;
	    *needSkip = true;
	    return prefix.toString();
	  }

	  if (isHTMLHtmlElement(*element)) {
	    // Doc type declaration first, if it has not been written yet.
	    if (!param->haveSeenDocType) {
	      param->haveSeenDocType = true;
	      prefix.append(createMarkup(param->document->doctype()));
	    }
	    // Then the MOTW declaration, right before the html tag.
	    // See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx.
	    prefix.append(
	        WebFrameSerializer::generateMarkOfTheWebDeclaration(param->url));
	    return prefix.toString();
	  }

	  // Open the comment that hides the original BASE tag.
	  if (isHTMLBaseElement(*element))
	    prefix.append("<!--");

	  return prefix.toString();
	}

	// Returns the markup to insert right after |element|'s open tag. For the
	// first HEAD element of an HTML document this is a META charset declaration
	// for |param->textEncoding|; otherwise nothing is added.
	// |param->haveAddedContentsBeforeEnd| is reset on every call and set again
	// only when content was actually added.
	String WebFrameSerializerImpl::postActionAfterSerializeOpenTag(
	    const Element* element,
	    SerializeDomParam* param) {
	  StringBuilder addedContents;
	  param->haveAddedContentsBeforeEnd = false;

	  const bool shouldInjectCharsetMeta = param->isHTMLDocument &&
	                                       !param->haveAddedCharsetDeclaration &&
	                                       isHTMLHeadElement(*element);
	  if (shouldInjectCharsetMeta) {
	    // WebKit only pre-parses the first 512 bytes of the document. If the
	    // whole <HEAD> is larger and the META is at the end of the head part, the
	    // document is not decoded correctly. So the charset META is written as
	    // the first child of the head tag.
	    // See http://bugs.webkit.org/show_bug.cgi?id=16621.
	    // The original META tags that declare a charset are skipped in
	    // preActionBeforeSerializeOpenTag.
	    const String charsetName(param->textEncoding.name());
	    addedContents.append(
	        WebFrameSerializer::generateMetaCharsetDeclaration(charsetName));
	    param->haveAddedCharsetDeclaration = true;
	    param->haveAddedContentsBeforeEnd = true;
	  }

	  return addedContents.toString();
	}

	// Decides whether |element|'s end tag must be skipped; never adds markup, so
	// the returned string is always a default-constructed String.
	String WebFrameSerializerImpl::preActionBeforeSerializeEndTag(
	    const Element* element,
	    SerializeDomParam* param,
	    bool* needSkip) {
	  // Only HTML documents skip anything: the end tag of the charset-declaring
	  // META whose open tag was skipped. There is no need to check the tag name,
	  // since skipMetaElement is only ever set to such a META element.
	  *needSkip = param->isHTMLDocument && param->skipMetaElement == element;
	  return String();
	}

	// Gives |element| a chance to append extra markup once its end tag has been
	// serialized. Only BASE elements of HTML documents add anything: the comment
	// opened before the tag is closed and a new base tag declaration, carrying
	// the document's base target, is appended.
	String WebFrameSerializerImpl::postActionAfterSerializeEndTag(
	    const Element* element,
	    SerializeDomParam* param) {
	  StringBuilder suffix;

	  if (param->isHTMLDocument && isHTMLBaseElement(*element)) {
	    // Finish commenting out the original BASE tag.
	    suffix.append("-->");
	    // Append a new base tag declaration.
	    suffix.append(WebFrameSerializer::generateBaseTagDeclaration(
	        param->document->baseTarget()));
	  }

	  return suffix.toString();
	}

	void WebFrameSerializerImpl::saveHTMLContentToBuffer(const String& result,
	SerializeDomParam* param) {
	m_dataBuffer.append(result);
	encodeAndFlushBuffer(WebFrameSerializerClient::CurrentFrameIsNotFinished,
	param, DoNotForceFlush);
	}

	// Encodes the buffered markup with |param->textEncoding| and hands it to the
	// client together with |status|. Does nothing unless the flush is forced or
	// the buffer has grown beyond dataBufferCapacity.
	void WebFrameSerializerImpl::encodeAndFlushBuffer(
	    WebFrameSerializerClient::FrameSerializationStatus status,
	    SerializeDomParam* param,
	    FlushOption flushOption) {
	  const bool mustFlush = flushOption == ForceFlush ||
	                         m_dataBuffer.length() > dataBufferCapacity;
	  if (!mustFlush)
	    return;

	  // Take the pending markup out of the buffer before encoding it.
	  const String pendingMarkup = m_dataBuffer.toString();
	  m_dataBuffer.clear();

	  // Encode with the EntitiesForUnencodables handling.
	  CString encodedBytes =
	      param->textEncoding.encode(pendingMarkup, WTF::EntitiesForUnencodables);

	  // Send the result to the client.
	  m_client->didSerializeDataForFrame(WebCString(encodedBytes), status);
	}

	// Appends ` attrName="attrValue"` to |result|. The value is passed through
	// the HTML or the XML entity converter depending on |isHTMLDocument|.
	//
	// TODO(yosin): We should utilize |MarkupFormatter| here to share code,
	// especially escaping attribute values, done by |WebEntities| |m_htmlEntities|
	// and |m_xmlEntities|.
	void WebFrameSerializerImpl::appendAttribute(StringBuilder& result,
	                                             bool isHTMLDocument,
	                                             const String& attrName,
	                                             const String& attrValue) {
	  const String escapedValue =
	      isHTMLDocument ? m_htmlEntities.convertEntitiesInString(attrValue)
	                     : m_xmlEntities.convertEntitiesInString(attrValue);

	  result.append(' ');
	  result.append(attrName);
	  result.append("=\"");
	  result.append(escapedValue);
	  result.append('"');
	}

	// Serializes the open tag of |element|, including all of its attributes, and
	// stores the result in the data buffer.
	//
	// Link attributes are rewritten: frame sources and links for which the
	// delegate supplies a replacement use that replacement, all other links are
	// resolved to absolute URLs. Values starting with "javascript:" are left
	// untouched. The closing '>' is only written when the element has children or
	// the post action added content.
	void WebFrameSerializerImpl::openTagToString(Element* element,
	                                             SerializeDomParam* param) {
	  bool needSkip;
	  StringBuilder result;
	  // Do pre action for open tag; it may ask us to drop the tag entirely.
	  result.append(preActionBeforeSerializeOpenTag(element, param, &needSkip));
	  if (needSkip)
	    return;
	  // Add open tag
	  result.append('<');
	  result.append(element->nodeName().lower());

	  // Find out if we need to do frame-specific link rewriting.
	  WebFrame* frame = nullptr;
	  if (element->isFrameOwnerElement()) {
	    frame =
	        WebFrame::fromFrame(toHTMLFrameOwnerElement(element)->contentFrame());
	  }
	  WebString rewrittenFrameLink;
	  bool shouldRewriteFrameSrc =
	      frame && m_delegate->rewriteFrameSource(frame, &rewrittenFrameLink);
	  bool didRewriteFrameSrc = false;

	  // Go through all attributes and serialize them.
	  for (const auto& it : element->attributes()) {
	    const QualifiedName& attrName = it.name();
	    String attrValue = it.value();

	    // Skip srcdoc attribute if we will emit src attribute (for frames).
	    if (shouldRewriteFrameSrc && attrName == HTMLNames::srcdocAttr)
	      continue;

	    // Rewrite the attribute value if requested.
	    if (element->hasLegalLinkAttribute(attrName)) {
	      // For links start with "javascript:", we do not change it.
	      if (!attrValue.startsWith("javascript:", TextCaseInsensitive)) {
	        // Get the absolute link.
	        KURL completeURL = param->document->completeURL(attrValue);

	        // Check whether we have a local file to link to.
	        WebString rewrittenURL;
	        if (shouldRewriteFrameSrc) {
	          attrValue = rewrittenFrameLink;
	          didRewriteFrameSrc = true;
	        } else if (m_delegate->rewriteLink(completeURL, &rewrittenURL)) {
	          attrValue = rewrittenURL;
	        } else {
	          attrValue = completeURL;
	        }
	      }
	    }

	    appendAttribute(result, param->isHTMLDocument, attrName.toString(),
	                    attrValue);
	  }

	  // For frames where link rewriting was requested, ensure that src attribute
	  // is written even if the original document didn't have that attribute
	  // (mainly needed for iframes with srcdoc, but with no src attribute).
	  if (shouldRewriteFrameSrc && !didRewriteFrameSrc &&
	      isHTMLIFrameElement(element)) {
	    appendAttribute(result, param->isHTMLDocument,
	                    HTMLNames::srcAttr.toString(), rewrittenFrameLink);
	  }

	  // Do post action for open tag.
	  String addedContents = postActionAfterSerializeOpenTag(element, param);
	  // Complete the open tag for element when it has child/children. The
	  // operator here must be a plain logical OR.
	  if (element->hasChildren() || param->haveAddedContentsBeforeEnd)
	    result.append('>');
	  // Append the added contents generate in post action of open tag.
	  result.append(addedContents);
	  // Save the result to data buffer.
	  saveHTMLContentToBuffer(result.toString(), param);
	}

	// Serializes the end of |element| and saves the text to the data buffer.
	//
	// - If the element has children, or contents were added before its end, a
	//   regular "</name>" end tag is written (openTagToString() has already
	//   closed the open tag in that case).
	// - Otherwise the still-open start tag is closed here: in HTML documents with
	//   ">" followed by "</name>" unless the element is an HTML element whose
	//   ieForbidsInsertHTML() is true; in XML documents with " />".
	void WebFrameSerializerImpl::endTagToString(Element* element,
	                                            SerializeDomParam* param) {
	  // Start from a defined value so the check below never reads an
	  // indeterminate bool if the pre action leaves the out-param untouched.
	  bool needSkip = false;
	  StringBuilder result;
	  // Do pre action for end tag.
	  result.append(preActionBeforeSerializeEndTag(element, param, &needSkip));
	  if (needSkip)
	    return;
	  // Write end tag when element has child/children.
	  if (element->hasChildren() || param->haveAddedContentsBeforeEnd) {
	    result.append("</");
	    result.append(element->nodeName().lower());
	    result.append('>');
	  } else {
	    // Check whether we have to write end tag for empty element.
	    if (param->isHTMLDocument) {
	      result.append('>');
	      // FIXME: This code is horribly wrong. WebFrameSerializerImpl must die.
	      if (!element->isHTMLElement() ||
	          !toHTMLElement(element)->ieForbidsInsertHTML()) {
	        // We need to write end tag when it is required.
	        result.append("</");
	        result.append(element->nodeName().lower());
	        result.append('>');
	      }
	    } else {
	      // For xml base document.
	      result.append(" />");
	    }
	  }
	  // Do post action for end tag.
	  result.append(postActionAfterSerializeEndTag(element, param));
	  // Save the result to data buffer.
	  saveHTMLContentToBuffer(result.toString(), param);
	}

	// Serializes |node| and, for elements, its whole subtree in document order.
	// Elements are written as open tag, children, end tag; every other permitted
	// node type is written via createMarkup(). Attribute, document and document
	// fragment nodes are not expected here.
	void WebFrameSerializerImpl::buildContentForNode(Node* node,
	                                                 SerializeDomParam* param) {
	  switch (node->getNodeType()) {
	    case Node::kElementNode: {
	      Element* element = toElement(node);
	      openTagToString(element, param);
	      // Recurse into each child before closing the element.
	      Node* child = node->firstChild();
	      while (child) {
	        buildContentForNode(child, param);
	        child = child->nextSibling();
	      }
	      endTagToString(element, param);
	      return;
	    }
	    case Node::kAttributeNode:
	    case Node::kDocumentNode:
	    case Node::kDocumentFragmentNode:
	      // Should not exist.
	      NOTREACHED();
	      return;
	    case Node::kDocumentTypeNode:
	      // Remember that a doctype was seen, then emit it like any other node.
	      param->haveSeenDocType = true;
	      break;
	    case Node::kTextNode:
	    default:
	      break;
	  }

	  // Shared tail for text, doctype and all remaining node types.
	  saveHTMLContentToBuffer(createMarkup(node), param);
	}

	// Builds a serializer for |frame| that reports output to |client| and asks
	// |delegate| how links should be rewritten. All three arguments must be
	// non-null.
	WebFrameSerializerImpl::WebFrameSerializerImpl(
	    WebLocalFrame* frame,
	    WebFrameSerializerClient* client,
	    WebFrameSerializer::LinkRewritingDelegate* delegate)
	    : m_client(client),
	      m_delegate(delegate),
	      m_htmlEntities(false),
	      m_xmlEntities(true) {
	  // Validate the arguments up front.
	  DCHECK(frame);
	  DCHECK(client);
	  DCHECK(delegate);

	  m_specifiedWebLocalFrameImpl = toWebLocalFrameImpl(frame);

	  // Nothing may have been buffered yet.
	  DCHECK(m_dataBuffer.isEmpty());
	}

	bool WebFrameSerializerImpl::serialize() {
	bool didSerialization = false;

	Document* document = m_specifiedWebLocalFrameImpl->frame()->document();
	const KURL& url = document->url();

	if (url.isValid()) {
	didSerialization = true;

	const WTF::TextEncoding& textEncoding =
	document->encoding().isValid() ? document->encoding() : UTF8Encoding();
	if (textEncoding.isNonByteBasedEncoding()) {
	const UChar byteOrderMark = 0xFEFF;
	m_dataBuffer.append(byteOrderMark);
	}

	SerializeDomParam param(url, textEncoding, document);

	Element* documentElement = document->documentElement();
	if (documentElement)
	buildContentForNode(documentElement, &param);

	encodeAndFlushBuffer(WebFrameSerializerClient::CurrentFrameIsFinished,
	&param, ForceFlush);
	} else {
	// Report empty contents for invalid URLs.
	m_client->didSerializeDataForFrame(
	WebCString(), WebFrameSerializerClient::CurrentFrameIsFinished);
	}

	DCHECK(m_dataBuffer.isEmpty());
	return didSerialization;
	}

	} // namespace blink