// blob: fc6d0ef66e061460f3a920ab8f8626495c4719dd [file] [log] [blame]
// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef WEBKIT_GLUE_DOM_SERIALIZER_H__
#define WEBKIT_GLUE_DOM_SERIALIZER_H__
#include <string>
#include <vector>

#include "base/file_path.h"
#include "base/hash_tables.h"
#include "googleurl/src/gurl.h"
// Forward declarations of the frame types referenced by DomSerializer below;
// this header only uses them through pointers.
class WebFrame;
class WebFrameImpl;
// Forward declarations of the WebCore types referenced in this header. Their
// full definitions are not needed for the declarations below.
namespace WebCore {
class Document;
class Element;
class Node;
class String;
class TextEncoding;
}  // namespace WebCore
namespace webkit_glue {
// Forward declaration of the sink interface passed to the DomSerializer
// constructor; only a pointer to it is used in this header.
class DomSerializerDelegate;
// Gets HTML data by serializing all frames of the current page, using lists
// which contain all resource links that have a local copy, together with the
// directory which contains all saved auxiliary files, including all sub
// frames and resources.
// This class finds all target frames and serializes them to HTML data.
// A data buffer is used to temporarily save the generated HTML data, and
// WebViewDelegate::SendSerializedHtmlData is called sequentially each time
// the data buffer is full. See the comments of
// WebViewDelegate::SendSerializedHtmlData for more information.
class DomSerializer {
 public:
  // Creates a serializer for the specified frame.
  // |webframe| specifies which frame needs to be serialized.
  // |recursive_serialization| specifies whether all sub frames of the
  // specified frame need to be serialized as well.
  // |delegate| points to the DomSerializerDelegate sink interface which
  // receives the individual chunks of data to be saved.
  // |links| contains the original URLs of all saved links.
  // |local_paths| contains the corresponding local file paths of all saved
  // links, matched one by one with |links|.
  // |local_directory_name| is the relative path of the directory which
  // contains all saved auxiliary files, including all sub frames and
  // resources.
  DomSerializer(WebFrame* webframe,
                bool recursive_serialization,
                DomSerializerDelegate* delegate,
                const std::vector<GURL>& links,
                const std::vector<FilePath>& local_paths,
                const FilePath& local_directory_name);

  // Does the serialization. Returns false if no available frame has been
  // serialized, otherwise returns true.
  bool SerializeDom();

  // Generates the META tag for the charset declaration.
  static std::wstring GenerateMetaCharsetDeclaration(
      const std::wstring& charset);

  // Generates the MOTW (mark of the web) declaration.
  static std::string GenerateMarkOfTheWebDeclaration(const GURL& url);

  // Generates the default base tag declaration.
  static std::wstring GenerateBaseTagDeclaration(
      const std::wstring& base_target);

 private:
  // The specified frame which needs to be serialized.
  WebFrameImpl* specified_webframeimpl_;

  // This hash_map maps the resource URL of an original link to its local
  // file path.
  typedef base::hash_map<std::string, FilePath> LinkLocalPathMap;

  // local_links_ includes all pairs of local resource path and corresponding
  // original link.
  LinkLocalPathMap local_links_;

  // Pointer to the DomSerializerDelegate.
  DomSerializerDelegate* delegate_;

  // Data buffer for saving the result of the serialized DOM data.
  std::string data_buffer_;

  // True means that not only the specified frame but also all of its
  // sub-frames are serialized. Otherwise only the specified frame is
  // serialized, excluding all sub-frames.
  bool recursive_serialization_;

  // Flag indicating whether all frames which need to be serialized have been
  // collected.
  bool frames_collected_;

  // Local directory name of all local resource files.
  // Held by value rather than by reference: a reference member would dangle
  // if the constructor argument were a temporary or were destroyed before
  // serialization takes place.
  const FilePath local_directory_name_;

  // Vector for saving all frames which need to be serialized.
  std::vector<WebFrameImpl*> frames_;

  // Per-document state passed through the serialization helpers below.
  // NOTE: the reference members refer to objects owned by the creator of the
  // SerializeDomParam, which must keep them alive while the param is in use.
  struct SerializeDomParam {
    // Frame URL of the document currently being processed.
    const GURL& current_frame_gurl;

    // Text encoding object currently in use.
    const WebCore::TextEncoding& text_encoding;

    // Document object of the current frame.
    WebCore::Document* doc;

    // Local directory name of all local resource files.
    const FilePath& directory_name;

    // Flag indicating whether the current doc is an HTML document. It is a
    // cached value of Document.isHTMLDocument().
    bool is_html_document;

    // Flag indicating whether a document type declaration has been met.
    bool has_doctype;

    // Flag indicating whether the meta issue has been processed.
    bool has_checked_meta;

    // This meta element needs to be skipped when serializing the DOM.
    const WebCore::Element* skip_meta_element;

    // Flag indicating that we are inside a script or style tag.
    bool is_in_script_or_style_tag;

    // Flag indicating whether the xml document declaration has been written.
    // It is only used for xml documents.
    bool has_doc_declaration;

    // Flag indicating whether additional contents have been added before the
    // end tag. This flag is re-assigned in each call of
    // PostActionAfterSerializeOpenTag and it could be changed in
    // PreActionBeforeSerializeEndTag if that function adds new contents into
    // the serialization stream.
    bool has_added_contents_before_end;

    // Constructor.
    SerializeDomParam(
        const GURL& current_frame_gurl,
        const WebCore::TextEncoding& text_encoding,
        WebCore::Document* doc,
        const FilePath& directory_name);

   private:
    DISALLOW_EVIL_CONSTRUCTORS(SerializeDomParam);
  };

  // Collects all target frames which need to be serialized.
  void CollectTargetFrames();

  // Before serializing the open tag of an element, gives the target element
  // a chance to do some prior work and add some additional data.
  WebCore::String PreActionBeforeSerializeOpenTag(
      const WebCore::Element* element,
      SerializeDomParam* param,
      bool* need_skip);

  // After serializing the open tag of an element, gives the target element
  // a chance to do some post work and add some additional data.
  WebCore::String PostActionAfterSerializeOpenTag(
      const WebCore::Element* element,
      SerializeDomParam* param);

  // Before serializing the end tag of an element, gives the target element
  // a chance to do some prior work and add some additional data.
  WebCore::String PreActionBeforeSerializeEndTag(
      const WebCore::Element* element,
      SerializeDomParam* param, bool* need_skip);

  // After serializing the end tag of an element, gives the target element
  // a chance to do some post work and add some additional data.
  WebCore::String PostActionAfterSerializeEndTag(
      const WebCore::Element* element,
      SerializeDomParam* param);

  // Saves generated HTML content to the data buffer.
  void SaveHtmlContentToBuffer(const WebCore::String& result,
                               SerializeDomParam* param);

  // Serializes the open tag of the specified element.
  void OpenTagToString(const WebCore::Element* element,
                       SerializeDomParam* param);

  // Serializes the end tag of the specified element.
  void EndTagToString(const WebCore::Element* element,
                      SerializeDomParam* param);

  // Builds the content for the specified node.
  void BuildContentForNode(const WebCore::Node* node,
                           SerializeDomParam* param);

  DISALLOW_EVIL_CONSTRUCTORS(DomSerializer);
};
} // namespace webkit_glue
#endif // WEBKIT_GLUE_DOM_SERIALIZER_H__