blob: dcf79eee11ef6cbe9c4cd66ec3c46f8c13a3bf18 [file] [log] [blame]
/*
* Copyright (C) 2011 Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Google Inc. nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "platform/mhtml/MHTMLArchive.h"
#include "platform/DateComponents.h"
#include "platform/SerializedResource.h"
#include "platform/SharedBuffer.h"
#include "platform/mhtml/ArchiveResource.h"
#include "platform/mhtml/MHTMLParser.h"
#include "platform/network/mime/MIMETypeRegistry.h"
#include "platform/text/QuotedPrintable.h"
#include "platform/weborigin/SchemeRegistry.h"
#include "platform/wtf/Assertions.h"
#include "platform/wtf/CryptographicallyRandomNumber.h"
#include "platform/wtf/CurrentTime.h"
#include "platform/wtf/DateMath.h"
#include "platform/wtf/text/Base64.h"
#include "platform/wtf/text/StringBuilder.h"
namespace blink {
// Token strings written after "Content-Transfer-Encoding: " when a part is
// serialized by GenerateMHTMLPart().
constexpr const char* kQuotedPrintable = "quoted-printable";
constexpr const char* kBase64 = "base64";
constexpr const char* kBinary = "binary";
// Returns a copy of |text| in which every character rejected by
// IsASCIIPrintable() has been substituted with '?'. Characters accepted by
// IsASCIIPrintable() are copied through unchanged.
static String ReplaceNonPrintableCharacters(const String& text) {
  StringBuilder sanitized;
  const size_t character_count = text.length();
  size_t position = 0;
  while (position < character_count) {
    if (!IsASCIIPrintable(text[position]))
      sanitized.Append('?');
    else
      sanitized.Append(text[position]);
    ++position;
  }
  return sanitized.ToString();
}
// Nothing to set up explicitly: the members are default-constructed and
// Create() fills them in.
MHTMLArchive::MHTMLArchive() = default;
// Parses |data| as an MHTML document and builds an archive out of the parsed
// resources. Returns nullptr when |url| is rejected by CanLoadArchive() or
// when the parser produces no resources at all.
MHTMLArchive* MHTMLArchive::Create(const KURL& url,
                                   PassRefPtr<const SharedBuffer> data) {
  // MHTML pages can only be loaded from local URLs, http/https URLs, and
  // content URLs (Android specific). The latter is now allowed due to full
  // sandboxing enforcement on MHTML pages.
  if (!CanLoadArchive(url))
    return nullptr;

  MHTMLParser parser(std::move(data));
  HeapVector<Member<ArchiveResource>> parsed_resources = parser.ParseArchive();
  if (parsed_resources.IsEmpty())
    return nullptr;  // Invalid MHTML file.

  MHTMLArchive* archive = new MHTMLArchive;
  const bool has_single_resource = parsed_resources.size() == 1;

  // The first document suitable resource is the main resource of the top
  // frame; every other resource is registered as a subresource.
  for (const auto& entry : parsed_resources) {
    ArchiveResource* resource = entry.Get();

    bool use_as_main_resource = false;
    if (!archive->MainResource()) {
      const AtomicString& mime_type = resource->MimeType();
      use_as_main_resource =
          MIMETypeRegistry::IsSupportedNonImageMIMEType(mime_type);

      // Want to allow image-only MHTML archives, but retain behavior for
      // other documents that have already been created expecting the first
      // HTML page to be considered the main resource.
      if (has_single_resource &&
          MIMETypeRegistry::IsSupportedImageResourceMIMEType(mime_type)) {
        use_as_main_resource = true;
      }

      // Explicitly disallow JS and CSS as the main resource.
      if (MIMETypeRegistry::IsSupportedJavaScriptMIMEType(mime_type) ||
          MIMETypeRegistry::IsSupportedStyleSheetMIMEType(mime_type)) {
        use_as_main_resource = false;
      }
    }

    if (use_as_main_resource)
      archive->SetMainResource(resource);
    else
      archive->AddSubresource(resource);
  }
  return archive;
}
// Reports whether an MHTML archive may be loaded from |url|.
//
// MHTML pages can only be loaded from local URLs, http/https URLs, and
// content URLs (Android specific). The latter is now allowed due to full
// sandboxing enforcement on MHTML pages.
bool MHTMLArchive::CanLoadArchive(const KURL& url) {
  bool is_loadable =
      SchemeRegistry::ShouldTreatURLSchemeAsLocal(url.Protocol()) ||
      url.ProtocolIsInHTTPFamily();
#if OS(ANDROID)
  is_loadable = is_loadable || url.ProtocolIs("content");
#endif
  return is_loadable;
}
void MHTMLArchive::GenerateMHTMLHeader(const String& boundary,
const String& title,
const String& mime_type,
Vector<char>& output_buffer) {
DCHECK(!boundary.IsEmpty());
DCHECK(!mime_type.IsEmpty());
DateComponents now;
now.SetMillisecondsSinceEpochForDateTime(CurrentTimeMS());
// TODO(lukasza): Passing individual date/time components seems fragile.
String date_string = MakeRFC2822DateString(
now.WeekDay(), now.MonthDay(), now.Month(), now.FullYear(), now.Hour(),
now.Minute(), now.Second(), 0);
StringBuilder string_builder;
string_builder.Append("From: <Saved by Blink>\r\n");
string_builder.Append("Subject: ");
// We replace non ASCII characters with '?' characters to match IE's behavior.
string_builder.Append(ReplaceNonPrintableCharacters(title));
string_builder.Append("\r\nDate: ");
string_builder.Append(date_string);
string_builder.Append("\r\nMIME-Version: 1.0\r\n");
string_builder.Append("Content-Type: multipart/related;\r\n");
string_builder.Append("\ttype=\"");
string_builder.Append(mime_type);
string_builder.Append("\";\r\n");
string_builder.Append("\tboundary=\"");
string_builder.Append(boundary);
string_builder.Append("\"\r\n\r\n");
// We use utf8() below instead of ascii() as ascii() replaces CRLFs with ??
// (we still only have put ASCII characters in it).
DCHECK(string_builder.ToString().ContainsOnlyASCII());
CString ascii_string = string_builder.ToString().Utf8();
output_buffer.Append(ascii_string.data(), ascii_string.length());
}
void MHTMLArchive::GenerateMHTMLPart(const String& boundary,
const String& content_id,
EncodingPolicy encoding_policy,
const SerializedResource& resource,
Vector<char>& output_buffer) {
DCHECK(!boundary.IsEmpty());
DCHECK(content_id.IsEmpty() || content_id[0] == '<');
StringBuilder string_builder;
string_builder.Append("--");
string_builder.Append(boundary);
string_builder.Append("\r\n");
string_builder.Append("Content-Type: ");
string_builder.Append(resource.mime_type);
string_builder.Append("\r\n");
if (!content_id.IsEmpty()) {
string_builder.Append("Content-ID: ");
string_builder.Append(content_id);
string_builder.Append("\r\n");
}
const char* content_encoding = 0;
if (encoding_policy == kUseBinaryEncoding)
content_encoding = kBinary;
else if (MIMETypeRegistry::IsSupportedJavaScriptMIMEType(
resource.mime_type) ||
MIMETypeRegistry::IsSupportedNonImageMIMEType(resource.mime_type))
content_encoding = kQuotedPrintable;
else
content_encoding = kBase64;
string_builder.Append("Content-Transfer-Encoding: ");
string_builder.Append(content_encoding);
string_builder.Append("\r\n");
if (!resource.url.ProtocolIsAbout()) {
string_builder.Append("Content-Location: ");
string_builder.Append(resource.url.GetString());
string_builder.Append("\r\n");
}
string_builder.Append("\r\n");
CString ascii_string = string_builder.ToString().Utf8();
output_buffer.Append(ascii_string.data(), ascii_string.length());
if (!strcmp(content_encoding, kBinary)) {
const char* data;
size_t position = 0;
while (size_t length = resource.data->GetSomeData(data, position)) {
output_buffer.Append(data, length);
position += length;
}
} else {
// FIXME: ideally we would encode the content as a stream without having to
// fetch it all.
const char* data = resource.data->Data();
size_t data_length = resource.data->size();
Vector<char> encoded_data;
if (!strcmp(content_encoding, kQuotedPrintable)) {
QuotedPrintableEncode(data, data_length, encoded_data);
output_buffer.Append(encoded_data.data(), encoded_data.size());
output_buffer.Append("\r\n", 2u);
} else {
DCHECK(!strcmp(content_encoding, kBase64));
// We are not specifying insertLFs = true below as it would cut the lines
// with LFs and MHTML requires CRLFs.
Base64Encode(data, data_length, encoded_data);
const size_t kMaximumLineLength = 76;
size_t index = 0;
size_t encoded_data_length = encoded_data.size();
do {
size_t line_length =
std::min(encoded_data_length - index, kMaximumLineLength);
output_buffer.Append(encoded_data.data() + index, line_length);
output_buffer.Append("\r\n", 2u);
index += kMaximumLineLength;
} while (index < encoded_data_length);
}
}
}
void MHTMLArchive::GenerateMHTMLFooterForTesting(const String& boundary,
Vector<char>& output_buffer) {
DCHECK(!boundary.IsEmpty());
CString ascii_string = String("--" + boundary + "--\r\n").Utf8();
output_buffer.Append(ascii_string.data(), ascii_string.length());
}
// Records |main_resource| as this archive's main resource, replacing any
// value stored previously.
void MHTMLArchive::SetMainResource(ArchiveResource* main_resource) {
main_resource_ = main_resource;
}
// Registers |resource| in the subresource map under its own URL and, when
// MHTMLParser::ConvertContentIDToURI() produces a valid URL from the
// resource's Content-ID, under that URL as well.
void MHTMLArchive::AddSubresource(ArchiveResource* resource) {
  subresources_.Set(resource->Url(), resource);

  const KURL content_id_url =
      MHTMLParser::ConvertContentIDToURI(resource->ContentID());
  if (!content_id_url.IsValid())
    return;
  subresources_.Set(content_id_url, resource);
}
// Looks up the subresource registered under the string form of |url| and
// returns whatever the subresource map holds for that key.
// NOTE(review): presumably null when no entry exists -- confirm against the
// map's at() contract.
ArchiveResource* MHTMLArchive::SubresourceForURL(const KURL& url) const {
  const String& lookup_key = url.GetString();
  return subresources_.at(lookup_key);
}
// Tracing hook for MHTMLArchive: reports the main resource and the
// subresource map to |visitor|.
DEFINE_TRACE(MHTMLArchive) {
visitor->Trace(main_resource_);
visitor->Trace(subresources_);
}
} // namespace blink