blob: 107a5073e9a3f1a04524ad2df3ea58f363ec27b0 [file] [log] [blame]
// Copyright 2017 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/services/unzip/unzipper_impl.h"
#include <string>
#include <utility>
#include "base/compiler_specific.h"
#include "base/files/file.h"
#include "base/files/file_error_or.h"
#include "base/functional/bind.h"
#include "base/logging.h"
#include "base/memory/raw_ptr.h"
#include "base/strings/utf_string_conversions.h"
#include "build/build_config.h"
#include "third_party/ced/src/compact_enc_det/compact_enc_det.h"
#include "third_party/zlib/google/redact.h"
#include "third_party/zlib/google/zip.h"
#include "third_party/zlib/google/zip_reader.h"
namespace unzip {
namespace {
// Asks |output_dir| to create the directory |path|. Returns true only when the
// reported error code is base::File::FILE_OK.
bool CreateDirectory(storage::mojom::Directory* output_dir,
                     const base::FilePath& path) {
  // Seed with a failure value so that the result reads as an error unless the
  // call explicitly stores FILE_OK.
  base::File::Error status = base::File::FILE_ERROR_IO;
  output_dir->CreateDirectory(path, &status);
  const bool created = (status == base::File::FILE_OK);
  return created;
}
// A file writer that creates (and, on error, deletes) its output file through
// a storage::mojom::Directory.
class Writer : public zip::FileWriterDelegate {
 public:
  // |output_dir| must be non-null and is held as a non-owning pointer.
  // NOTE(review): presumably |output_dir| has to outlive this Writer - confirm
  // against the caller.
  // |path| is the file to create; it is handed unchanged to |output_dir|.
  Writer(storage::mojom::Directory* output_dir, base::FilePath path)
      : FileWriterDelegate(base::File()),
        output_dir_(output_dir),
        path_(std::move(path)) {
    DCHECK(output_dir_);
  }

  // Creates the output file. The file is opened in
  // kCreateAndOpenOnlyIfNotExists mode with read and write access, and the
  // resulting handle is stored in |owned_file_|. Returns false (after logging)
  // if the reported error is not FILE_OK; otherwise defers to the base class.
  bool PrepareOutput() override {
    // Seed with a failure value so that the check below fails unless the call
    // explicitly stores FILE_OK.
    base::File::Error error = base::File::FILE_ERROR_IO;
    output_dir_->OpenFile(
        path_, storage::mojom::FileOpenMode::kCreateAndOpenOnlyIfNotExists,
        storage::mojom::FileReadAccess::kReadAllowed,
        storage::mojom::FileWriteAccess::kWriteAllowed, &error, &owned_file_);

    if (error != base::File::FILE_OK) {
      LOG(ERROR) << "Cannot create file to extract " << zip::Redact(path_)
                 << ": " << base::File::ErrorToString(error);
      return false;
    }

    return FileWriterDelegate::PrepareOutput();
  }

  // Deletes the output file. The file handle is closed first, then the file
  // is removed through |output_dir_|; a failed deletion is only logged.
  void OnError() override {
    FileWriterDelegate::OnError();
    owned_file_.Close();

    bool success = false;
    output_dir_->DeleteFile(path_, &success);
    if (!success) {
      LOG(ERROR) << "Cannot delete extracted file " << zip::Redact(path_);
    }
  }

 private:
  // Directory interface used to create and delete the file. Not owned.
  const raw_ptr<storage::mojom::Directory> output_dir_;
  // Path of the output file, as passed to |output_dir_|.
  const base::FilePath path_;
};
// Builds a Writer for |path| inside |output_dir|. When |path| has a directory
// component, that directory is created first; if the creation does not report
// FILE_OK, nullptr is returned instead of a writer.
std::unique_ptr<zip::WriterDelegate> MakeFileWriterDelegate(
    storage::mojom::Directory* output_dir,
    const base::FilePath& path) {
  // A path equal to its own base name has no parent directory to create.
  const bool has_parent_component = !(path == path.BaseName());

  if (has_parent_component) {
    base::File::Error status = base::File::FILE_ERROR_IO;
    output_dir->CreateDirectory(path.DirName(), &status);
    if (status != base::File::Error::FILE_OK) {
      return nullptr;
    }
  }

  return std::make_unique<Writer>(output_dir, path);
}
// Asks |filter| whether the entry at |path| should be unzipped. Yields false
// unless the call stores true into the out-parameter.
bool Filter(const mojo::Remote<mojom::UnzipFilter>& filter,
            const base::FilePath& path) {
  bool should_unzip = false;
  filter->ShouldUnzipFile(path, &should_unzip);
  return should_unzip;
}
// Walks the entries of the ZIP archive behind |zip_file| and concatenates
// their names, exactly as stored (original encoding, no separator), into a
// single string. Accumulation stops at the first name that no longer fits in
// the ~100KB reserved buffer. An empty string is returned when the archive
// cannot be opened or no name was collected.
std::string GetRawFileNamesFromZip(const base::File& zip_file) {
  // Open ZIP archive for reading.
  zip::ZipReader reader;
  if (!reader.OpenFromPlatformFile(zip_file.GetPlatformFile())) {
    LOG(ERROR) << "Cannot decode ZIP archive from file handle "
               << zip_file.GetPlatformFile();
    return std::string();
  }

  // The reserved capacity doubles as the cap on how much is accumulated.
  std::string names;
  names.reserve(100000);

  for (const zip::ZipReader::Entry* entry = reader.Next(); entry != nullptr;
       entry = reader.Next()) {
    const std::string& raw_name = entry->path_in_original_encoding;

    // Bail out as soon as the next name would not fit in the spare capacity.
    const size_t spare = names.capacity() - names.size();
    if (spare < raw_name.size()) {
      break;
    }

    names += raw_name;
  }

  if (names.empty()) {
    LOG(ERROR) << "Cannot extract filenames from ZIP archive";
  }
  return names;
}
} // namespace
// Default construction: no receiver is passed in.
UnzipperImpl::UnzipperImpl() = default;

// Binds this instance to |receiver| and arranges for OnReceiverDisconnect()
// to run, through a weak pointer, when the connection is lost.
UnzipperImpl::UnzipperImpl(mojo::PendingReceiver<mojom::Unzipper> receiver)
    : receiver_(this, std::move(receiver)) {
  auto on_disconnect = base::BindOnce(&UnzipperImpl::OnReceiverDisconnect,
                                      weak_ptr_factory_.GetWeakPtr());
  receiver_.set_disconnect_handler(std::move(on_disconnect));
}

UnzipperImpl::~UnzipperImpl() = default;
// Guesses the encoding of the file names stored in the ZIP archive behind
// |zip_file|. Returns UNKNOWN_ENCODING when no names could be read; otherwise
// returns whatever CompactEncDet::DetectEncoding() reports for the
// concatenated raw names.
Encoding GetEncoding(const base::File& zip_file) {
  // Gather the raw names that serve as the detection sample.
  const std::string sample = GetRawFileNamesFromZip(zip_file);
  if (sample.empty()) {
    return UNKNOWN_ENCODING;
  }

  // Run the detector with no URL, charset or language hints.
  int bytes_consumed = 0;
  bool reliable = false;
  const Encoding detected = CompactEncDet::DetectEncoding(
      sample.data(), sample.size(), nullptr, nullptr, nullptr,
      UNKNOWN_ENCODING, UNKNOWN_LANGUAGE,
      CompactEncDet::QUERY_CORPUS,  // Plain text
      true,                         // Exclude 7-bit encodings
      &bytes_consumed, &reliable);

  VLOG(1) << "Detected encoding: " << MimeEncodingName(detected) << " ("
          << detected << "), reliable: " << reliable
          << ", consumed bytes: " << bytes_consumed;

  LOG_IF(ERROR, detected == UNKNOWN_ENCODING)
      << "Cannot detect encoding of filenames in ZIP archive";

  return detected;
}
// Forwards a progress notification to |listener| by calling its OnProgress()
// with |bytes|. RunUnzip() binds this function as the progress callback it
// passes to zip::Unzip().
void UnzipperImpl::Listener(const mojo::Remote<mojom::UnzipListener>& listener,
uint64_t bytes) {
listener->OnProgress(bytes);
}
// Extracts the archive behind |zip_file| into the directory reachable through
// |output_dir_remote| and returns the result of zip::Unzip().
//
// |encoding_name| and |password| are forwarded as unzip options.
// |filter_remote| and |listener_remote| are optional: when one is provided it
// is bound and wrapped into the matching filter / progress callback, otherwise
// that callback is left null.
bool RunUnzip(base::File zip_file,
              mojo::PendingRemote<storage::mojom::Directory> output_dir_remote,
              std::string encoding_name,
              std::string password,
              mojo::PendingRemote<mojom::UnzipFilter> filter_remote,
              mojo::PendingRemote<mojom::UnzipListener> listener_remote) {
  // Bind the directory interface; the callbacks below only borrow it.
  mojo::Remote<storage::mojom::Directory> directory(
      std::move(output_dir_remote));

  zip::FilterCallback filter_callback;
  if (filter_remote) {
    mojo::Remote<mojom::UnzipFilter> filter(std::move(filter_remote));
    filter_callback = base::BindRepeating(&Filter, std::move(filter));
  }

  zip::UnzipProgressCallback progress_callback;
  if (listener_remote) {
    progress_callback = base::BindRepeating(
        &UnzipperImpl::Listener,
        mojo::Remote<mojom::UnzipListener>(std::move(listener_remote)));
  }

  auto writer_factory =
      base::BindRepeating(&MakeFileWriterDelegate, directory.get());
  auto directory_creator =
      base::BindRepeating(&CreateDirectory, directory.get());

  return zip::Unzip(zip_file.GetPlatformFile(), std::move(writer_factory),
                    std::move(directory_creator),
                    {.encoding = std::move(encoding_name),
                     .filter = std::move(filter_callback),
                     .progress = std::move(progress_callback),
                     .password = std::move(password)});
}
// Unzips |zip_file| into the directory behind |output_dir_remote|. The actual
// extraction (RunUnzip) is posted to |runner_|, and |callback| is run with its
// result as the reply.
//
// When the requested encoding is "auto", the encoding is detected from the
// archive and only applied if it is a Shift-JIS variant or RUSSIAN_CP866;
// otherwise the encoding name stays empty. Any other requested encoding is
// forwarded as given.
void UnzipperImpl::Unzip(
    base::File zip_file,
    mojo::PendingRemote<storage::mojom::Directory> output_dir_remote,
    mojom::UnzipOptionsPtr set_options,
    mojo::PendingRemote<mojom::UnzipFilter> filter_remote,
    mojo::PendingRemote<mojom::UnzipListener> listener_remote,
    UnzipCallback callback) {
  DCHECK(zip_file.IsValid());

  std::string encoding_name;
  if (set_options->encoding != "auto") {
    // Explicit encoding requested by the caller.
    encoding_name = set_options->encoding;
  } else if (const Encoding detected = GetEncoding(zip_file);
             IsShiftJisOrVariant(detected) || detected == RUSSIAN_CP866) {
    encoding_name = MimeEncodingName(detected);
  }

  auto unzip_task = base::BindOnce(
      &RunUnzip, std::move(zip_file), std::move(output_dir_remote),
      std::move(encoding_name), std::move(set_options->password),
      std::move(filter_remote), std::move(listener_remote));

  runner_->PostTaskAndReplyWithResult(FROM_HERE, std::move(unzip_task),
                                      base::BindOnce(std::move(callback)));
}
// Detects the encoding of the file names in |zip_file| via GetEncoding() and
// hands the result straight to |callback|.
void UnzipperImpl::DetectEncoding(base::File zip_file,
                                  DetectEncodingCallback callback) {
  DCHECK(zip_file.IsValid());
  std::move(callback).Run(GetEncoding(zip_file));
}
// Scans the entries of |zip_file| and reports, through |callback|, an Info
// built from: whether the archive looked valid, the summed original size of
// its entries, whether any entry is encrypted, and whether any encrypted entry
// uses AES.
//
// If the archive cannot be opened, Info(false, 0, false, false) is reported.
// If an entry has a negative size, or adding it would overflow int64_t, the
// scan stops and the Info is flagged invalid with the size accumulated so far.
void UnzipperImpl::GetExtractedInfo(base::File zip_file,
                                    GetExtractedInfoCallback callback) {
  DCHECK(zip_file.IsValid());

  // Open ZIP archive for reading.
  zip::ZipReader reader;
  if (!reader.OpenFromPlatformFile(zip_file.GetPlatformFile())) {
    LOG(ERROR) << "Cannot decode ZIP archive from file handle "
               << zip_file.GetPlatformFile();
    std::move(callback).Run(unzip::mojom::Info::New(false, 0, false, false));
    return;
  }

  int64_t total_size = 0;
  bool is_valid = true;
  bool any_encrypted = false;
  bool any_aes = false;

  for (const zip::ZipReader::Entry* entry = reader.Next(); entry != nullptr;
       entry = reader.Next()) {
    // Reject negative sizes and sizes that would overflow the running total.
    const int64_t headroom =
        std::numeric_limits<int64_t>::max() - total_size;
    if (entry->original_size < 0 || entry->original_size > headroom) {
      LOG(ERROR) << "ZIP bad size info from file handle "
                 << zip_file.GetPlatformFile();
      is_valid = false;
      break;
    }

    // The size is known to be non-negative here, so it can be added directly.
    total_size += entry->original_size;

    if (entry->is_encrypted) {
      any_encrypted = true;
      // AES usage is only recorded for entries that are encrypted.
      any_aes = any_aes || entry->uses_aes_encryption;
    }
  }

  std::move(callback).Run(
      unzip::mojom::Info::New(is_valid, total_size, any_encrypted, any_aes));
}
// Disconnect handler installed on |receiver_| by the receiver-taking
// constructor. Resets |receiver_|, which is expected to still be bound when
// this runs.
void UnzipperImpl::OnReceiverDisconnect() {
DCHECK(receiver_.is_bound());
receiver_.reset();
}
} // namespace unzip