blob: 10e62ca03e5f9a0c81aff4a0c198104ee49266ce [file] [log] [blame]
/*
* Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
* Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
* Copyright (C) 2007-2009 Torch Mobile, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "wtf/text/TextEncoding.h"
#include "wtf/StdLibExtras.h"
#include "wtf/Threading.h"
#include "wtf/text/CString.h"
#include "wtf/text/TextEncodingRegistry.h"
#include "wtf/text/WTFString.h"
#include <memory>
namespace WTF {
// File-local accessor for the TextEncoding constructed from the name "UTF-7".
// The instance is created through DEFINE_THREAD_SAFE_STATIC_LOCAL (defined
// elsewhere; presumably a once-only, thread-safe initialization -- confirm
// against the macro's definition). Used by TextEncoding::isUTF7Encoding().
static const TextEncoding& UTF7Encoding() {
DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalUTF7Encoding,
new TextEncoding("UTF-7"));
return globalUTF7Encoding;
}
// Builds an encoding from a C-string label. m_name is set to whatever
// atomicCanonicalTextEncodingName() resolves the label to; a null m_name is
// what decode()/encode() in this file treat as "no usable encoding".
TextEncoding::TextEncoding(const char* name)
    : m_name(atomicCanonicalTextEncodingName(name)) {
  // Nothing more to do when the label did not resolve.
  if (!m_name)
    return;

  // Labels that alias the replacement encoding are accepted, but the label
  // "replacement" itself is rejected, so the name is cleared in that case.
  if (isReplacementEncoding(name))
    m_name = nullptr;
}
// Builds an encoding from a String label. m_name is set to whatever
// atomicCanonicalTextEncodingName() resolves the label to; a null m_name is
// what decode()/encode() in this file treat as "no usable encoding".
TextEncoding::TextEncoding(const String& name)
    : m_name(atomicCanonicalTextEncodingName(name)) {
  // Nothing more to do when the label did not resolve.
  if (!m_name)
    return;

  // Labels that alias the replacement encoding are accepted, but the label
  // "replacement" itself is rejected, so the name is cleared in that case.
  if (isReplacementEncoding(name))
    m_name = nullptr;
}
// Decodes |length| bytes starting at |data| using a freshly created codec for
// this encoding.
//
// |stopOnError| and |sawError| are forwarded unchanged to TextCodec::decode().
// The codec is always invoked with DataEOF as its flush argument.
//
// When this encoding has no name (m_name is null) a default-constructed String
// is returned and |sawError| is left untouched.
String TextEncoding::decode(const char* data,
                            size_t length,
                            bool stopOnError,
                            bool& sawError) const {
  if (m_name) {
    std::unique_ptr<TextCodec> codec = newTextCodec(*this);
    return codec->decode(data, length, DataEOF, stopOnError, sawError);
  }
  return String();
}
// Encodes |string| into bytes using a freshly created codec for this encoding.
// |handling| is forwarded unchanged to TextCodec::encode().
//
// Returns a default-constructed CString when this encoding has no name, and a
// CString built from "" when |string| is empty (no codec is created in either
// case).
CString TextEncoding::encode(const String& string,
                             UnencodableHandling handling) const {
  if (!m_name)
    return CString();

  if (string.isEmpty())
    return "";

  std::unique_ptr<TextCodec> codec = newTextCodec(*this);

  // Choose the codec overload that matches the string's internal character
  // width.
  if (string.is8Bit())
    return codec->encode(string.characters8(), string.length(), handling);
  return codec->encode(string.characters16(), string.length(), handling);
}
// Reports whether this encoding's canonical name is the one registered for
// "ISO-8859-8". Always false while noExtendedTextEncodingNameUsed() holds; in
// that case the "ISO-8859-8" name is not looked up at all.
bool TextEncoding::usesVisualOrdering() const {
  if (!noExtendedTextEncodingNameUsed()) {
    // Resolved once, on the first call that reaches this point. Canonical
    // names are compared by pointer.
    static const char* const iso88598Name =
        atomicCanonicalTextEncodingName("ISO-8859-8");
    return m_name == iso88598Name;
  }
  return false;
}
// True for UTF-16LE and UTF-16BE, and additionally for UTF-32, UTF-32BE and
// UTF-32LE once noExtendedTextEncodingNameUsed() no longer holds. While it
// does hold, the UTF-32 encodings are never consulted.
bool TextEncoding::isNonByteBasedEncoding() const {
  const bool onlyBaseNamesUsed = noExtendedTextEncodingNameUsed();

  if (*this == UTF16LittleEndianEncoding() ||
      *this == UTF16BigEndianEncoding()) {
    return true;
  }

  if (onlyBaseNamesUsed)
    return false;

  return *this == UTF32Encoding() || *this == UTF32BigEndianEncoding() ||
         *this == UTF32LittleEndianEncoding();
}
// True when this encoding compares equal to the file-local UTF-7 encoding.
// Always false while noExtendedTextEncodingNameUsed() holds; the UTF-7
// encoding object is not touched in that case.
bool TextEncoding::isUTF7Encoding() const {
  return !noExtendedTextEncodingNameUsed() && *this == UTF7Encoding();
}
// Returns UTF-8 in place of any encoding that isNonByteBasedEncoding() reports
// as non-byte-based; every other encoding is returned as-is.
const TextEncoding& TextEncoding::closestByteBasedEquivalent() const {
  return isNonByteBasedEncoding() ? UTF8Encoding() : *this;
}
// HTML5 specifies that UTF-8 be used for form submission when the form is
// part of a UTF-16 document, probably because UTF-16 is not a byte-based
// encoding and can contain 0x00. By extension, the same is done for UTF-32.
// UTF-7 is byte-based, but it is fraught with problems and we would rather
// steer clear of it, so it is mapped to UTF-8 as well.
const TextEncoding& TextEncoding::encodingForFormSubmission() const {
  const bool usableAsIs = !isNonByteBasedEncoding() && !isUTF7Encoding();
  return usableAsIs ? *this : UTF8Encoding();
}
// Returns the shared TextEncoding constructed from the name "ASCII".
// The instance is created through DEFINE_THREAD_SAFE_STATIC_LOCAL (defined
// elsewhere; presumably a once-only, thread-safe initialization -- confirm
// against the macro's definition).
const TextEncoding& ASCIIEncoding() {
DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalASCIIEncoding,
new TextEncoding("ASCII"));
return globalASCIIEncoding;
}
// Returns the shared TextEncoding constructed from the name "latin1".
// The instance is created through DEFINE_THREAD_SAFE_STATIC_LOCAL (defined
// elsewhere; presumably a once-only, thread-safe initialization -- confirm
// against the macro's definition).
const TextEncoding& Latin1Encoding() {
DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalLatin1Encoding,
new TextEncoding("latin1"));
return globalLatin1Encoding;
}
// Returns the shared TextEncoding constructed from the name "UTF-16BE".
// The instance is created through DEFINE_THREAD_SAFE_STATIC_LOCAL (defined
// elsewhere; presumably a once-only, thread-safe initialization -- confirm
// against the macro's definition).
const TextEncoding& UTF16BigEndianEncoding() {
DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding,
globalUTF16BigEndianEncoding,
new TextEncoding("UTF-16BE"));
return globalUTF16BigEndianEncoding;
}
// Returns the shared TextEncoding constructed from the name "UTF-16LE".
// The instance is created through DEFINE_THREAD_SAFE_STATIC_LOCAL (defined
// elsewhere; presumably a once-only, thread-safe initialization -- confirm
// against the macro's definition).
const TextEncoding& UTF16LittleEndianEncoding() {
DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding,
globalUTF16LittleEndianEncoding,
new TextEncoding("UTF-16LE"));
return globalUTF16LittleEndianEncoding;
}
// UTF-32 is UTF-32LE with an implicit BOM.
//
// Returns the shared TextEncoding constructed from the name "UTF-32".
// The instance is created through DEFINE_THREAD_SAFE_STATIC_LOCAL (defined
// elsewhere; presumably a once-only, thread-safe initialization -- confirm
// against the macro's definition).
const TextEncoding& UTF32Encoding() {
DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalUTF32Encoding,
new TextEncoding("UTF-32"));
return globalUTF32Encoding;
}
// Returns the shared TextEncoding constructed from the name "UTF-32BE".
// The instance is created through DEFINE_THREAD_SAFE_STATIC_LOCAL (defined
// elsewhere; presumably a once-only, thread-safe initialization -- confirm
// against the macro's definition).
const TextEncoding& UTF32BigEndianEncoding() {
DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding,
globalUTF32BigEndianEncoding,
new TextEncoding("UTF-32BE"));
return globalUTF32BigEndianEncoding;
}
// Returns the shared TextEncoding constructed from the name "UTF-32LE".
// The instance is created through DEFINE_THREAD_SAFE_STATIC_LOCAL (defined
// elsewhere; presumably a once-only, thread-safe initialization -- confirm
// against the macro's definition).
const TextEncoding& UTF32LittleEndianEncoding() {
DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding,
globalUTF32LittleEndianEncoding,
new TextEncoding("UTF-32LE"));
return globalUTF32LittleEndianEncoding;
}
// Returns the shared TextEncoding constructed from the name "UTF-8".
// The instance is created through DEFINE_THREAD_SAFE_STATIC_LOCAL (defined
// elsewhere; presumably a once-only, thread-safe initialization -- confirm
// against the macro's definition).
const TextEncoding& UTF8Encoding() {
DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalUTF8Encoding,
new TextEncoding("UTF-8"));
// Other members of this file fall back to UTF-8, so assert that the name
// resolved to a valid encoding.
ASSERT(globalUTF8Encoding.isValid());
return globalUTF8Encoding;
}
// Returns the shared TextEncoding constructed from the name "WinLatin1".
// The instance is created through DEFINE_THREAD_SAFE_STATIC_LOCAL (defined
// elsewhere; presumably a once-only, thread-safe initialization -- confirm
// against the macro's definition).
const TextEncoding& WindowsLatin1Encoding() {
DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding,
globalWindowsLatin1Encoding,
new TextEncoding("WinLatin1"));
return globalWindowsLatin1Encoding;
}
} // namespace WTF