| // Copyright 2018 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "third_party/blink/renderer/modules/encoding/text_encoder_stream.h" |
| |
| #include <stdint.h> |
| #include <string.h> |
| |
| #include <memory> |
| #include <optional> |
| #include <utility> |
| |
| #include "third_party/blink/renderer/bindings/core/v8/script_promise.h" |
| #include "third_party/blink/renderer/bindings/core/v8/to_v8_traits.h" |
| #include "third_party/blink/renderer/bindings/core/v8/v8_string_resource.h" |
| #include "third_party/blink/renderer/core/streams/transform_stream_default_controller.h" |
| #include "third_party/blink/renderer/core/streams/transform_stream_transformer.h" |
| #include "third_party/blink/renderer/core/typed_arrays/dom_typed_array.h" |
| #include "third_party/blink/renderer/platform/bindings/exception_state.h" |
| #include "third_party/blink/renderer/platform/bindings/script_state.h" |
| #include "third_party/blink/renderer/platform/wtf/text/text_codec.h" |
| #include "third_party/blink/renderer/platform/wtf/text/text_encoding.h" |
| #include "third_party/blink/renderer/platform/wtf/text/text_encoding_registry.h" |
| #include "v8/include/v8.h" |
| |
| namespace blink { |
| |
| class TextEncoderStream::Transformer final : public TransformStreamTransformer { |
| public: |
| explicit Transformer(ScriptState* script_state) |
| : encoder_(NewTextCodec(Utf8Encoding())), script_state_(script_state) {} |
| |
| Transformer(const Transformer&) = delete; |
| Transformer& operator=(const Transformer&) = delete; |
| |
| // Implements the "encode and enqueue a chunk" algorithm. For efficiency, only |
| // the characters at the end of chunks are special-cased. |
| ScriptPromise<IDLUndefined> Transform( |
| v8::Local<v8::Value> chunk, |
| TransformStreamDefaultController* controller, |
| ExceptionState& exception_state) override { |
| V8StringResource<> input_resource{script_state_->GetIsolate(), chunk}; |
| if (!input_resource.Prepare(exception_state)) { |
| return EmptyPromise(); |
| } |
| const String input = input_resource; |
| if (input.empty()) |
| return ToResolvedUndefinedPromise(script_state_.Get()); |
| |
| const std::optional<UChar> high_surrogate = pending_high_surrogate_; |
| pending_high_surrogate_ = std::nullopt; |
| std::string prefix; |
| std::string result; |
| if (input.Is8Bit()) { |
| if (high_surrogate.has_value()) { |
| // An 8-bit code unit can never be part of an astral character, so no |
| // check is needed. |
| prefix = ReplacementCharacterInUtf8(); |
| } |
| result = |
| encoder_->Encode(input.Span8(), UnencodableHandling::kNoUnencodables); |
| } else { |
| bool have_output = |
| Encode16BitString(input, high_surrogate, &prefix, &result); |
| if (!have_output) |
| return ToResolvedUndefinedPromise(script_state_.Get()); |
| } |
| |
| DOMUint8Array* array = |
| CreateDOMUint8ArrayFromTwoStdStringsConcatenated(prefix, result); |
| controller->enqueue(script_state_, ScriptValue::From(script_state_, array), |
| exception_state); |
| |
| return ToResolvedUndefinedPromise(script_state_.Get()); |
| } |
| |
| // Implements the "encode and flush" algorithm. |
| ScriptPromise<IDLUndefined> Flush( |
| TransformStreamDefaultController* controller, |
| ExceptionState& exception_state) override { |
| if (!pending_high_surrogate_.has_value()) |
| return ToResolvedUndefinedPromise(script_state_.Get()); |
| |
| const std::string replacement_character = ReplacementCharacterInUtf8(); |
| controller->enqueue( |
| script_state_, |
| ScriptValue::From( |
| script_state_, |
| DOMUint8Array::Create(base::as_byte_span(replacement_character))), |
| exception_state); |
| |
| return ToResolvedUndefinedPromise(script_state_.Get()); |
| } |
| |
| ScriptState* GetScriptState() override { return script_state_.Get(); } |
| |
| void Trace(Visitor* visitor) const override { |
| visitor->Trace(script_state_); |
| TransformStreamTransformer::Trace(visitor); |
| } |
| |
| private: |
| static std::string ReplacementCharacterInUtf8() { return "\ufffd"; } |
| |
| static DOMUint8Array* CreateDOMUint8ArrayFromTwoStdStringsConcatenated( |
| const std::string& string1, |
| const std::string& string2) { |
| const wtf_size_t length1 = static_cast<wtf_size_t>(string1.length()); |
| const wtf_size_t length2 = static_cast<wtf_size_t>(string2.length()); |
| DOMUint8Array* const array = DOMUint8Array::Create(length1 + length2); |
| auto [string1_span, string2_span] = array->ByteSpan().split_at(length1); |
| string1_span.copy_from(base::as_byte_span(string1)); |
| string2_span.copy_from(base::as_byte_span(string2)); |
| return array; |
| } |
| |
| // Returns true if either |*prefix| or |*result| have been set to a non-empty |
| // value. |
| bool Encode16BitString(const String& input, |
| std::optional<UChar> high_surrogate, |
| std::string* prefix, |
| std::string* result) { |
| base::span<const UChar> input_span = input.Span16(); |
| DCHECK(!input_span.empty()); |
| if (high_surrogate.has_value()) { |
| const UChar code_unit = input_span.front(); |
| if (code_unit >= 0xDC00 && code_unit <= 0xDFFF) { |
| const UChar astral_character[2] = {high_surrogate.value(), code_unit}; |
| // Third argument is ignored, as above. |
| *prefix = encoder_->Encode(base::span(astral_character), |
| UnencodableHandling::kNoUnencodables); |
| input_span = input_span.subspan<1u>(); |
| if (input_span.empty()) { |
| return true; |
| } |
| } else { |
| *prefix = ReplacementCharacterInUtf8(); |
| } |
| } |
| |
| const UChar final_token = input_span.back(); |
| if (final_token >= 0xD800 && final_token <= 0xDBFF) { |
| pending_high_surrogate_ = final_token; |
| input_span = input_span.first(input_span.size() - 1u); |
| if (input_span.empty()) { |
| return prefix->length() != 0; |
| } |
| } |
| |
| // Third argument is ignored, as above. |
| *result = encoder_->Encode(input_span, |
| UnencodableHandling::kEntitiesForUnencodables); |
| DCHECK_NE(result->length(), 0u); |
| return true; |
| } |
| |
| std::unique_ptr<TextCodec> encoder_; |
| // There is no danger of ScriptState leaking across worlds because a |
| // TextEncoderStream can only be accessed from the world that created it. |
| Member<ScriptState> script_state_; |
| std::optional<UChar> pending_high_surrogate_; |
| }; |
| |
| TextEncoderStream* TextEncoderStream::Create(ScriptState* script_state, |
| ExceptionState& exception_state) { |
| return MakeGarbageCollected<TextEncoderStream>(script_state, exception_state); |
| } |
| |
// Defaulted destructor, defined out of line in this file.
| TextEncoderStream::~TextEncoderStream() = default; |
| |
| String TextEncoderStream::encoding() const { |
| return "utf-8"; |
| } |
| |
| ReadableStream* TextEncoderStream::readable() const { |
| return transform_->Readable(); |
| } |
| |
| WritableStream* TextEncoderStream::writable() const { |
| return transform_->Writable(); |
| } |
| |
// Reports |transform_| to |visitor|, then delegates to the ScriptWrappable
// base-class Trace.
| void TextEncoderStream::Trace(Visitor* visitor) const { |
| visitor->Trace(transform_); |
| ScriptWrappable::Trace(visitor); |
| } |
| |
// Initializes |transform_| with the stream returned by
// TransformStream::Create, giving it a newly allocated Transformer bound to
// |script_state|. |exception_state| is forwarded to TransformStream::Create.
| TextEncoderStream::TextEncoderStream(ScriptState* script_state, |
| ExceptionState& exception_state) |
| : transform_(TransformStream::Create( |
| script_state, |
| MakeGarbageCollected<Transformer>(script_state), |
| exception_state)) {} |
| |
| } // namespace blink |