/*
* Copyright (C) 2012 Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "third_party/blink/renderer/modules/speech/speech_recognition.h"
#include <algorithm>
#include "build/build_config.h"
#include "mojo/public/cpp/bindings/pending_receiver.h"
#include "mojo/public/cpp/bindings/pending_remote.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom-blink.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom-blink.h"
#include "third_party/blink/renderer/core/frame/local_dom_window.h"
#include "third_party/blink/renderer/core/frame/local_frame.h"
#include "third_party/blink/renderer/core/page/page.h"
#include "third_party/blink/renderer/modules/speech/speech_recognition_controller.h"
#include "third_party/blink/renderer/modules/speech/speech_recognition_error_event.h"
#include "third_party/blink/renderer/modules/speech/speech_recognition_event.h"
#include "third_party/blink/renderer/platform/bindings/exception_state.h"
#include "third_party/blink/renderer/platform/heap/garbage_collected.h"

namespace blink {
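
// Factory used by the generated V8 bindings. The Web Speech API is exposed
// only on Window, so the execution context is always a LocalDOMWindow.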
SpeechRecognition* SpeechRecognition::Create(ExecutionContext* context) {
return MakeGarbageCollected<SpeechRecognition>(To<LocalDOMWindow>(context));
}
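
// Web-exposed start(). In a prerendering document the call is deferred until
// activation; otherwise it runs StartInternal() immediately.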
void SpeechRecognition::start(ExceptionState& exception_state) {
// https://wicg.github.io/nav-speculation/prerendering.html#web-speech-patch
// If this is called in prerendering, it should be deferred.
if (DomWindow() && DomWindow()->document()->IsPrerendering()) {
DomWindow()->document()->AddPostPrerenderingActivationStep(
WTF::BindOnce(&SpeechRecognition::StartInternal,
WrapWeakPersistent(this), /*exception_state=*/nullptr));
return;
}
StartInternal(&exception_state);
}
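
// Implements the web-exposed stop(). StopCapture() stops recording but still
// lets results for audio captured so far be delivered.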
void SpeechRecognition::stopFunction() {
// https://wicg.github.io/nav-speculation/prerendering.html#web-speech-patch
// If this is called in prerendering, it should be deferred.
if (DomWindow() && DomWindow()->document()->IsPrerendering()) {
DomWindow()->document()->AddPostPrerenderingActivationStep(WTF::BindOnce(
&SpeechRecognition::stopFunction, WrapWeakPersistent(this)));
return;
}
if (!controller_)
return;
if (started_ && !stopping_) {
stopping_ = true;
session_->StopCapture();
}
}
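
// Web-exposed abort(). Unlike stop(), aborting discards any pending results.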
void SpeechRecognition::abort() {
// https://wicg.github.io/nav-speculation/prerendering.html#web-speech-patch
// If this is called in prerendering, it should be deferred.
if (DomWindow() && DomWindow()->document()->IsPrerendering()) {
DomWindow()->document()->AddPostPrerenderingActivationStep(
WTF::BindOnce(&SpeechRecognition::abort, WrapWeakPersistent(this)));
return;
}
if (!controller_)
return;
if (started_ && !stopping_) {
stopping_ = true;
session_->Abort();
}
}
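
// mojom::blink::SpeechRecognitionSessionClient callback, invoked by the
// browser-side recognizer whenever a new batch of results is ready. Final
// results accumulate across calls; provisional (interim) results are replaced
// wholesale by each batch.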
void SpeechRecognition::ResultRetrieved(
WTF::Vector<mojom::blink::SpeechRecognitionResultPtr> results) {
auto* it = std::stable_partition(
results.begin(), results.end(),
[](const auto& result) { return !result->is_provisional; });
wtf_size_t provisional_count = static_cast<wtf_size_t>(results.end() - it);
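  // Example: if |results| arrives as {final A, provisional B, final C}, the
  // stable partition reorders it to {A, C, B} and |provisional_count| is 1.
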
// Add the new results to the previous final results.
HeapVector<Member<SpeechRecognitionResult>> aggregated_results =
std::move(final_results_);
aggregated_results.reserve(aggregated_results.size() + results.size());
for (const auto& result : results) {
HeapVector<Member<SpeechRecognitionAlternative>> alternatives;
alternatives.ReserveInitialCapacity(result->hypotheses.size());
for (const auto& hypothesis : result->hypotheses) {
alternatives.push_back(MakeGarbageCollected<SpeechRecognitionAlternative>(
hypothesis->utterance, hypothesis->confidence));
}
aggregated_results.push_back(SpeechRecognitionResult::Create(
std::move(alternatives), !result->is_provisional));
}

  // |aggregated_results| now contains the following (in the given order):
  //
  // (1) previous final results from |final_results_|
  // (2) new final results from |results|
  // (3) new provisional results from |results|
  //
  // |final_results_| = (1) + (2).
HeapVector<Member<SpeechRecognitionResult>> new_final_results;
new_final_results.ReserveInitialCapacity(aggregated_results.size() -
provisional_count);
new_final_results.AppendRange(aggregated_results.begin(),
aggregated_results.end() - provisional_count);
final_results_ = std::move(new_final_results);
// We dispatch an event with (1) + (2) + (3).
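  // The first argument becomes the event's resultIndex: the offset of the
  // first result that is new in this batch (everything before it is a
  // previously delivered final result).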
DispatchEvent(*SpeechRecognitionEvent::CreateResult(
aggregated_results.size() - results.size(),
std::move(aggregated_results)));
}
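
// mojom callback for recognition errors. Per the Web Speech API, a "no match"
// outcome surfaces as a nomatch event rather than as an error event.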
void SpeechRecognition::ErrorOccurred(
mojom::blink::SpeechRecognitionErrorPtr error) {
if (error->code == mojom::blink::SpeechRecognitionErrorCode::kNoMatch) {
DispatchEvent(*SpeechRecognitionEvent::CreateNoMatch(nullptr));
} else {
// TODO(primiano): message?
DispatchEvent(*SpeechRecognitionErrorEvent::Create(error->code, String()));
}
}
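
// The remaining SpeechRecognitionSessionClient callbacks map directly onto the
// DOM lifecycle events defined by the Web Speech API. The Mojo interface only
// reports sound start/end, so the speechstart/speechend events are synthesized
// alongside soundstart/soundend.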
void SpeechRecognition::Started() {
  DispatchEvent(*Event::Create(event_type_names::kStart));
}

void SpeechRecognition::AudioStarted() {
  DispatchEvent(*Event::Create(event_type_names::kAudiostart));
}

void SpeechRecognition::SoundStarted() {
  DispatchEvent(*Event::Create(event_type_names::kSoundstart));
  DispatchEvent(*Event::Create(event_type_names::kSpeechstart));
}

void SpeechRecognition::SoundEnded() {
  DispatchEvent(*Event::Create(event_type_names::kSpeechend));
  DispatchEvent(*Event::Create(event_type_names::kSoundend));
}

void SpeechRecognition::AudioEnded() {
  DispatchEvent(*Event::Create(event_type_names::kAudioend));
}
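
// Called when the session has ended for any reason. Resetting the Mojo
// endpoints allows a later start() to bind a fresh session.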
void SpeechRecognition::Ended() {
started_ = false;
stopping_ = false;
session_.reset();
receiver_.reset();
DispatchEvent(*Event::Create(event_type_names::kEnd));
}
const AtomicString& SpeechRecognition::InterfaceName() const {
return event_target_names::kSpeechRecognition;
}
ExecutionContext* SpeechRecognition::GetExecutionContext() const {
return ExecutionContextLifecycleObserver::GetExecutionContext();
}
void SpeechRecognition::ContextDestroyed() {
controller_ = nullptr;
}
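
// Keeps the JS wrapper (and its event listeners) alive while a session is in
// flight, so recognition events are not lost if script drops its last
// reference to the object.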
bool SpeechRecognition::HasPendingActivity() const {
return started_;
}
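
// On Android an active session is aborted as soon as the page becomes hidden;
// other platforms leave recognition running across visibility changes.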
void SpeechRecognition::PageVisibilityChanged() {
#if BUILDFLAG(IS_ANDROID)
if (!GetPage()->IsPageVisible())
abort();
#endif
}
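
// Disconnect handler for the session Mojo pipe: the lost connection surfaces
// to script as a network error, followed by the usual end-of-session teardown.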
void SpeechRecognition::OnConnectionError() {
ErrorOccurred(mojom::blink::SpeechRecognitionError::New(
mojom::blink::SpeechRecognitionErrorCode::kNetwork,
mojom::blink::SpeechAudioErrorDetails::kNone));
Ended();
}
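
// Shared implementation of start(). |exception_state| is null when the call
// was deferred past prerendering activation (see start()).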
void SpeechRecognition::StartInternal(ExceptionState* exception_state) {
if (!controller_ || !GetExecutionContext())
return;
if (started_) {
    // https://wicg.github.io/speech-api/#dom-speechrecognition-start
    // The spec says that if start() is called on an already-started object
    // (i.e. start() has previously been called and no error or end event has
    // fired since), the user agent must throw an "InvalidStateError"
    // DOMException and ignore the call. However, when this method runs as a
    // deferred post-prerendering-activation step, `exception_state` is null:
    // ExceptionState is STACK_ALLOCATED, so it cannot be retained across the
    // deferral.
if (exception_state) {
exception_state->ThrowDOMException(DOMExceptionCode::kInvalidStateError,
"recognition has already started.");
}
return;
}
final_results_.clear();
mojo::PendingRemote<mojom::blink::SpeechRecognitionSessionClient>
session_client;
// See https://bit.ly/2S0zRAS for task types.
receiver_.Bind(
session_client.InitWithNewPipeAndPassReceiver(),
GetExecutionContext()->GetTaskRunner(TaskType::kMiscPlatformAPI));
receiver_.set_disconnect_handler(WTF::BindOnce(
&SpeechRecognition::OnConnectionError, WrapWeakPersistent(this)));
controller_->Start(
session_.BindNewPipeAndPassReceiver(
GetExecutionContext()->GetTaskRunner(TaskType::kMiscPlatformAPI)),
std::move(session_client), *grammars_, lang_, continuous_,
interim_results_, max_alternatives_);
started_ = true;
}
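
// Defaults mirror the spec: |continuous| and |interim_results| are false and
// |max_alternatives| is 1.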
SpeechRecognition::SpeechRecognition(LocalDOMWindow* window)
: ExecutionContextLifecycleObserver(window),
PageVisibilityObserver(window->GetFrame() ? window->GetFrame()->GetPage()
: nullptr),
grammars_(SpeechGrammarList::Create()), // FIXME: The spec is not clear
// on the default value for the
// grammars attribute.
continuous_(false),
interim_results_(false),
max_alternatives_(1),
controller_(SpeechRecognitionController::From(*window)),
started_(false),
stopping_(false),
receiver_(this, window),
session_(window) {}

SpeechRecognition::~SpeechRecognition() = default;

void SpeechRecognition::Trace(Visitor* visitor) const {
visitor->Trace(grammars_);
visitor->Trace(controller_);
visitor->Trace(final_results_);
visitor->Trace(receiver_);
visitor->Trace(session_);
EventTargetWithInlineData::Trace(visitor);
ExecutionContextLifecycleObserver::Trace(visitor);
PageVisibilityObserver::Trace(visitor);
}

}  // namespace blink