/*
* Copyright (C) 2012 Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "third_party/blink/renderer/modules/speech/speech_recognition.h"
#include <algorithm>
#include "build/build_config.h"
#include "third_party/blink/renderer/core/dom/document.h"
#include "third_party/blink/renderer/core/frame/local_frame.h"
#include "third_party/blink/renderer/modules/speech/speech_recognition_controller.h"
#include "third_party/blink/renderer/modules/speech/speech_recognition_error.h"
#include "third_party/blink/renderer/modules/speech/speech_recognition_event.h"
#include "third_party/blink/renderer/platform/bindings/exception_state.h"

namespace blink {
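
// Creates a garbage-collected SpeechRecognition for |context|, using the
// document's frame to look up the per-frame SpeechRecognitionController.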
SpeechRecognition* SpeechRecognition::Create(ExecutionContext* context) {
Document& document = To<Document>(*context);
return MakeGarbageCollected<SpeechRecognition>(document.GetFrame(), context);
}
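
// Starts a recognition session: binds this object as the Mojo session client,
// requests a session interface, and hands the configured grammars, language,
// continuous/interim flags, and maximum number of alternatives to the
// browser-side controller. Throws InvalidStateError if already started.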
void SpeechRecognition::start(ExceptionState& exception_state) {
if (!controller_)
return;
if (started_) {
exception_state.ThrowDOMException(DOMExceptionCode::kInvalidStateError,
"recognition has already started.");
return;
}
final_results_.clear();
mojom::blink::SpeechRecognitionSessionClientPtrInfo session_client;
binding_.Bind(mojo::MakeRequest(&session_client),
GetExecutionContext()->GetInterfaceInvalidator());
binding_.set_connection_error_handler(WTF::Bind(
&SpeechRecognition::OnConnectionError, WrapWeakPersistent(this)));
mojom::blink::SpeechRecognitionSessionRequest session_request =
MakeRequest(&session_, GetExecutionContext()->GetInterfaceInvalidator());
controller_->Start(std::move(session_request), std::move(session_client),
*grammars_, lang_, continuous_, interim_results_,
max_alternatives_);
started_ = true;
}
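
// Implements stop(): asks the active session to stop capturing audio. No-op
// if there is no session or a stop/abort is already in flight.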
void SpeechRecognition::stopFunction() {
if (!controller_)
return;
if (started_ && !stopping_) {
stopping_ = true;
session_->StopCapture();
}
}
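
// Implements abort(): asks the active session to abort recognition outright.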
void SpeechRecognition::abort() {
if (!controller_)
return;
if (started_ && !stopping_) {
stopping_ = true;
session_->Abort();
}
}
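
// Receives a batch of recognition results from the browser. Final results are
// accumulated in |final_results_| across calls, while provisional (interim)
// results are only included in the result event dispatched to script.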
void SpeechRecognition::ResultRetrieved(
WTF::Vector<mojom::blink::SpeechRecognitionResultPtr> results) {
auto* it = std::stable_partition(
results.begin(), results.end(),
[](const auto& result) { return !result->is_provisional; });
wtf_size_t provisional_count = static_cast<wtf_size_t>(results.end() - it);
// Add the new results to the previous final results.
HeapVector<Member<SpeechRecognitionResult>> aggregated_results =
std::move(final_results_);
aggregated_results.ReserveCapacity(aggregated_results.size() +
results.size());
for (const auto& result : results) {
HeapVector<Member<SpeechRecognitionAlternative>> alternatives;
alternatives.ReserveInitialCapacity(result->hypotheses.size());
for (const auto& hypothesis : result->hypotheses) {
alternatives.push_back(SpeechRecognitionAlternative::Create(
hypothesis->utterance, hypothesis->confidence));
}
aggregated_results.push_back(SpeechRecognitionResult::Create(
std::move(alternatives), !result->is_provisional));
}
// |aggregated_results| now contains the following (in the given order):
//
// (1) previous final results from |final_results_|
// (2) new final results from |results|
// (3) new provisional results from |results|
// |final_results_| = (1) + (2).
HeapVector<Member<SpeechRecognitionResult>> new_final_results;
new_final_results.ReserveInitialCapacity(aggregated_results.size() -
provisional_count);
new_final_results.AppendRange(aggregated_results.begin(),
aggregated_results.end() - provisional_count);
final_results_ = std::move(new_final_results);
// We dispatch an event with (1) + (2) + (3).
DispatchEvent(*SpeechRecognitionEvent::CreateResult(
aggregated_results.size() - results.size(),
std::move(aggregated_results)));
}
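
// Reports a recognition error to script: a "no match" result is surfaced as a
// nomatch event, anything else as an error event.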
void SpeechRecognition::ErrorOccurred(
mojom::blink::SpeechRecognitionErrorPtr error) {
if (error->code == mojom::blink::SpeechRecognitionErrorCode::kNoMatch) {
DispatchEvent(*SpeechRecognitionEvent::CreateNoMatch(nullptr));
} else {
// TODO(primiano): message?
DispatchEvent(*SpeechRecognitionError::Create(error->code, String()));
}
}
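
// The following callbacks mirror the browser-side session state and are
// surfaced to script as the corresponding Web Speech API events
// (start, audiostart, soundstart/speechstart, speechend/soundend, audioend).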
void SpeechRecognition::Started() {
DispatchEvent(*Event::Create(event_type_names::kStart));
}

void SpeechRecognition::AudioStarted() {
DispatchEvent(*Event::Create(event_type_names::kAudiostart));
}

void SpeechRecognition::SoundStarted() {
DispatchEvent(*Event::Create(event_type_names::kSoundstart));
DispatchEvent(*Event::Create(event_type_names::kSpeechstart));
}

void SpeechRecognition::SoundEnded() {
DispatchEvent(*Event::Create(event_type_names::kSpeechend));
DispatchEvent(*Event::Create(event_type_names::kSoundend));
}

void SpeechRecognition::AudioEnded() {
DispatchEvent(*Event::Create(event_type_names::kAudioend));
}
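
// Called when the session is over (normally or after an error/abort): resets
// the session state, closes the Mojo binding, and fires the 'end' event.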
void SpeechRecognition::Ended() {
started_ = false;
stopping_ = false;
session_.reset();
binding_.Close();
DispatchEvent(*Event::Create(event_type_names::kEnd));
}

const AtomicString& SpeechRecognition::InterfaceName() const {
return event_target_names::kSpeechRecognition;
}

ExecutionContext* SpeechRecognition::GetExecutionContext() const {
return ContextLifecycleObserver::GetExecutionContext();
}
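
// The execution context is going away; drop the controller so no new
// sessions can be started.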
void SpeechRecognition::ContextDestroyed(ExecutionContext*) {
controller_ = nullptr;
}
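
// Keep the wrapper alive while a session is running so pending events can
// still reach script.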
bool SpeechRecognition::HasPendingActivity() const {
return started_;
}
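
// On Android, abort recognition when the page is hidden.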
void SpeechRecognition::PageVisibilityChanged() {
#if defined(OS_ANDROID)
if (!GetPage()->IsPageVisible())
abort();
#endif
}
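
// The Mojo connection to the browser was lost; report it to script as a
// network error and end the session.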
void SpeechRecognition::OnConnectionError() {
ErrorOccurred(mojom::blink::SpeechRecognitionError::New(
mojom::blink::SpeechRecognitionErrorCode::kNetwork,
mojom::blink::SpeechAudioErrorDetails::kNone));
Ended();
}

SpeechRecognition::SpeechRecognition(LocalFrame* frame,
ExecutionContext* context)
: ContextLifecycleObserver(context),
PageVisibilityObserver(frame ? frame->GetPage() : nullptr),
grammars_(SpeechGrammarList::Create()), // FIXME: The spec is not clear
// on the default value for the
// grammars attribute.
continuous_(false),
interim_results_(false),
max_alternatives_(1),
controller_(SpeechRecognitionController::From(frame)),
started_(false),
stopping_(false),
binding_(this) {}

SpeechRecognition::~SpeechRecognition() = default;

void SpeechRecognition::Trace(blink::Visitor* visitor) {
visitor->Trace(grammars_);
visitor->Trace(controller_);
visitor->Trace(final_results_);
EventTargetWithInlineData::Trace(visitor);
ContextLifecycleObserver::Trace(visitor);
PageVisibilityObserver::Trace(visitor);
}

}  // namespace blink