blob: 3226a568d43a7b702dd7b77a7ba1794b7af0fec8 [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "content/renderer/speech_recognition_dispatcher.h"
#include <stddef.h>
#include <stdint.h>
#include <algorithm>
#include <utility>
#include "base/strings/utf_string_conversions.h"
#include "build/build_config.h"
#include "content/public/common/service_names.mojom.h"
#include "content/public/renderer/render_thread.h"
#include "content/renderer/render_frame_impl.h"
#include "services/service_manager/public/cpp/connector.h"
#include "third_party/blink/public/platform/web_string.h"
#include "third_party/blink/public/platform/web_vector.h"
#include "third_party/blink/public/web/web_speech_grammar.h"
#include "third_party/blink/public/web/web_speech_recognition_params.h"
#include "third_party/blink/public/web/web_speech_recognition_result.h"
using blink::WebVector;
using blink::WebString;
using blink::WebSpeechGrammar;
using blink::WebSpeechRecognitionHandle;
using blink::WebSpeechRecognitionResult;
using blink::WebSpeechRecognitionParams;
using blink::WebSpeechRecognizerClient;
namespace content {
// Observes |render_frame|; all other state (session map, mojo bindings, the
// browser-side host pointer) is default-constructed and set up lazily.
SpeechRecognitionDispatcher::SpeechRecognitionDispatcher(
    RenderFrame* render_frame)
    : RenderFrameObserver(render_frame) {}
SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() = default;
// RenderFrameObserver hook: the dispatcher deletes itself here.
// NOTE(review): assumes the object is heap-allocated with no other owner —
// confirm at the creation site.
void SpeechRecognitionDispatcher::OnDestruct() {
  delete this;
}
// Frame-visibility hook. On Android, every outstanding recognition session is
// aborted when the frame is hidden; other platforms leave sessions running.
void SpeechRecognitionDispatcher::WasHidden() {
#if defined(OS_ANDROID)
  for (const auto& it : session_map_) {
    it.second->Abort();
  }
#endif
}
// Starts a recognition session for |handle|: translates the Blink parameters
// into a mojom request, sets up the session/client mojo pipe pair, registers
// the session under |handle|, and sends everything to the browser-side host.
void SpeechRecognitionDispatcher::Start(
    const WebSpeechRecognitionHandle& handle,
    const WebSpeechRecognitionParams& params,
    const WebSpeechRecognizerClient& recognizer_client) {
  // Only one recognizer client is supported at a time: it may be assigned
  // while unset, or re-assigned to the same client.
  DCHECK(recognizer_client_.IsNull() ||
         recognizer_client_ == recognizer_client);
  recognizer_client_ = recognizer_client;
  // Copy grammars and the scalar recognition options into the mojom params.
  blink::mojom::StartSpeechRecognitionRequestParamsPtr msg_params =
      blink::mojom::StartSpeechRecognitionRequestParams::New();
  for (const WebSpeechGrammar& grammar : params.Grammars()) {
    msg_params->grammars.push_back(blink::mojom::SpeechRecognitionGrammar::New(
        grammar.Src(), grammar.Weight()));
  }
  msg_params->language = params.Language().Utf8();
  msg_params->max_hypotheses = static_cast<uint32_t>(params.MaxAlternatives());
  msg_params->continuous = params.Continuous();
  msg_params->interim_results = params.InterimResults();
  msg_params->origin = params.Origin();
  // Client pipe: the browser sends session events through it; the receiving
  // end is a SpeechRecognitionSessionClientImpl owned by |bindings_|.
  blink::mojom::SpeechRecognitionSessionClientPtrInfo client_ptr_info;
  blink::mojom::SpeechRecognitionSessionClientRequest client_request =
      mojo::MakeRequest(&client_ptr_info);
  bindings_.AddBinding(std::make_unique<SpeechRecognitionSessionClientImpl>(
                           this, handle, recognizer_client_),
                       std::move(client_request));
  // Session pipe: the renderer keeps the SpeechRecognitionSessionPtr (stored
  // in |session_map_| under |handle|) to issue StopCapture()/Abort() later.
  blink::mojom::SpeechRecognitionSessionPtr session_client;
  blink::mojom::SpeechRecognitionSessionRequest request =
      mojo::MakeRequest(&session_client);
  AddHandle(handle, std::move(session_client));
  msg_params->client = std::move(client_ptr_info);
  msg_params->session_request = std::move(request);
  GetSpeechRecognitionHost().Start(std::move(msg_params));
}
// Stops audio capture for the session identified by |handle|.
// A |stop| issued without a matching |start| (unknown client or handle) is
// silently ignored.
void SpeechRecognitionDispatcher::Stop(
    const WebSpeechRecognitionHandle& handle,
    const WebSpeechRecognizerClient& recognizer_client) {
  const bool has_matching_session =
      recognizer_client_ == recognizer_client && HandleExists(handle);
  if (has_matching_session)
    GetSession(handle)->StopCapture();
}
// Aborts the session identified by |handle|.
// An |abort| issued without a matching |start| (unknown client or handle) is
// silently ignored.
void SpeechRecognitionDispatcher::Abort(
    const WebSpeechRecognitionHandle& handle,
    const WebSpeechRecognizerClient& recognizer_client) {
  const bool has_matching_session =
      recognizer_client_ == recognizer_client && HandleExists(handle);
  if (has_matching_session)
    GetSession(handle)->Abort();
}
// Maps a mojom speech-recognition error code onto the Blink (WebKit)
// error enumeration. kNone is not a reportable error, and kNoMatch is
// handled separately by ErrorOccurred() (it maps to DidReceiveNoMatch, not
// to an error), so neither is expected to reach this function.
static WebSpeechRecognizerClient::ErrorCode WebKitErrorCode(
    blink::mojom::SpeechRecognitionErrorCode e) {
  switch (e) {
    case blink::mojom::SpeechRecognitionErrorCode::kNone:
      NOTREACHED();
      return WebSpeechRecognizerClient::kOtherError;
    case blink::mojom::SpeechRecognitionErrorCode::kNoSpeech:
      return WebSpeechRecognizerClient::kNoSpeechError;
    case blink::mojom::SpeechRecognitionErrorCode::kAborted:
      return WebSpeechRecognizerClient::kAbortedError;
    case blink::mojom::SpeechRecognitionErrorCode::kAudioCapture:
      return WebSpeechRecognizerClient::kAudioCaptureError;
    case blink::mojom::SpeechRecognitionErrorCode::kNetwork:
      return WebSpeechRecognizerClient::kNetworkError;
    case blink::mojom::SpeechRecognitionErrorCode::kNotAllowed:
      return WebSpeechRecognizerClient::kNotAllowedError;
    case blink::mojom::SpeechRecognitionErrorCode::kServiceNotAllowed:
      return WebSpeechRecognizerClient::kServiceNotAllowedError;
    case blink::mojom::SpeechRecognitionErrorCode::kBadGrammar:
      return WebSpeechRecognizerClient::kBadGrammarError;
    case blink::mojom::SpeechRecognitionErrorCode::kLanguageNotSupported:
      return WebSpeechRecognizerClient::kLanguageNotSupportedError;
    case blink::mojom::SpeechRecognitionErrorCode::kNoMatch:
      NOTREACHED();
      return WebSpeechRecognizerClient::kOtherError;
  }
  // Defensive fallback for out-of-range values (e.g. corrupt IPC data).
  NOTREACHED();
  return WebSpeechRecognizerClient::kOtherError;
}
// Registers |session| as the active session for |handle|.
// Precondition: no session is registered under |handle| yet.
void SpeechRecognitionDispatcher::AddHandle(
    const blink::WebSpeechRecognitionHandle& handle,
    blink::mojom::SpeechRecognitionSessionPtr session) {
  DCHECK(session_map_.find(handle) == session_map_.end());
  session_map_[handle] = std::move(session);
}
// Returns true if a session is currently registered under |handle|.
bool SpeechRecognitionDispatcher::HandleExists(
    const WebSpeechRecognitionHandle& handle) {
  // count() on a unique-key map is 0 or 1, equivalent to a find() lookup.
  return session_map_.count(handle) != 0;
}
// Unregisters the session for |handle|, dropping its mojo session pointer.
// erase()-by-key is a no-op when the handle is not present.
void SpeechRecognitionDispatcher::RemoveHandle(
    const blink::WebSpeechRecognitionHandle& handle) {
  session_map_.erase(handle);
}
// Returns the mojo session registered under |handle|.
// Precondition: the handle exists (DCHECK'd). Uses find() rather than
// operator[], which in a release build would silently default-insert a null
// entry into |session_map_| if the precondition were violated; here a missing
// handle yields nullptr and leaves the map untouched.
blink::mojom::SpeechRecognitionSession* SpeechRecognitionDispatcher::GetSession(
    const blink::WebSpeechRecognitionHandle& handle) {
  DCHECK(HandleExists(handle));
  auto it = session_map_.find(handle);
  return it != session_map_.end() ? it->second.get() : nullptr;
}
// Returns the browser-side recognizer interface, lazily binding
// |speech_recognition_host_| through the frame's remote interface provider
// on first use.
blink::mojom::SpeechRecognizer&
SpeechRecognitionDispatcher::GetSpeechRecognitionHost() {
  if (!speech_recognition_host_) {
    render_frame()->GetRemoteInterfaces()->GetInterface(
        mojo::MakeRequest(&speech_recognition_host_));
  }
  return *speech_recognition_host_;
}
// ------------ SpeechRecognitionSessionClientImpl
// ------------------------------------
// Binds this mojo event receiver to its owning dispatcher, the Blink handle
// identifying the session, and the Blink client that receives the events.
SpeechRecognitionSessionClientImpl::SpeechRecognitionSessionClientImpl(
    SpeechRecognitionDispatcher* dispatcher,
    const blink::WebSpeechRecognitionHandle& handle,
    const blink::WebSpeechRecognizerClient& client)
    : parent_dispatcher_(dispatcher), handle_(handle), web_client_(client) {}
// Mojo event: the session started; forwarded verbatim to the Blink client.
void SpeechRecognitionSessionClientImpl::Started() {
  web_client_.DidStart(handle_);
}
// Mojo event: audio capture started; forwarded verbatim to the Blink client.
void SpeechRecognitionSessionClientImpl::AudioStarted() {
  web_client_.DidStartAudio(handle_);
}
// Mojo event: sound detected; forwarded verbatim to the Blink client.
void SpeechRecognitionSessionClientImpl::SoundStarted() {
  web_client_.DidStartSound(handle_);
}
// Mojo event: sound ended; forwarded verbatim to the Blink client.
void SpeechRecognitionSessionClientImpl::SoundEnded() {
  web_client_.DidEndSound(handle_);
}
// Mojo event: audio capture ended; forwarded verbatim to the Blink client.
void SpeechRecognitionSessionClientImpl::AudioEnded() {
  web_client_.DidEndAudio(handle_);
}
// Mojo event: the session reported an error. kNoMatch is special-cased as a
// "no match" notification (with an empty result); every other code is
// translated to the Blink error enumeration and reported as an error.
void SpeechRecognitionSessionClientImpl::ErrorOccurred(
    const blink::mojom::SpeechRecognitionErrorPtr error) {
  if (error->code == blink::mojom::SpeechRecognitionErrorCode::kNoMatch) {
    web_client_.DidReceiveNoMatch(handle_, WebSpeechRecognitionResult());
    return;
  }
  web_client_.DidReceiveError(handle_,
                              WebString(),  // TODO(primiano): message?
                              WebKitErrorCode(error->code));
}
// Mojo event: the session is over. The dispatcher's map entry is removed
// before notifying Blink, so the handle no longer resolves to this finished
// session if DidEnd() re-enters the dispatcher (e.g. starting a new session).
void SpeechRecognitionSessionClientImpl::Ended() {
  parent_dispatcher_->RemoveHandle(handle_);
  web_client_.DidEnd(handle_);
}
// Mojo event: new recognition results arrived. Splits |results| into
// provisional (interim) and final groups, flattens each result's hypotheses
// into parallel transcript/confidence arrays, and delivers both groups to
// the Blink client in one DidReceiveResults() call.
void SpeechRecognitionSessionClientImpl::ResultRetrieved(
    std::vector<blink::mojom::SpeechRecognitionResultPtr> results) {
  // First pass: size the two output vectors.
  size_t num_provisional = 0;
  for (const auto& result : results) {
    if (result->is_provisional)
      ++num_provisional;
  }
  WebVector<WebSpeechRecognitionResult> provisional(num_provisional);
  WebVector<WebSpeechRecognitionResult> final(results.size() - num_provisional);
  // Second pass: fill each slot, preserving the original relative order
  // within each group.
  size_t provisional_index = 0;
  size_t final_index = 0;
  for (const auto& result : results) {
    WebSpeechRecognitionResult& webkit_result =
        result->is_provisional ? provisional[provisional_index++]
                               : final[final_index++];
    const size_t num_hypotheses = result->hypotheses.size();
    WebVector<WebString> transcripts(num_hypotheses);
    WebVector<float> confidences(num_hypotheses);
    for (size_t i = 0; i < num_hypotheses; ++i) {
      transcripts[i] = WebString::FromUTF16(result->hypotheses[i]->utterance);
      confidences[i] = static_cast<float>(result->hypotheses[i]->confidence);
    }
    webkit_result.Assign(transcripts, confidences, !result->is_provisional);
  }
  web_client_.DidReceiveResults(handle_, final, provisional);
}
} // namespace content