// blob: 9782f78c3b2de2a93a1f9783460ebaf8a77afcd4 [file] [log] [blame]
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/vr/speech_recognizer.h"

#include <memory>
#include <utility>
#include <vector>

#include "base/bind.h"
#include "base/logging.h"
#include "base/memory/ref_counted.h"
#include "base/run_loop.h"
#include "base/strings/utf_string_conversions.h"
#include "base/task/post_task.h"
#include "base/threading/thread_task_runner_handle.h"
#include "base/timer/mock_timer.h"
#include "chrome/browser/vr/browser_ui_interface.h"
#include "chrome/browser/vr/test/mock_browser_ui_interface.h"
#include "content/public/browser/browser_task_traits.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/speech_recognition_event_listener.h"
#include "content/public/browser/speech_recognition_manager.h"
#include "content/public/browser/speech_recognition_session_config.h"
#include "content/public/browser/speech_recognition_session_context.h"
#include "content/public/test/test_browser_thread_bundle.h"
#include "services/network/public/cpp/shared_url_loader_factory.h"
#include "services/network/test/test_url_loader_factory.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h"
namespace vr {
// Session id that FakeSpeechRecognitionManager::CreateSession() hands out and
// that the fake passes to listener callbacks.
constexpr int kTestSessionId = 1;

// Transcripts the fake manager reports: one provisional (interim) result and
// two final results.
constexpr char kTestInterimResult[] = "kitten";
constexpr char kTestResult[] = "cat";
constexpr char kTestResultMultiple[] = "cat video";
// Events a test can inject through
// FakeSpeechRecognitionManager::FakeSpeechRecognitionEvent().
//
// SOUND_END, AUDIO_START and AUDIO_END have no dedicated case in that method's
// switch and therefore reach its NOTREACHED() default.
enum FakeRecognitionEvent {
  RECOGNITION_START = 0,      // Listener receives OnRecognitionStart().
  RECOGNITION_END = 1,        // Listener receives OnRecognitionEnd().
  NETWORK_ERROR = 2,          // Listener receives OnRecognitionError(kNetwork).
  SOUND_START = 3,            // Listener receives OnSoundStart().
  SOUND_END = 4,              // Not dispatched by the fake.
  AUDIO_START = 5,            // Not dispatched by the fake.
  AUDIO_END = 6,              // Not dispatched by the fake.
  INTERIM_RESULT = 7,         // One provisional result (kTestInterimResult).
  FINAL_RESULT = 8,           // One final result, then recognition ends.
  MULTIPLE_FINAL_RESULT = 9,  // Two final results, then recognition ends.
};
// A SharedURLLoaderFactory that hangs.
class FakeSharedURLLoaderFactory : public network::SharedURLLoaderFactory {
public:
FakeSharedURLLoaderFactory() {}
// network::mojom::URLLoaderFactory:
void Clone(network::mojom::URLLoaderFactoryRequest request) override {
test_url_loader_factory_.Clone(std::move(request));
}
void CreateLoaderAndStart(network::mojom::URLLoaderRequest loader,
int32_t routing_id,
int32_t request_id,
uint32_t options,
const network::ResourceRequest& request,
network::mojom::URLLoaderClientPtr client,
const net::MutableNetworkTrafficAnnotationTag&
traffic_annotation) override {
test_url_loader_factory_.CreateLoaderAndStart(
std::move(loader), routing_id, request_id, options, request,
std::move(client), traffic_annotation);
}
// network::SharedURLLoaderFactory:
std::unique_ptr<network::SharedURLLoaderFactoryInfo> Clone() override {
NOTREACHED();
return nullptr;
}
private:
friend class base::RefCounted<FakeSharedURLLoaderFactory>;
~FakeSharedURLLoaderFactory() override {}
network::TestURLLoaderFactory test_url_loader_factory_;
DISALLOW_COPY_AND_ASSIGN(FakeSharedURLLoaderFactory);
};
// Returns a SharedURLLoaderFactory that hangs.
class FakeSharedURLLoaderFactoryInfo
: public network::SharedURLLoaderFactoryInfo {
public:
FakeSharedURLLoaderFactoryInfo() {}
~FakeSharedURLLoaderFactoryInfo() override {}
protected:
friend class network::SharedURLLoaderFactory;
// network::SharedURLLoaderFactoryInfo:
scoped_refptr<network::SharedURLLoaderFactory> CreateFactory() override {
return base::MakeRefCounted<FakeSharedURLLoaderFactory>();
}
private:
DISALLOW_COPY_AND_ASSIGN(FakeSharedURLLoaderFactoryInfo);
};
class FakeSpeechRecognitionManager : public content::SpeechRecognitionManager {
public:
FakeSpeechRecognitionManager() {}
~FakeSpeechRecognitionManager() override {}
// SpeechRecognitionManager methods.
int CreateSession(
const content::SpeechRecognitionSessionConfig& config) override {
session_ctx_ = config.initial_context;
session_config_ = config;
session_id_ = kTestSessionId;
return session_id_;
}
void StartSession(int session_id) override {}
void AbortSession(int session_id) override {
DCHECK(session_id_ == session_id);
session_id_ = 0;
}
void AbortAllSessionsForRenderFrame(int render_process_id,
int render_frame_id) override {}
void StopAudioCaptureForSession(int session_id) override {}
const content::SpeechRecognitionSessionConfig& GetSessionConfig(
int session_id) const override {
DCHECK(session_id_ == session_id);
return session_config_;
}
content::SpeechRecognitionSessionContext GetSessionContext(
int session_id) const override {
DCHECK(session_id_ == session_id);
return session_ctx_;
}
void FakeSpeechRecognitionEvent(FakeRecognitionEvent event) {
if (!content::BrowserThread::CurrentlyOn(content::BrowserThread::IO)) {
base::PostTaskWithTraits(
FROM_HERE, {content::BrowserThread::IO},
base::BindOnce(
&FakeSpeechRecognitionManager::FakeSpeechRecognitionEvent,
base::Unretained(this), event));
return;
}
DCHECK(GetActiveListener());
blink::mojom::SpeechRecognitionError error(
blink::mojom::SpeechRecognitionErrorCode::kNetwork,
blink::mojom::SpeechAudioErrorDetails::kNone);
switch (event) {
case RECOGNITION_START:
GetActiveListener()->OnRecognitionStart(kTestSessionId);
break;
case RECOGNITION_END:
GetActiveListener()->OnRecognitionEnd(kTestSessionId);
break;
case NETWORK_ERROR:
GetActiveListener()->OnRecognitionError(kTestSessionId, error);
break;
case SOUND_START:
GetActiveListener()->OnSoundStart(kTestSessionId);
break;
case INTERIM_RESULT:
SendFakeInterimResults();
break;
case FINAL_RESULT:
SendFakeFinalResults();
break;
case MULTIPLE_FINAL_RESULT:
SendFakeMultipleFinalResults();
break;
default:
NOTREACHED();
}
}
void SendFakeInterimResults() {
if (!session_id_)
return;
SendRecognitionResult(kTestInterimResult, true);
}
void SendFakeFinalResults() {
if (!session_id_)
return;
SendRecognitionResult(kTestResult, false);
FakeSpeechRecognitionEvent(RECOGNITION_END);
session_id_ = 0;
}
void SendFakeMultipleFinalResults() {
if (!session_id_)
return;
SendRecognitionResult(kTestResult, false);
SendRecognitionResult(kTestResultMultiple, false);
FakeSpeechRecognitionEvent(RECOGNITION_END);
session_id_ = 0;
}
private:
void SendRecognitionResult(const char* string, bool is_provisional) {
content::SpeechRecognitionEventListener* listener = GetActiveListener();
if (!listener)
return;
listener->OnAudioStart(session_id_);
listener->OnAudioEnd(session_id_);
blink::mojom::SpeechRecognitionResultPtr result =
blink::mojom::SpeechRecognitionResult::New();
result->hypotheses.push_back(blink::mojom::SpeechRecognitionHypothesis::New(
base::ASCIIToUTF16(string), 1.0));
result->is_provisional = is_provisional;
std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
results.push_back(std::move(result));
listener->OnRecognitionResults(session_id_, results);
}
content::SpeechRecognitionEventListener* GetActiveListener() {
DCHECK(session_id_ != 0);
return session_config_.event_listener.get();
}
int session_id_ = 0;
content::SpeechRecognitionSessionContext session_ctx_;
content::SpeechRecognitionSessionConfig session_config_;
DISALLOW_COPY_AND_ASSIGN(FakeSpeechRecognitionManager);
};
// gMock implementation of VoiceResultDelegate; lets tests set expectations on
// the voice results handed to the delegate.
class MockVoiceSearchDelegate : public VoiceResultDelegate {
 public:
  MockVoiceSearchDelegate() = default;
  ~MockVoiceSearchDelegate() override = default;

  MockVoiceSearchDelegate(const MockVoiceSearchDelegate&) = delete;
  MockVoiceSearchDelegate& operator=(const MockVoiceSearchDelegate&) = delete;

  MOCK_METHOD1(OnVoiceResults, void(const base::string16& result));
};
class SpeechRecognizerTest : public testing::Test {
public:
SpeechRecognizerTest()
: fake_speech_recognition_manager_(new FakeSpeechRecognitionManager()),
ui_(new MockBrowserUiInterface),
delegate_(new MockVoiceSearchDelegate),
speech_recognizer_(new SpeechRecognizer(
delegate_.get(),
ui_.get(),
std::make_unique<FakeSharedURLLoaderFactoryInfo>(),
"en" /* accept_language */,
"en" /* locale */)) {
SpeechRecognizer::SetManagerForTest(fake_speech_recognition_manager_.get());
}
~SpeechRecognizerTest() override {
SpeechRecognizer::SetManagerForTest(nullptr);
}
protected:
std::unique_ptr<FakeSpeechRecognitionManager>
fake_speech_recognition_manager_;
std::unique_ptr<MockBrowserUiInterface> ui_;
std::unique_ptr<MockVoiceSearchDelegate> delegate_;
std::unique_ptr<SpeechRecognizer> speech_recognizer_;
private:
DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerTest);
};
// An interim result must not be surfaced; the final result must reach the UI
// and then the delegate, bracketed by enabling and disabling recognition.
TEST_F(SpeechRecognizerTest, ReceivedCorrectSpeechResult) {
  const base::string16 final_text = base::ASCIIToUTF16(kTestResult);

  // All expectations below have to be satisfied in declaration order.
  testing::InSequence in_order;
  EXPECT_CALL(*ui_, SetSpeechRecognitionEnabled(true));
  EXPECT_CALL(*ui_, SetRecognitionResult(final_text));
  EXPECT_CALL(*delegate_, OnVoiceResults(final_text)).Times(1);
  EXPECT_CALL(*ui_, SetSpeechRecognitionEnabled(false));

  speech_recognizer_->Start();
  base::RunLoop().RunUntilIdle();

  // Interim results are not shown, so this must not trigger
  // SetRecognitionResult.
  fake_speech_recognition_manager_->FakeSpeechRecognitionEvent(INTERIM_RESULT);
  base::RunLoop().RunUntilIdle();

  // The final result is what triggers SetRecognitionResult.
  fake_speech_recognition_manager_->FakeSpeechRecognitionEvent(FINAL_RESULT);
  base::RunLoop().RunUntilIdle();
}
// Regression test for crbug.com/785051. A single recognition session may
// deliver more than one final result; the delegate must still be notified
// (i.e. navigation triggered) exactly once.
TEST_F(SpeechRecognizerTest, MultipleResultsTriggerNavigation) {
  const base::string16 last_text = base::ASCIIToUTF16(kTestResultMultiple);

  // All expectations below have to be satisfied in declaration order.
  testing::InSequence in_order;
  EXPECT_CALL(*ui_, SetSpeechRecognitionEnabled(true));
  EXPECT_CALL(*ui_, SetRecognitionResult(last_text));
  EXPECT_CALL(*delegate_, OnVoiceResults(last_text)).Times(1);
  EXPECT_CALL(*ui_, SetSpeechRecognitionEnabled(false));

  speech_recognizer_->Start();
  base::RunLoop().RunUntilIdle();

  fake_speech_recognition_manager_->FakeSpeechRecognitionEvent(
      MULTIPLE_FINAL_RESULT);
  base::RunLoop().RunUntilIdle();
}
// Recognition start, a network error and recognition end must each be
// forwarded to the UI as the corresponding state change, in that order.
TEST_F(SpeechRecognizerTest, ReceivedSpeechRecognitionStates) {
  // Start the session before any expectation is registered, so the sequence
  // below only constrains what happens after start-up.
  speech_recognizer_->Start();
  base::RunLoop().RunUntilIdle();

  {
    // The three state changes have to arrive in declaration order.
    testing::InSequence in_order;
    EXPECT_CALL(*ui_,
                OnSpeechRecognitionStateChanged(SPEECH_RECOGNITION_RECOGNIZING));
    EXPECT_CALL(
        *ui_, OnSpeechRecognitionStateChanged(SPEECH_RECOGNITION_NETWORK_ERROR));
    EXPECT_CALL(*ui_, OnSpeechRecognitionStateChanged(SPEECH_RECOGNITION_END));
  }

  fake_speech_recognition_manager_->FakeSpeechRecognitionEvent(
      RECOGNITION_START);
  base::RunLoop().RunUntilIdle();

  fake_speech_recognition_manager_->FakeSpeechRecognitionEvent(NETWORK_ERROR);
  base::RunLoop().RunUntilIdle();

  fake_speech_recognition_manager_->FakeSpeechRecognitionEvent(RECOGNITION_END);
  base::RunLoop().RunUntilIdle();
}
// After sound has started, firing the speech timer must end recognition and
// disable it in the UI.
TEST_F(SpeechRecognizerTest, NoSoundTimeout) {
  // All expectations below have to be satisfied in declaration order.
  testing::InSequence in_order;
  EXPECT_CALL(*ui_, SetSpeechRecognitionEnabled(true));
  EXPECT_CALL(*ui_,
              OnSpeechRecognitionStateChanged(SPEECH_RECOGNITION_IN_SPEECH));
  EXPECT_CALL(*ui_, OnSpeechRecognitionStateChanged(SPEECH_RECOGNITION_END));
  EXPECT_CALL(*ui_, SetSpeechRecognitionEnabled(false));

  speech_recognizer_->Start();
  base::RunLoop().RunUntilIdle();

  // Swap in a mock timer so the timeout can be fired on demand. Ownership
  // moves to the recognizer; |speech_timer| is kept only to fire it.
  auto owned_timer = std::make_unique<base::MockOneShotTimer>();
  base::MockOneShotTimer* const speech_timer = owned_timer.get();
  speech_recognizer_->SetSpeechTimerForTest(std::move(owned_timer));

  fake_speech_recognition_manager_->FakeSpeechRecognitionEvent(SOUND_START);
  base::RunLoop().RunUntilIdle();

  // Per the expectations above, the timeout should produce a
  // SPEECH_RECOGNITION_END state notification.
  speech_timer->Fire();
  base::RunLoop().RunUntilIdle();
}
// Tests that it is safe to reset |speech_recognizer_| on the UI thread after a
// task to start speech recognition has been posted to the IO thread.
TEST_F(SpeechRecognizerTest, SafeToResetAfterStart) {
  const base::string16 final_text = base::ASCIIToUTF16(kTestResult);

  EXPECT_CALL(*ui_,
              OnSpeechRecognitionStateChanged(SPEECH_RECOGNITION_RECOGNIZING));
  // The recognizer is destroyed before the result is processed, so the result
  // must never reach the UI.
  EXPECT_CALL(*ui_, SetRecognitionResult(final_text)).Times(0);

  speech_recognizer_->Start();
  base::RunLoop().RunUntilIdle();

  fake_speech_recognition_manager_->FakeSpeechRecognitionEvent(
      RECOGNITION_START);
  base::RunLoop().RunUntilIdle();

  fake_speech_recognition_manager_->FakeSpeechRecognitionEvent(FINAL_RESULT);

  // Destroying the recognizer here must not crash the test.
  speech_recognizer_.reset();
  base::RunLoop().RunUntilIdle();
}
// Tests that calling Start() again after Stop() still works as expected: over
// the whole test exactly one final result is surfaced to the UI.
TEST_F(SpeechRecognizerTest, RestartAfterStop) {
  const base::string16 final_text = base::ASCIIToUTF16(kTestResult);
  EXPECT_CALL(*ui_, SetRecognitionResult(final_text)).Times(1);

  // First session: a final result is injected and the recognizer is stopped
  // before the task queue is drained.
  speech_recognizer_->Start();
  base::RunLoop().RunUntilIdle();
  fake_speech_recognition_manager_->FakeSpeechRecognitionEvent(FINAL_RESULT);
  speech_recognizer_->Stop();
  base::RunLoop().RunUntilIdle();

  // Second session: restart and inject a final result again.
  speech_recognizer_->Start();
  base::RunLoop().RunUntilIdle();
  fake_speech_recognition_manager_->FakeSpeechRecognitionEvent(FINAL_RESULT);
  base::RunLoop().RunUntilIdle();
}
} // namespace vr