blob: 2d514b74851b460169a2648aacb666b9339e8c54 [file] [log] [blame]
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/ash/accessibility/dictation.h"
#include <memory>
#include "ash/accessibility/accessibility_controller_impl.h"
#include "ash/shell.h"
#include "base/strings/utf_string_conversions.h"
#include "base/timer/timer.h"
#include "chrome/browser/ash/accessibility/accessibility_manager.h"
#include "chrome/browser/speech/cros_speech_recognition_service_factory.h"
#include "chrome/browser/speech/fake_speech_recognition_service.h"
#include "chrome/browser/ui/browser.h"
#include "chrome/test/base/in_process_browser_test.h"
#include "components/soda/soda_installer.h"
#include "components/soda/soda_installer_impl_chromeos.h"
#include "content/public/test/browser_test.h"
#include "content/public/test/fake_speech_recognition_manager.h"
#include "media/mojo/mojom/speech_recognition_service.mojom.h"
#include "third_party/abseil-cpp/absl/types/optional.h"
#include "ui/accessibility/accessibility_features.h"
#include "ui/base/ime/chromeos/ime_bridge.h"
#include "ui/base/ime/chromeos/mock_ime_input_context_handler.h"
#include "ui/base/ime/dummy_text_input_client.h"
#include "ui/base/ime/input_method_base.h"
#include "ui/events/test/event_generator.h"
namespace ash {
namespace {
// Speech results fed to Dictation during the tests, in both the char form
// consumed by the fake recognizers and the char16_t form used when comparing
// against IME composition/commit text.
constexpr char kFirstSpeechResult[] = "help";
constexpr char16_t kFirstSpeechResult16[] = u"help";
constexpr char kSecondSpeechResult[] = "help oh";
constexpr char16_t kSecondSpeechResult16[] = u"help oh";
constexpr char kFinalSpeechResult[] = "hello world";
constexpr char16_t kFinalSpeechResult16[] = u"hello world";
// Expected no-speech timeouts; which one applies depends on the listening
// variant and recognition backend (see GetNoSpeechTimeout /
// GetNoNewSpeechTimeout below).
constexpr int kNoSpeechTimeoutInSeconds = 10;
constexpr int kShortNoSpeechTimeoutInSeconds = 5;
constexpr int kVeryShortNoSpeechTimeoutInSeconds = 2;
}  // namespace
// Whether the experimental longer-listening Dictation behavior is enabled.
enum DictationListeningTestVariant {
  kTestDefaultListening,
  kTestWithLongerListening
};
// Whether speech recognition runs over the network or on-device (SODA).
enum DictationNetworkTestVariant { kNetworkRecognition, kOnDeviceRecognition };
// Browser-test fixture for Dictation, parameterized over the listening
// variant (default vs. experimental longer listening) and the recognition
// backend (network vs. on-device). Installs fake speech recognition plumbing
// and a mock IME input context so tests can drive recognition results and
// observe the composition/commit text Dictation produces.
class DictationTest : public InProcessBrowserTest,
                      public ::testing::WithParamInterface<
                          std::pair<DictationListeningTestVariant,
                                    DictationNetworkTestVariant>> {
 protected:
  DictationTest() {
    input_context_handler_ = std::make_unique<ui::MockIMEInputContextHandler>();
    // Capture the default (empty) composition text so tests can compare
    // "nothing composed yet" states against it.
    empty_composition_text_ =
        ui::MockIMEInputContextHandler::UpdateCompositionTextArg()
            .composition_text;
  }
  ~DictationTest() override = default;
  DictationTest(const DictationTest&) = delete;
  DictationTest& operator=(const DictationTest&) = delete;

  // InProcessBrowserTest:
  void SetUp() override {
    if (GetParam().second == kNetworkRecognition) {
      // Use a fake speech recognition manager so that we don't end up with an
      // error finding the audio input device when running on a headless
      // environment.
      fake_speech_recognition_manager_ =
          std::make_unique<content::FakeSpeechRecognitionManager>();
      // Don't send a fake response from the fake manager. The fake manager can
      // only send one final response before shutting off. We will do more
      // granular testing of multiple not-final and final results by sending
      // OnSpeechResult callbacks to Dictation directly.
      fake_speech_recognition_manager_->set_should_send_fake_response(false);
      content::SpeechRecognitionManager::SetManagerForTesting(
          fake_speech_recognition_manager_.get());
    }
    InProcessBrowserTest::SetUp();
  }

  // Enables/disables the listening and offline feature flags to match the
  // test parameters.
  void SetUpCommandLine(base::CommandLine* command_line) override {
    std::vector<base::Feature> enabled_features;
    std::vector<base::Feature> disabled_features;
    if (GetParam().first == kTestWithLongerListening) {
      enabled_features.push_back(
          features::kExperimentalAccessibilityDictationListening);
    } else {
      disabled_features.push_back(
          features::kExperimentalAccessibilityDictationListening);
    }
    if (GetParam().second == kOnDeviceRecognition) {
      enabled_features.push_back(
          features::kExperimentalAccessibilityDictationOffline);
    } else {
      disabled_features.push_back(
          features::kExperimentalAccessibilityDictationOffline);
    }
    scoped_feature_list_.InitWithFeatures(enabled_features, disabled_features);
  }

  void SetUpOnMainThread() override {
    // Fake that SODA is installed so Dictation uses OnDeviceSpeechRecognizer.
    // Do this here, since SetUpOnMainThread is run after the browser process
    // initializes (which is when the global SodaInstaller gets created).
    // Lastly, do this before Dictation is enabled so that we don't initiate a
    // SODA download when Dictation is enabled.
    speech::SodaInstaller::GetInstance()->NotifySodaInstalledForTesting();
    ui::IMEBridge::Get()->SetInputContextHandler(input_context_handler_.get());
    generator_ = std::make_unique<ui::test::EventGenerator>(
        ash::Shell::Get()->GetPrimaryRootWindow());
    // Accept the confirmation dialog and enable Dictation up front so tests
    // can toggle it freely.
    ash::Shell::Get()
        ->accessibility_controller()
        ->dictation()
        .SetDialogAccepted();
    ash::Shell::Get()->accessibility_controller()->dictation().SetEnabled(true);
    if (GetParam().second == kOnDeviceRecognition) {
      // Replaces normal CrosSpeechRecognitionService with a fake one.
      CrosSpeechRecognitionServiceFactory::GetInstanceForTest()
          ->SetTestingFactoryAndUse(
              browser()->profile(),
              base::BindRepeating(
                  &DictationTest::CreateTestSpeechRecognitionService,
                  base::Unretained(this)));
    }
  }

  // Testing factory for the keyed service; keeps an unowned pointer to the
  // fake so tests can drive it.
  std::unique_ptr<KeyedService> CreateTestSpeechRecognitionService(
      content::BrowserContext* context) {
    std::unique_ptr<speech::FakeSpeechRecognitionService> fake_service =
        std::make_unique<speech::FakeSpeechRecognitionService>();
    fake_service_ = fake_service.get();
    return std::move(fake_service);
  }

  AccessibilityManager* GetManager() { return AccessibilityManager::Get(); }

  void EnableChromeVox() { GetManager()->EnableSpokenFeedback(true); }

  // Delivers a speech result (interim or final) to Dictation through whichever
  // fake backend the current parameterization uses.
  void SendSpeechResult(const char* result, bool is_final) {
    if (GetParam().second == kNetworkRecognition) {
      if (!is_final) {
        // FakeSpeechRecognitionManager can only send final results,
        // so if this isn't final just send to Dictation directly.
        GetManager()->dictation_->OnSpeechResult(base::ASCIIToUTF16(result),
                                                 is_final, absl::nullopt);
      } else {
        base::RunLoop loop;
        fake_speech_recognition_manager_->SetFakeResult(result);
        fake_speech_recognition_manager_->SendFakeResponse(
            false /* end recognition */, loop.QuitClosure());
        loop.Run();
      }
    } else {
      EXPECT_TRUE(fake_service_->is_capturing_audio());
      base::RunLoop loop;
      fake_service_->SendSpeechRecognitionResult(
          media::SpeechRecognitionResult(result, is_final));
      loop.RunUntilIdle();
    }
  }

  void WaitForRecognitionStarted() {
    if (GetParam().second == kNetworkRecognition) {
      // Wait for interaction on UI thread.
      fake_speech_recognition_manager_->WaitForRecognitionStarted();
    } else {
      fake_service_->WaitForRecognitionStarted();
      // Only one thread, use a RunLoop to ensure mojom messages are done.
      base::RunLoop().RunUntilIdle();
    }
  }

  void WaitForRecognitionEnded() {
    if (GetParam().second == kNetworkRecognition) {
      // Wait for interaction on UI thread.
      fake_speech_recognition_manager_->WaitForRecognitionEnded();
    }
    // Wait for mojom / callbacks to propagate.
    base::RunLoop().RunUntilIdle();
  }

  void NotifyTextInputStateChanged(ui::TextInputClient* client) {
    GetManager()->dictation_->OnTextInputStateChanged(client);
  }

  // True when Dictation does not exist or its recognizer is off.
  bool IsDictationOff() {
    return !GetManager()->dictation_ ||
           GetManager()->dictation_->current_state_ == SPEECH_RECOGNIZER_OFF;
  }

  // Returns Dictation's no-speech timeout timer, or nullptr when Dictation is
  // not active.
  base::OneShotTimer* GetTimer() {
    if (!GetManager()->dictation_)
      return nullptr;
    return &(GetManager()->dictation_->speech_timeout_);
  }

  // Expected timeout before any speech has been recognized.
  base::TimeDelta GetNoSpeechTimeout() {
    if (GetParam().first == kTestDefaultListening) {
      return base::TimeDelta::FromSeconds(GetParam().second ==
                                                  kNetworkRecognition
                                              ? kShortNoSpeechTimeoutInSeconds
                                              : kNoSpeechTimeoutInSeconds);
    }
    return base::TimeDelta::FromSeconds(kNoSpeechTimeoutInSeconds);
  }

  // Expected (shorter) timeout once some interim speech has been heard.
  base::TimeDelta GetNoNewSpeechTimeout() {
    return base::TimeDelta::FromSeconds(GetParam().second == kNetworkRecognition
                                            ? kVeryShortNoSpeechTimeoutInSeconds
                                            : kShortNoSpeechTimeoutInSeconds);
  }

  // Toggles Dictation via its keyboard shortcut (Search+D) and, when turning
  // it on, waits until recognition has actually started.
  void ToggleDictation() {
    // We are trying to toggle on if Dictation is currently off.
    bool will_toggle_on = IsDictationOff();
    generator_->PressAndReleaseKey(ui::VKEY_D, ui::EF_COMMAND_DOWN);
    if (will_toggle_on) {
      // SpeechRecognition may be turned on asynchronously. Wait for it to
      // complete before moving on to ensures that we are ready to receive
      // speech. In Dictation, a tone is played when recognition starts,
      // indicating to the user that they can begin speaking.
      WaitForRecognitionStarted();
    }
    // Now wait for the callbacks to propagate on the UI thread.
    base::RunLoop().RunUntilIdle();
  }

  ui::CompositionText GetLastCompositionText() {
    return input_context_handler_->last_update_composition_arg()
        .composition_text;
  }

  std::unique_ptr<ui::MockIMEInputContextHandler> input_context_handler_;
  std::unique_ptr<ui::test::EventGenerator> generator_;
  ui::CompositionText empty_composition_text_;
  // For network recognition.
  std::unique_ptr<content::FakeSpeechRecognitionManager>
      fake_speech_recognition_manager_;
  // For on-device recognition.
  // Unowned; set by CreateTestSpeechRecognitionService. Initialized to
  // nullptr so an accidental use before the factory runs fails loudly
  // instead of dereferencing garbage.
  speech::FakeSpeechRecognitionService* fake_service_ = nullptr;
  base::test::ScopedFeatureList scoped_feature_list_;
};
// Instantiate DictationTest for every combination of listening variant
// (default vs. longer listening) and recognition backend (network vs.
// on-device).
INSTANTIATE_TEST_SUITE_P(
    TestWithDefaultAndLongerListening,
    DictationTest,
    ::testing::Values(
        std::make_pair(kTestDefaultListening, kNetworkRecognition),
        std::make_pair(kTestWithLongerListening, kNetworkRecognition),
        std::make_pair(kTestDefaultListening, kOnDeviceRecognition),
        std::make_pair(kTestWithLongerListening, kOnDeviceRecognition)));
// Verifies that interim results update the composition text, a final result
// commits it exactly once, and that afterwards Dictation either turns off
// (default listening) or stays on with the full no-speech timeout re-armed
// (longer listening).
IN_PROC_BROWSER_TEST_P(DictationTest, RecognitionEnds) {
  ToggleDictation();
  // Nothing spoken yet: composition text is still empty.
  EXPECT_EQ(GetLastCompositionText().text, empty_composition_text_.text);
  SendSpeechResult(kFirstSpeechResult, false /* is_final */);
  EXPECT_EQ(kFirstSpeechResult16, GetLastCompositionText().text);
  SendSpeechResult(kSecondSpeechResult, false /* is_final */);
  EXPECT_EQ(kSecondSpeechResult16, GetLastCompositionText().text);
  SendSpeechResult(kFinalSpeechResult, true /* is_final */);
  // Wait for interim results to be finalized.
  base::RunLoop().RunUntilIdle();
  // The final result is committed exactly once.
  EXPECT_EQ(1, input_context_handler_->commit_text_call_count());
  EXPECT_EQ(kFinalSpeechResult16, input_context_handler_->last_commit_text());
  if (GetParam().first == kTestDefaultListening) {
    EXPECT_TRUE(IsDictationOff());
  } else {
    // Longer listening keeps Dictation on and restarts the no-speech timer.
    EXPECT_FALSE(IsDictationOff());
    base::OneShotTimer* timer = GetTimer();
    ASSERT_TRUE(timer);
    EXPECT_EQ(timer->GetCurrentDelay(), GetNoSpeechTimeout());
  }
}
// Same flow as RecognitionEnds, but with ChromeVox (spoken feedback) enabled.
// With ChromeVox on, interim results do not update the composition text; only
// the final result is committed.
IN_PROC_BROWSER_TEST_P(DictationTest, RecognitionEndsWithChromeVoxEnabled) {
  AccessibilityManager* manager = GetManager();
  EnableChromeVox();
  EXPECT_TRUE(manager->IsSpokenFeedbackEnabled());
  // Toggle Dictation on directly.
  GetManager()->ToggleDictation();
  WaitForRecognitionStarted();
  // Now wait for the callbacks to propagate on the UI thread.
  base::RunLoop().RunUntilIdle();
  EXPECT_EQ(GetLastCompositionText().text, empty_composition_text_.text);
  SendSpeechResult(kFirstSpeechResult, false /* is_final */);
  // Interim results leave the composition text untouched under ChromeVox.
  EXPECT_EQ(GetLastCompositionText().text, empty_composition_text_.text);
  SendSpeechResult(kSecondSpeechResult, false /* is_final */);
  EXPECT_EQ(GetLastCompositionText().text, empty_composition_text_.text);
  SendSpeechResult(kFinalSpeechResult, true /* is_final */);
  // Wait for interim results to be finalized.
  base::RunLoop().RunUntilIdle();
  EXPECT_EQ(1, input_context_handler_->commit_text_call_count());
  EXPECT_EQ(kFinalSpeechResult16, input_context_handler_->last_commit_text());
  if (GetParam().first == kTestDefaultListening) {
    EXPECT_TRUE(IsDictationOff());
  } else {
    // Longer listening keeps Dictation on with the full timeout re-armed.
    EXPECT_FALSE(IsDictationOff());
    base::OneShotTimer* timer = GetTimer();
    ASSERT_TRUE(timer);
    EXPECT_EQ(timer->GetCurrentDelay(), GetNoSpeechTimeout());
  }
}
// Verifies that Dictation turns itself off when the no-speech timeout fires
// before any speech was recognized.
IN_PROC_BROWSER_TEST_P(DictationTest, RecognitionEndsWithNoSpeech) {
  ToggleDictation();
  EXPECT_FALSE(IsDictationOff());
  base::OneShotTimer* timer = GetTimer();
  ASSERT_TRUE(timer);
  EXPECT_EQ(timer->GetCurrentDelay(), GetNoSpeechTimeout());
  // Firing the timer, which simulates waiting for some time with no events,
  // should end dictation.
  timer->FireNow();
  EXPECT_TRUE(IsDictationOff());
}
// Verifies that when the timer fires after only interim (non-final) speech,
// Dictation turns off and commits the interim text it had so far.
IN_PROC_BROWSER_TEST_P(DictationTest, RecognitionEndsWithoutFinalizedSpeech) {
  ToggleDictation();
  EXPECT_FALSE(IsDictationOff());
  SendSpeechResult(kFirstSpeechResult, false /* is_final */);
  base::OneShotTimer* timer = GetTimer();
  ASSERT_TRUE(timer);
  // If this is the test with continuous listening, use the normal timeout,
  // otherwise use the shorter timeout for no new speech.
  EXPECT_EQ(timer->GetCurrentDelay(), GetParam().first == kTestDefaultListening
                                          ? GetNoNewSpeechTimeout()
                                          : GetNoSpeechTimeout());
  // Firing the timer, which simulates waiting for some time without new speech,
  // should end dictation.
  timer->FireNow();
  // Wait for interim results to be finalized.
  base::RunLoop().RunUntilIdle();
  EXPECT_TRUE(IsDictationOff());
  // The interim result is committed on shutdown.
  EXPECT_EQ(1, input_context_handler_->commit_text_call_count());
  EXPECT_EQ(kFirstSpeechResult16, input_context_handler_->last_commit_text());
}
// Verifies that toggling Dictation on and immediately off again, with no
// speech in between, composes and commits nothing.
IN_PROC_BROWSER_TEST_P(DictationTest, UserEndsDictationBeforeSpeech) {
  ToggleDictation();
  ToggleDictation();
  EXPECT_EQ(GetLastCompositionText().text, empty_composition_text_.text);
  EXPECT_EQ(0, input_context_handler_->commit_text_call_count());
}
// Verifies that toggling Dictation off while an interim result is pending
// commits that pending text.
IN_PROC_BROWSER_TEST_P(DictationTest, UserEndsDictation) {
  ToggleDictation();
  EXPECT_EQ(GetLastCompositionText().text, empty_composition_text_.text);
  SendSpeechResult(kFinalSpeechResult, false /* is_final */);
  EXPECT_EQ(kFinalSpeechResult16, GetLastCompositionText().text);
  // Toggling off finalizes and commits the interim result.
  ToggleDictation();
  EXPECT_EQ(1, input_context_handler_->commit_text_call_count());
  EXPECT_EQ(kFinalSpeechResult16, input_context_handler_->last_commit_text());
}
// Same as UserEndsDictation but with ChromeVox enabled: interim results do
// not show as composition text, yet toggling off still commits the pending
// result.
IN_PROC_BROWSER_TEST_P(DictationTest, UserEndsDictationWhenChromeVoxEnabled) {
  AccessibilityManager* manager = GetManager();
  EnableChromeVox();
  EXPECT_TRUE(manager->IsSpokenFeedbackEnabled());
  // Toggle Dictation on directly.
  GetManager()->ToggleDictation();
  WaitForRecognitionStarted();
  // Now wait for the callbacks to propagate on the UI thread.
  base::RunLoop().RunUntilIdle();
  EXPECT_EQ(GetLastCompositionText().text, empty_composition_text_.text);
  SendSpeechResult(kFinalSpeechResult, false /* is_final */);
  // Under ChromeVox the interim result is not shown as composition text.
  EXPECT_EQ(GetLastCompositionText().text, empty_composition_text_.text);
  // Toggle Dictation off.
  GetManager()->ToggleDictation();
  base::RunLoop().RunUntilIdle();
  // Wait for interim results to be finalized.
  base::RunLoop().RunUntilIdle();
  EXPECT_EQ(1, input_context_handler_->commit_text_call_count());
  EXPECT_EQ(kFinalSpeechResult16, input_context_handler_->last_commit_text());
}
// Verifies that committed speech follows the active IMEInputContextHandler
// when a new handler is installed mid-session (e.g. a remote app creating a
// second InputMethodChromeOS).
IN_PROC_BROWSER_TEST_P(DictationTest, SwitchInputContext) {
  // Turn on dictation and say something.
  ToggleDictation();
  SendSpeechResult(kFirstSpeechResult, true /* is final */);
  // Wait for interim results to be finalized.
  base::RunLoop().RunUntilIdle();
  // Speech goes to the default IMEInputContextHandler.
  EXPECT_EQ(kFirstSpeechResult16, input_context_handler_->last_commit_text());
  // Simulate a remote app instantiating a new IMEInputContextHandler, like
  // the keyboard shortcut viewer app creating a second InputMethodChromeOS.
  ui::MockIMEInputContextHandler input_context_handler2;
  ui::IMEBridge::Get()->SetInputContextHandler(&input_context_handler2);
  if (GetParam().first == kTestDefaultListening) {
    // Wait for speech to stop, then turn it on again.
    WaitForRecognitionEnded();
    EXPECT_TRUE(IsDictationOff());
    // Turn on dictation and say something else.
    ToggleDictation();
  }
  SendSpeechResult(kSecondSpeechResult, true /* is final*/);
  // Wait for interim results to be finalized.
  base::RunLoop().RunUntilIdle();
  std::u16string expected = kSecondSpeechResult16;
  if (GetParam().first != kTestDefaultListening)
    // With longer listening the session continued, so a separating space is
    // prepended before the second committed result.
    expected = u" " + expected;
  // Speech goes to the new IMEInputContextHandler.
  EXPECT_EQ(expected, input_context_handler2.last_commit_text());
  ui::IMEBridge::Get()->SetInputContextHandler(nullptr);
}
// Verifies that changing the focused text input client while speech is in
// progress ends Dictation and commits the pending interim result.
IN_PROC_BROWSER_TEST_P(DictationTest, ChangeInputField) {
  // Turn on dictation and start speaking.
  ToggleDictation();
  SendSpeechResult(kFinalSpeechResult, false /* is_final */);
  // Change the input state to a new client.
  std::unique_ptr<ui::TextInputClient> new_client =
      std::make_unique<ui::DummyTextInputClient>();
  NotifyTextInputStateChanged(new_client.get());
  // Wait for interim results to be finalized.
  base::RunLoop().RunUntilIdle();
  // Check that dictation has turned off.
  EXPECT_EQ(1, input_context_handler_->commit_text_call_count());
  EXPECT_EQ(kFinalSpeechResult16, input_context_handler_->last_commit_text());
}
// Verifies multi-result behavior: with default listening, Dictation stops
// after one final result; with longer listening it keeps accepting final
// results, prepending a single separating space to each subsequent commit.
IN_PROC_BROWSER_TEST_P(DictationTest, MightListenForMultipleResults) {
  // Turn on dictation and send a final result.
  ToggleDictation();
  SendSpeechResult("Purple", true /* is final */);
  // Wait for interim results to be finalized.
  base::RunLoop().RunUntilIdle();
  EXPECT_EQ(u"Purple", input_context_handler_->last_commit_text());
  if (GetParam().first == kTestDefaultListening) {
    // Dictation should turn off.
    WaitForRecognitionEnded();
    EXPECT_TRUE(IsDictationOff());
    return;
  }
  // Longer listening: Dictation stays on for more results.
  EXPECT_FALSE(IsDictationOff());
  SendSpeechResult("pink", true /* is final */);
  EXPECT_EQ(2, input_context_handler_->commit_text_call_count());
  // Space in front of the result.
  EXPECT_EQ(u" pink", input_context_handler_->last_commit_text());
  SendSpeechResult(" blue", true /* is final */);
  EXPECT_EQ(3, input_context_handler_->commit_text_call_count());
  // Only one space in front of the result.
  EXPECT_EQ(u" blue", input_context_handler_->last_commit_text());
  ToggleDictation();
  // No change expected after toggle.
  EXPECT_EQ(3, input_context_handler_->commit_text_call_count());
}
} // namespace ash