| // Copyright 2013 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include <stddef.h> |
| #include <stdint.h> |
| #include <string.h> |
| |
| #include <list> |
| #include <memory> |
| |
| #include "base/bind.h" |
| #include "base/location.h" |
| #include "base/numerics/safe_conversions.h" |
| #include "base/run_loop.h" |
| #include "base/single_thread_task_runner.h" |
| #include "base/strings/string_util.h" |
| #include "base/strings/utf_string_conversions.h" |
| #include "base/sys_byteorder.h" |
| #include "base/task/post_task.h" |
| #include "base/threading/thread_task_runner_handle.h" |
| #include "build/build_config.h" |
| #include "content/browser/speech/proto/google_streaming_api.pb.h" |
| #include "content/browser/speech/speech_recognition_engine.h" |
| #include "content/browser/speech/speech_recognition_manager_impl.h" |
| #include "content/browser/speech/speech_recognizer_impl.h" |
| #include "content/public/browser/browser_task_traits.h" |
| #include "content/public/browser/browser_thread.h" |
| #include "content/public/browser/notification_types.h" |
| #include "content/public/browser/web_contents.h" |
| #include "content/public/test/browser_test_utils.h" |
| #include "content/public/test/content_browser_test.h" |
| #include "content/public/test/content_browser_test_utils.h" |
| #include "content/public/test/test_navigation_observer.h" |
| #include "content/public/test/test_utils.h" |
| #include "content/shell/browser/shell.h" |
| #include "media/audio/audio_system.h" |
| #include "media/base/audio_capturer_source.h" |
| #include "net/test/embedded_test_server/controllable_http_response.h" |
| #include "net/test/embedded_test_server/embedded_test_server.h" |
| #include "testing/gmock/include/gmock/gmock.h" |
| #include "testing/gtest/include/gtest/gtest.h" |
| |
| using base::RunLoop; |
| using CaptureCallback = media::AudioCapturerSource::CaptureCallback; |
| |
| namespace content { |
| |
| namespace { |
| |
| // TODO(https://crbug.com/841818) Use FakeSystemInfo instead. |
| class MockAudioSystem : public media::AudioSystem { |
| public: |
| MockAudioSystem() = default; |
| |
| // AudioSystem implementation. |
| void GetInputStreamParameters(const std::string& device_id, |
| OnAudioParamsCallback on_params_cb) override { |
| DCHECK_CURRENTLY_ON(BrowserThread::IO); |
| |
| // Posting callback to allow current SpeechRecognizerImpl dispatching event |
| // to complete before transitioning to the next FSM state. |
| base::PostTaskWithTraits( |
| FROM_HERE, {content::BrowserThread::IO}, |
| base::BindOnce(std::move(on_params_cb), |
| media::AudioParameters::UnavailableDeviceParams())); |
| } |
| |
| MOCK_METHOD2(GetOutputStreamParameters, |
| void(const std::string& device_id, |
| OnAudioParamsCallback on_params_cb)); |
| MOCK_METHOD1(HasInputDevices, void(OnBoolCallback on_has_devices_cb)); |
| MOCK_METHOD1(HasOutputDevices, void(OnBoolCallback on_has_devices_cb)); |
| MOCK_METHOD2(GetDeviceDescriptions, |
| void(bool for_input, |
| OnDeviceDescriptionsCallback on_descriptions_cp)); |
| MOCK_METHOD2(GetAssociatedOutputDeviceID, |
| void(const std::string& input_device_id, |
| OnDeviceIdCallback on_device_id_cb)); |
| MOCK_METHOD2(GetInputDeviceInfo, |
| void(const std::string& input_device_id, |
| OnInputDeviceInfoCallback on_input_device_info_cb)); |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(MockAudioSystem); |
| }; |
| |
| class MockCapturerSource : public media::AudioCapturerSource { |
| public: |
| using StartCallback = |
| base::OnceCallback<void(const media::AudioParameters& audio_parameters, |
| CaptureCallback* capture_callback)>; |
| using StopCallback = base::OnceCallback<void()>; |
| |
| MockCapturerSource(StartCallback start_callback, StopCallback stop_callback) { |
| start_callback_ = std::move(start_callback); |
| stop_callback_ = std::move(stop_callback); |
| } |
| |
| void Initialize(const media::AudioParameters& params, |
| CaptureCallback* callback) { |
| audio_parameters_ = params; |
| capture_callback_ = callback; |
| } |
| |
| void Start() override { |
| std::move(start_callback_).Run(audio_parameters_, capture_callback_); |
| } |
| |
| void Stop() override { std::move(stop_callback_).Run(); } |
| |
| MOCK_METHOD1(SetAutomaticGainControl, void(bool enable)); |
| MOCK_METHOD1(SetVolume, void(double volume)); |
| MOCK_METHOD1(SetOutputDeviceForAec, |
| void(const std::string& output_device_id)); |
| |
| protected: |
| ~MockCapturerSource() override = default; |
| |
| private: |
| StartCallback start_callback_; |
| StopCallback stop_callback_; |
| CaptureCallback* capture_callback_; |
| media::AudioParameters audio_parameters_; |
| |
| DISALLOW_COPY_AND_ASSIGN(MockCapturerSource); |
| }; |
| |
| std::string MakeGoodResponse() { |
| proto::SpeechRecognitionEvent proto_event; |
| proto_event.set_status(proto::SpeechRecognitionEvent::STATUS_SUCCESS); |
| proto::SpeechRecognitionResult* proto_result = proto_event.add_result(); |
| blink::mojom::SpeechRecognitionResultPtr result = |
| blink::mojom::SpeechRecognitionResult::New(); |
| result->hypotheses.push_back(blink::mojom::SpeechRecognitionHypothesis::New( |
| base::UTF8ToUTF16("Pictures of the moon"), 1.0F)); |
| proto_result->set_final(!result->is_provisional); |
| for (size_t i = 0; i < result->hypotheses.size(); ++i) { |
| proto::SpeechRecognitionAlternative* proto_alternative = |
| proto_result->add_alternative(); |
| const blink::mojom::SpeechRecognitionHypothesisPtr& hypothesis = |
| result->hypotheses[i]; |
| proto_alternative->set_confidence(hypothesis->confidence); |
| proto_alternative->set_transcript(base::UTF16ToUTF8(hypothesis->utterance)); |
| } |
| |
| std::string msg_string; |
| proto_event.SerializeToString(&msg_string); |
| |
| // Prepend 4 byte prefix length indication to the protobuf message as |
| // envisaged by the google streaming recognition webservice protocol. |
| uint32_t prefix = |
| base::HostToNet32(base::checked_cast<uint32_t>(msg_string.size())); |
| msg_string.insert(0, reinterpret_cast<char*>(&prefix), sizeof(prefix)); |
| return msg_string; |
| } |
| |
| } // namespace |
| |
| class SpeechRecognitionBrowserTest : public ContentBrowserTest { |
| public: |
| enum StreamingServerState { |
| kIdle, |
| kTestAudioCapturerSourceOpened, |
| kTestAudioCapturerSourceClosed, |
| }; |
| |
| // Helper methods used by test fixtures. |
| GURL GetTestUrlFromFragment(const std::string& fragment) { |
| return GURL(GetTestUrl("speech", "web_speech_recognition.html").spec() + |
| "#" + fragment); |
| } |
| |
| std::string GetPageFragment() { |
| return shell()->web_contents()->GetLastCommittedURL().ref(); |
| } |
| |
| const StreamingServerState &streaming_server_state() { |
| return streaming_server_state_; |
| } |
| |
| protected: |
| // ContentBrowserTest methods. |
| void SetUpOnMainThread() override { |
| streaming_server_state_ = kIdle; |
| |
| ASSERT_TRUE(SpeechRecognitionManagerImpl::GetInstance()); |
| audio_system_ = std::make_unique<MockAudioSystem>(); |
| audio_capturer_source_ = base::MakeRefCounted<MockCapturerSource>( |
| base::BindOnce(&SpeechRecognitionBrowserTest::OnCapturerSourceStart, |
| base::Unretained(this)), |
| base::BindOnce(&SpeechRecognitionBrowserTest::OnCapturerSourceStop, |
| base::Unretained(this))); |
| SpeechRecognizerImpl::SetAudioEnvironmentForTesting( |
| audio_system_.get(), |
| static_cast<media::AudioCapturerSource*>(audio_capturer_source_.get())); |
| } |
| |
| void TearDownOnMainThread() override { |
| SpeechRecognizerImpl::SetAudioEnvironmentForTesting(nullptr, nullptr); |
| } |
| |
| private: |
| void OnCapturerSourceStart(const media::AudioParameters& audio_parameters, |
| CaptureCallback* capture_callback) { |
| DCHECK_CURRENTLY_ON(BrowserThread::IO); |
| ASSERT_EQ(kIdle, streaming_server_state_); |
| streaming_server_state_ = kTestAudioCapturerSourceOpened; |
| |
| const int capture_packet_interval_ms = |
| (1000 * audio_parameters.frames_per_buffer()) / |
| audio_parameters.sample_rate(); |
| ASSERT_EQ(SpeechRecognitionEngine::kAudioPacketIntervalMs, |
| capture_packet_interval_ms); |
| FeedAudioCapturerSource(audio_parameters, capture_callback, 500 /* ms */, |
| /*noise=*/false); |
| FeedAudioCapturerSource(audio_parameters, capture_callback, 1000 /* ms */, |
| /*noise=*/true); |
| FeedAudioCapturerSource(audio_parameters, capture_callback, 1000 /* ms */, |
| /*noise=*/false); |
| } |
| |
| void OnCapturerSourceStop() { |
| DCHECK_CURRENTLY_ON(BrowserThread::IO); |
| ASSERT_EQ(kTestAudioCapturerSourceOpened, streaming_server_state_); |
| streaming_server_state_ = kTestAudioCapturerSourceClosed; |
| |
| // Reset capturer source so SpeechRecognizerImpl destructor doesn't call |
| // AudioCaptureSourcer::Stop() again. |
| SpeechRecognizerImpl::SetAudioEnvironmentForTesting(nullptr, nullptr); |
| |
| base::PostTaskWithTraits( |
| FROM_HERE, {content::BrowserThread::UI}, |
| base::BindOnce(&SpeechRecognitionBrowserTest::SendResponse, |
| base::Unretained(this))); |
| } |
| |
| void SendResponse() {} |
| |
| static void FeedSingleBufferToAudioCapturerSource( |
| const media::AudioParameters& audio_params, |
| CaptureCallback* capture_callback, |
| size_t buffer_size, |
| bool fill_with_noise) { |
| DCHECK(capture_callback); |
| std::unique_ptr<uint8_t[]> audio_buffer(new uint8_t[buffer_size]); |
| if (fill_with_noise) { |
| for (size_t i = 0; i < buffer_size; ++i) |
| audio_buffer[i] = |
| static_cast<uint8_t>(127 * sin(i * 3.14F / (16 * buffer_size))); |
| } else { |
| memset(audio_buffer.get(), 0, buffer_size); |
| } |
| |
| std::unique_ptr<media::AudioBus> audio_bus = |
| media::AudioBus::Create(audio_params); |
| audio_bus->FromInterleaved<media::SignedInt16SampleTypeTraits>( |
| reinterpret_cast<int16_t*>(&audio_buffer.get()[0]), |
| audio_bus->frames()); |
| capture_callback->Capture(audio_bus.get(), 0, 0.0, false); |
| } |
| |
| void FeedAudioCapturerSource(const media::AudioParameters& audio_params, |
| CaptureCallback* capture_callback, |
| int duration_ms, |
| bool feed_with_noise) { |
| const size_t buffer_size = |
| audio_params.GetBytesPerBuffer(media::kSampleFormatS16); |
| const int ms_per_buffer = audio_params.GetBufferDuration().InMilliseconds(); |
| // We can only simulate durations that are integer multiples of the |
| // buffer size. In this regard see |
| // SpeechRecognitionEngine::GetDesiredAudioChunkDurationMs(). |
| ASSERT_EQ(0, duration_ms % ms_per_buffer); |
| |
| const int n_buffers = duration_ms / ms_per_buffer; |
| for (int i = 0; i < n_buffers; ++i) { |
| base::ThreadTaskRunnerHandle::Get()->PostTask( |
| FROM_HERE, |
| base::BindOnce(&FeedSingleBufferToAudioCapturerSource, audio_params, |
| capture_callback, buffer_size, feed_with_noise)); |
| } |
| } |
| |
| std::unique_ptr<media::AudioSystem> audio_system_; |
| scoped_refptr<MockCapturerSource> audio_capturer_source_; |
| StreamingServerState streaming_server_state_; |
| }; |
| |
| // Simply loads the test page and checks if it was able to create a Speech |
| // Recognition object in JavaScript, to make sure the Web Speech API is enabled. |
| // Flaky on all platforms. http://crbug.com/396414. |
| IN_PROC_BROWSER_TEST_F(SpeechRecognitionBrowserTest, DISABLED_Precheck) { |
| NavigateToURLBlockUntilNavigationsComplete( |
| shell(), GetTestUrlFromFragment("precheck"), 2); |
| |
| EXPECT_EQ(kIdle, streaming_server_state()); |
| EXPECT_EQ("success", GetPageFragment()); |
| } |
| |
| // Flaky on mac, see https://crbug.com/794645. |
| #if defined(OS_MACOSX) |
| #define MAYBE_OneShotRecognition DISABLED_OneShotRecognition |
| #else |
| #define MAYBE_OneShotRecognition OneShotRecognition |
| #endif |
| IN_PROC_BROWSER_TEST_F(SpeechRecognitionBrowserTest, MAYBE_OneShotRecognition) { |
| // Set up a test server, with two response handlers. |
| net::test_server::ControllableHttpResponse upstream_response( |
| embedded_test_server(), "/foo/up?", true /* relative_url_is_prefix */); |
| net::test_server::ControllableHttpResponse downstream_response( |
| embedded_test_server(), "/foo/down?", true /* relative_url_is_prefix */); |
| ASSERT_TRUE(embedded_test_server()->Start()); |
| // Use a base path that doesn't end in a slash to mimic the default URL. |
| std::string web_service_base_url = |
| embedded_test_server()->base_url().spec() + "foo"; |
| SpeechRecognitionEngine::set_web_service_base_url_for_tests( |
| web_service_base_url.c_str()); |
| |
| // Need to watch for two navigations. Can't use |
| // NavigateToURLBlockUntilNavigationsComplete so that the |
| // ControllableHttpResponses can be used to wait for the test server to see |
| // the network requests, and response to them. |
| TestNavigationObserver navigation_observer(shell()->web_contents(), 2); |
| shell()->LoadURL(GetTestUrlFromFragment("oneshot")); |
| |
| // Wait for the upstream HTTP request to be completely received, and return an |
| // empty response. |
| upstream_response.WaitForRequest(); |
| EXPECT_FALSE(upstream_response.http_request()->content.empty()); |
| EXPECT_EQ(net::test_server::METHOD_POST, |
| upstream_response.http_request()->method); |
| EXPECT_EQ("chunked", |
| upstream_response.http_request()->headers.at("Transfer-Encoding")); |
| EXPECT_EQ("audio/x-flac; rate=16000", |
| upstream_response.http_request()->headers.at("Content-Type")); |
| upstream_response.Send("HTTP/1.1 200 OK\r\n\r\n"); |
| upstream_response.Done(); |
| |
| // Wait for the downstream HTTP request to be received, and response with a |
| // valid response. |
| downstream_response.WaitForRequest(); |
| EXPECT_EQ(net::test_server::METHOD_GET, |
| downstream_response.http_request()->method); |
| downstream_response.Send("HTTP/1.1 200 OK\r\n\r\n" + MakeGoodResponse()); |
| downstream_response.Done(); |
| |
| navigation_observer.Wait(); |
| |
| EXPECT_EQ(kTestAudioCapturerSourceClosed, streaming_server_state()); |
| EXPECT_EQ("goodresult1", GetPageFragment()); |
| |
| // Remove reference to URL string that's on the stack. |
| SpeechRecognitionEngine::set_web_service_base_url_for_tests(nullptr); |
| } |
| |
| } // namespace content |