| // Copyright 2021 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef CHROME_SERVICES_SPEECH_AUDIO_SOURCE_FETCHER_IMPL_H_ |
| #define CHROME_SERVICES_SPEECH_AUDIO_SOURCE_FETCHER_IMPL_H_ |
| |
| #include <memory> |
| |
| #include "base/memory/raw_ptr.h" |
| #include "base/memory/weak_ptr.h" |
| #include "base/sequence_checker.h" |
| #include "chrome/services/speech/audio_source_consumer.h" |
| #include "media/base/audio_bus.h" |
| #include "media/base/audio_capturer_source.h" |
| #include "media/base/converting_audio_fifo.h" |
| #include "media/mojo/common/audio_data_s16_converter.h" |
| #include "media/mojo/mojom/audio_logging.mojom.h" |
| #include "media/mojo/mojom/speech_recognition_service.mojom.h" |
| #include "mojo/public/cpp/bindings/receiver.h" |
| #include "mojo/public/cpp/bindings/remote.h" |
| |
| namespace speech { |
| |
| class SpeechRecognitionRecognizerImpl; |
| |
| // Class to get device audio data and send it to a |
| // SpeechRecognitionRecognizerImpl for transcription. Runs on the IO thread in |
| // the Browser process in Chrome OS and in the Speech Recognition Service |
| // utility process on Chrome or web speech fallback. |
| class AudioSourceFetcherImpl |
| : public media::mojom::AudioSourceFetcher, |
| public media::AudioCapturerSource::CaptureCallback, |
| public media::AudioDataS16Converter, |
| public media::mojom::AudioLog { |
| public: |
| AudioSourceFetcherImpl( |
| std::unique_ptr<AudioSourceConsumer> recognition_recognizer, |
| bool is_multi_channel_supported, |
| bool is_server_based); |
| ~AudioSourceFetcherImpl() override; |
| AudioSourceFetcherImpl(const AudioSourceFetcherImpl&) = delete; |
| AudioSourceFetcherImpl& operator=(const AudioSourceFetcherImpl&) = delete; |
| |
| static void Create( |
| mojo::PendingReceiver<media::mojom::AudioSourceFetcher> receiver, |
| std::unique_ptr<AudioSourceConsumer> recognition_recognizer, |
| bool is_multi_channel_supported, |
| bool is_server_based); |
| |
| // media::mojom::AudioSourceFetcher: |
| void Start( |
| mojo::PendingRemote<media::mojom::AudioStreamFactory> stream_factory, |
| const std::string& device_id, |
| const ::media::AudioParameters& audio_parameters) override; |
| void Stop() override; |
| |
| // media::AudioCapturerSource::CaptureCallback: |
| void OnCaptureStarted() final {} |
| void Capture(const media::AudioBus* audio_source, |
| base::TimeTicks audio_capture_time, |
| double volume, |
| bool key_pressed) final; |
| void OnCaptureError(media::AudioCapturerSource::ErrorCode code, |
| const std::string& message) final; |
| void OnCaptureMuted(bool is_muted) final {} |
| // media::mojom::AudioLog |
| void OnCreated(const media::AudioParameters& params, |
| const std::string& device_id) override; |
| void OnStarted() override; |
| void OnStopped() override; |
| void OnClosed() override; |
| void OnError() override; |
| void OnSetVolume(double volume) override; |
| void OnLogMessage(const std::string& message) override; |
| void OnProcessingStateChanged(const std::string& message) override; |
| |
| // The output callback for ConvertingAudioFifo. |
| void OnAudioFinishedConvert(media::AudioBus* output_bus); |
| |
| void set_audio_capturer_source_for_tests( |
| media::AudioCapturerSource* audio_capturer_source_for_tests) { |
| audio_capturer_source_for_tests_ = audio_capturer_source_for_tests; |
| } |
| |
| private: |
| using SendAudioToSpeechRecognitionServiceCallback = |
| base::RepeatingCallback<void(media::mojom::AudioDataS16Ptr audio_data)>; |
| using SendAudioToResampleCallback = base::RepeatingCallback<void( |
| std::unique_ptr<media::AudioBus> audio_data)>; |
| |
| void SendAudioToSpeechRecognitionService( |
| media::mojom::AudioDataS16Ptr buffer); |
| |
| void SendAudioToResample(std::unique_ptr<media::AudioBus> audio_data); |
| |
| void SendAudioEndToSpeechRecognitionService(); |
| |
| media::AudioCapturerSource* GetAudioCapturerSource(); |
| |
| // Sends audio to the speech recognition recognizer. |
| SendAudioToSpeechRecognitionServiceCallback send_audio_callback_; |
| |
| // Audio capturer source for microphone recording. |
| scoped_refptr<media::AudioCapturerSource> audio_capturer_source_; |
| raw_ptr<media::AudioCapturerSource> audio_capturer_source_for_tests_ = |
| nullptr; |
| |
| // Audio parameters will be used when recording audio. |
| media::AudioParameters audio_parameters_; |
| |
| // Device ID used to record audio. |
| std::string device_id_; |
| |
| // Owned AudioSourceConsumer |
| std::unique_ptr<AudioSourceConsumer> audio_consumer_; |
| |
| // Whether audio capture is started. |
| bool is_started_; |
| |
| mojo::Receiver<media::mojom::AudioLog> audio_log_receiver_{this}; |
| |
| // Used to resample the audio when using server based speech recognition. Null |
| // when using SODA. |
| std::unique_ptr<media::ConvertingAudioFifo> converter_; |
| |
| // The output params for resampling for the server based speech recognition. |
| absl::optional<media::AudioParameters> server_based_recognition_params_ = |
| absl::nullopt; |
| bool is_multi_channel_supported_; |
| bool is_server_based_; |
| |
| // A callback to push audio data into `converter_`. |
| SendAudioToResampleCallback resample_callback_; |
| |
| SEQUENCE_CHECKER(sequence_checker_); |
| |
| base::WeakPtrFactory<AudioSourceFetcherImpl> weak_factory_{this}; |
| }; |
| |
| } // namespace speech |
| |
| #endif // CHROME_SERVICES_SPEECH_AUDIO_SOURCE_FETCHER_IMPL_H_ |