chrome/renderer/media/chrome_speech_recognition_client.h - chromium/src - Git at Google

 // Copyright 2020 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifndef CHROME_RENDERER_MEDIA_CHROME_SPEECH_RECOGNITION_CLIENT_H_
 #define CHROME_RENDERER_MEDIA_CHROME_SPEECH_RECOGNITION_CLIENT_H_

 #include <memory>
 #include <string>

 #include "base/memory/weak_ptr.h"
 #include "base/sequence_checker.h"
 #include "base/synchronization/lock.h"
 #include "content/public/renderer/render_frame_observer.h"
 #include "media/base/audio_buffer.h"
 #include "media/base/speech_recognition_client.h"
 #include "media/mojo/common/audio_data_s16_converter.h"
 #include "media/mojo/mojom/speech_recognition.mojom.h"
 #include "mojo/public/cpp/bindings/receiver.h"
 #include "mojo/public/cpp/bindings/remote.h"

 // Forward declaration: this header only refers to content::RenderFrame through
 // a pointer (the constructor parameter), so the full definition is not needed.
 namespace content {
 class RenderFrame;
 }  // namespace content

 // Forward declaration: this header only holds ReconfigurableAudioBusPoolImpl
 // through a std::unique_ptr member (`audio_bus_pool_`).
 namespace media {
 class ReconfigurableAudioBusPoolImpl;
 }  // namespace media

 // Renderer-side speech recognition client associated with a
 // content::RenderFrame. It accepts audio through the
 // media::SpeechRecognitionClient interface and receives
 // media::mojom::SpeechRecognitionBrowserObserver notifications over mojo.
 // Instances are neither copyable nor copy-assignable.
 class ChromeSpeechRecognitionClient
     : public content::RenderFrameObserver,
       public media::SpeechRecognitionClient,
       public media::mojom::SpeechRecognitionBrowserObserver,
       public media::AudioDataS16Converter {
  public:
   // Signature of a repeating callback that takes a
   // media::mojom::AudioDataS16Ptr; see `send_audio_callback_`.
   using SendAudioToSpeechRecognitionServiceCallback =
       base::RepeatingCallback<void(media::mojom::AudioDataS16Ptr audio_data)>;
   // Signature of the argument-less repeating callback stored in
   // `initialize_callback_`.
   using InitializeCallback = base::RepeatingCallback<void()>;

   // `render_frame` is the frame this client is created for.
   // NOTE(review): presumably forwarded to the content::RenderFrameObserver
   // base - confirm in the .cc file.
   explicit ChromeSpeechRecognitionClient(content::RenderFrame* render_frame);
   ChromeSpeechRecognitionClient(const ChromeSpeechRecognitionClient&) = delete;
   ChromeSpeechRecognitionClient& operator=(
       const ChromeSpeechRecognitionClient&) = delete;
   ~ChromeSpeechRecognitionClient() override;

   // content::RenderFrameObserver
   void OnDestruct() override;

   // media::SpeechRecognitionClient
   // Overload that takes a ref-counted media::AudioBuffer.
   void AddAudio(scoped_refptr<media::AudioBuffer> buffer) override;

   // Overload that takes a media::AudioBus. Reconfigure() must have been called
   // first, and this must not be called concurrently with Reconfigure().
   void AddAudio(const media::AudioBus& audio_bus) override;

   bool IsSpeechRecognitionAvailable() override;
   // NOTE(review): `callback` is presumably stored in `on_ready_callback_` -
   // confirm in the .cc file.
   void SetOnReadyCallback(
       SpeechRecognitionClient::OnReadyCallback callback) override;

   // Must be called on the main owning sequence, and before the first call to
   // AddAudio(const media::AudioBus&). Must not be called concurrently with
   // AddAudio().
   void Reconfigure(const media::AudioParameters& audio_parameters) override;

   // Callback executed when the recognizer is bound. Sets the flag indicating
   // whether the speech recognition service supports multichannel audio.
   void OnRecognizerBound(bool is_multichannel_supported);

   // media::mojom::SpeechRecognitionBrowserObserver
   void SpeechRecognitionAvailabilityChanged(
       bool is_speech_recognition_available) override;
   void SpeechRecognitionLanguageChanged(const std::string& language) override;
   void SpeechRecognitionMaskOffensiveWordsChanged(
       bool mask_offensive_words) override;

  private:
   // Signature of `add_audio_on_main_sequence_callback_`. The parameter list
   // matches AddAudioBusOnMainSequence(): an owned audio bus, an int (the
   // sample rate there) and the channel layout.
   using AddAudioCallback = base::RepeatingCallback<
       void(std::unique_ptr<media::AudioBus>, int, media::ChannelLayout)>;

   // Initializes the speech recognition client and constructs all of the mojo
   // pipes.
   void Initialize();

   // Resets the mojo pipes to the speech recognition recognizer and the speech
   // recognition service. Keeps the pipe to the browser so that this client
   // can be notified when to reinitialize the pipes.
   void Reset();

   // Processes an audio bus on the main sequence.
   void AddAudioBusOnMainSequence(std::unique_ptr<media::AudioBus> audio_bus,
                                  int sample_rate,
                                  media::ChannelLayout channel_layout);

   // NOTE(review): going by the name, forwards `audio_data` to the speech
   // recognition service - confirm in the .cc file.
   void SendAudioToSpeechRecognitionService(
       media::mojom::AudioDataS16Ptr audio_data);

   // Called when the speech recognition context or the speech recognition
   // recognizer is disconnected. Sends an error message to the UI and halts
   // future transcriptions.
   void OnRecognizerDisconnected();

   // NOTE(review): presumably bound to Initialize() - confirm in the .cc file.
   ChromeSpeechRecognitionClient::InitializeCallback initialize_callback_;

   // Has the same type as the argument of SetOnReadyCallback().
   media::SpeechRecognitionClient::OnReadyCallback on_ready_callback_;

   // NOTE(review): presumably bound to Reset() - confirm in the .cc file.
   base::RepeatingClosure reset_callback_;

   // Sends audio to the speech recognition thread on the renderer thread.
   // NOTE(review): the wording above is ambiguous about which thread runs the
   // callback - confirm against the .cc file.
   SendAudioToSpeechRecognitionServiceCallback send_audio_callback_;

   // Receiver for media::mojom::SpeechRecognitionBrowserObserver messages;
   // constructed with `this` as the implementation.
   mojo::Receiver<media::mojom::SpeechRecognitionBrowserObserver>
       speech_recognition_availability_observer_{this};
   // Remote for the browser-side client interface.
   mojo::Remote<media::mojom::SpeechRecognitionClientBrowserInterface>
       speech_recognition_client_browser_interface_;

   // Remotes for the speech recognition context and recognizer.
   // OnRecognizerDisconnected() is documented as handling the disconnection of
   // either one.
   mojo::Remote<media::mojom::SpeechRecognitionContext>
       speech_recognition_context_;
   mojo::Remote<media::mojom::SpeechRecognitionRecognizer>
       speech_recognition_recognizer_;

   // NOTE(review): presumably bound to AddAudioBusOnMainSequence() - confirm.
   AddAudioCallback add_audio_on_main_sequence_callback_;
   // Pool of audio buses. NOTE(review): presumably set up by Reconfigure() and
   // used by AddAudio(const media::AudioBus&) - confirm in the .cc file.
   std::unique_ptr<media::ReconfigurableAudioBusPoolImpl> audio_bus_pool_;
   // Sequence checker for the main sequence.
   SEQUENCE_CHECKER(main_sequence_checker_);

   // Cached audio parameters used with media::AudioBus.
   media::AudioParameters audio_parameters_;

   // Whether all mojo pipes are bound to the speech recognition service.
   bool GUARDED_BY(is_recognizer_bound_lock_) is_recognizer_bound_ = false;

   // Protects `is_recognizer_bound_` when it's accessed from the main and
   // rendering threads concurrently.
   mutable base::Lock is_recognizer_bound_lock_;

   // A flag indicating whether the speech recognition service supports
   // multichannel audio. Set by OnRecognizerBound().
   bool is_multichannel_supported_ = false;

   // Kept as the last member so that it is destroyed before the other members
   // (members are destroyed in reverse declaration order).
   base::WeakPtrFactory<ChromeSpeechRecognitionClient> weak_factory_{this};
 };

 #endif  // CHROME_RENDERER_MEDIA_CHROME_SPEECH_RECOGNITION_CLIENT_H_
	// Copyright 2020 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#ifndef CHROME_RENDERER_MEDIA_CHROME_SPEECH_RECOGNITION_CLIENT_H_
	#define CHROME_RENDERER_MEDIA_CHROME_SPEECH_RECOGNITION_CLIENT_H_

	#include <memory>
	#include <string>

	#include "base/memory/weak_ptr.h"
	#include "base/sequence_checker.h"
	#include "base/synchronization/lock.h"
	#include "content/public/renderer/render_frame_observer.h"
	#include "media/base/audio_buffer.h"
	#include "media/base/speech_recognition_client.h"
	#include "media/mojo/common/audio_data_s16_converter.h"
	#include "media/mojo/mojom/speech_recognition.mojom.h"
	#include "mojo/public/cpp/bindings/receiver.h"
	#include "mojo/public/cpp/bindings/remote.h"

	// Forward declaration: this header only refers to content::RenderFrame through
	// a pointer (the constructor parameter), so the full definition is not needed.
	namespace content {
	class RenderFrame;
	} // namespace content

	// Forward declaration: this header only holds ReconfigurableAudioBusPoolImpl
	// through a std::unique_ptr member (`audio_bus_pool_`).
	namespace media {
	class ReconfigurableAudioBusPoolImpl;
	} // namespace media

	// Renderer-side speech recognition client associated with a
	// content::RenderFrame. It accepts audio through the
	// media::SpeechRecognitionClient interface and receives
	// media::mojom::SpeechRecognitionBrowserObserver notifications over mojo.
	// Instances are neither copyable nor copy-assignable.
	class ChromeSpeechRecognitionClient
	: public content::RenderFrameObserver,
	public media::SpeechRecognitionClient,
	public media::mojom::SpeechRecognitionBrowserObserver,
	public media::AudioDataS16Converter {
	public:
	// Signature of a repeating callback that takes a
	// media::mojom::AudioDataS16Ptr; see `send_audio_callback_`.
	using SendAudioToSpeechRecognitionServiceCallback =
	base::RepeatingCallback<void(media::mojom::AudioDataS16Ptr audio_data)>;
	// Signature of the argument-less repeating callback stored in
	// `initialize_callback_`.
	using InitializeCallback = base::RepeatingCallback<void()>;

	// `render_frame` is the frame this client is created for.
	// NOTE(review): presumably forwarded to the content::RenderFrameObserver
	// base - confirm in the .cc file.
	explicit ChromeSpeechRecognitionClient(content::RenderFrame* render_frame);
	ChromeSpeechRecognitionClient(const ChromeSpeechRecognitionClient&) = delete;
	ChromeSpeechRecognitionClient& operator=(
	const ChromeSpeechRecognitionClient&) = delete;
	~ChromeSpeechRecognitionClient() override;

	// content::RenderFrameObserver
	void OnDestruct() override;

	// media::SpeechRecognitionClient
	// Overload that takes a ref-counted media::AudioBuffer.
	void AddAudio(scoped_refptr<media::AudioBuffer> buffer) override;

	// Overload that takes a media::AudioBus. Reconfigure() must have been called
	// first, and this must not be called concurrently with Reconfigure().
	void AddAudio(const media::AudioBus& audio_bus) override;

	bool IsSpeechRecognitionAvailable() override;
	// NOTE(review): `callback` is presumably stored in `on_ready_callback_` -
	// confirm in the .cc file.
	void SetOnReadyCallback(
	SpeechRecognitionClient::OnReadyCallback callback) override;

	// Must be called on the main owning sequence, and before the first call to
	// AddAudio(const media::AudioBus&). Must not be called concurrently with
	// AddAudio().
	void Reconfigure(const media::AudioParameters& audio_parameters) override;

	// Callback executed when the recognizer is bound. Sets the flag indicating
	// whether the speech recognition service supports multichannel audio.
	void OnRecognizerBound(bool is_multichannel_supported);

	// media::mojom::SpeechRecognitionBrowserObserver
	void SpeechRecognitionAvailabilityChanged(
	bool is_speech_recognition_available) override;
	void SpeechRecognitionLanguageChanged(const std::string& language) override;
	void SpeechRecognitionMaskOffensiveWordsChanged(
	bool mask_offensive_words) override;

	private:
	// Signature of `add_audio_on_main_sequence_callback_`. The parameter list
	// matches AddAudioBusOnMainSequence(): an owned audio bus, an int (the
	// sample rate there) and the channel layout.
	using AddAudioCallback = base::RepeatingCallback<
	void(std::unique_ptr<media::AudioBus>, int, media::ChannelLayout)>;

	// Initializes the speech recognition client and constructs all of the mojo
	// pipes.
	void Initialize();

	// Resets the mojo pipes to the speech recognition recognizer and the speech
	// recognition service. Keeps the pipe to the browser so that this client
	// can be notified when to reinitialize the pipes.
	void Reset();

	// Processes an audio bus on the main sequence.
	void AddAudioBusOnMainSequence(std::unique_ptr<media::AudioBus> audio_bus,
	int sample_rate,
	media::ChannelLayout channel_layout);

	// NOTE(review): going by the name, forwards `audio_data` to the speech
	// recognition service - confirm in the .cc file.
	void SendAudioToSpeechRecognitionService(
	media::mojom::AudioDataS16Ptr audio_data);

	// Called when the speech recognition context or the speech recognition
	// recognizer is disconnected. Sends an error message to the UI and halts
	// future transcriptions.
	void OnRecognizerDisconnected();

	// NOTE(review): presumably bound to Initialize() - confirm in the .cc file.
	ChromeSpeechRecognitionClient::InitializeCallback initialize_callback_;

	// Has the same type as the argument of SetOnReadyCallback().
	media::SpeechRecognitionClient::OnReadyCallback on_ready_callback_;

	// NOTE(review): presumably bound to Reset() - confirm in the .cc file.
	base::RepeatingClosure reset_callback_;

	// Sends audio to the speech recognition thread on the renderer thread.
	// NOTE(review): the wording above is ambiguous about which thread runs the
	// callback - confirm against the .cc file.
	SendAudioToSpeechRecognitionServiceCallback send_audio_callback_;

	// Receiver for media::mojom::SpeechRecognitionBrowserObserver messages;
	// constructed with `this` as the implementation.
	mojo::Receiver<media::mojom::SpeechRecognitionBrowserObserver>
	speech_recognition_availability_observer_{this};
	// Remote for the browser-side client interface.
	mojo::Remote<media::mojom::SpeechRecognitionClientBrowserInterface>
	speech_recognition_client_browser_interface_;

	// Remotes for the speech recognition context and recognizer.
	// OnRecognizerDisconnected() is documented as handling the disconnection of
	// either one.
	mojo::Remote<media::mojom::SpeechRecognitionContext>
	speech_recognition_context_;
	mojo::Remote<media::mojom::SpeechRecognitionRecognizer>
	speech_recognition_recognizer_;

	// NOTE(review): presumably bound to AddAudioBusOnMainSequence() - confirm.
	AddAudioCallback add_audio_on_main_sequence_callback_;
	// Pool of audio buses. NOTE(review): presumably set up by Reconfigure() and
	// used by AddAudio(const media::AudioBus&) - confirm in the .cc file.
	std::unique_ptr<media::ReconfigurableAudioBusPoolImpl> audio_bus_pool_;
	// Sequence checker for the main sequence.
	SEQUENCE_CHECKER(main_sequence_checker_);

	// Cached audio parameters used with media::AudioBus.
	media::AudioParameters audio_parameters_;

	// Whether all mojo pipes are bound to the speech recognition service.
	bool GUARDED_BY(is_recognizer_bound_lock_) is_recognizer_bound_ = false;

	// Protects `is_recognizer_bound_` when it's accessed from the main and
	// rendering threads concurrently.
	mutable base::Lock is_recognizer_bound_lock_;

	// A flag indicating whether the speech recognition service supports
	// multichannel audio. Set by OnRecognizerBound().
	bool is_multichannel_supported_ = false;

	// Kept as the last member so that it is destroyed before the other members
	// (members are destroyed in reverse declaration order).
	base::WeakPtrFactory<ChromeSpeechRecognitionClient> weak_factory_{this};
	};

	#endif // CHROME_RENDERER_MEDIA_CHROME_SPEECH_RECOGNITION_CLIENT_H_