| // Copyright 2021 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "chrome/browser/speech/cros_speech_recognition_service.h" |
| |
| #include <memory> |
| #include <string> |
| #include <utility> |
| |
| #include "ash/constants/ash_features.h" |
| #include "base/containers/flat_map.h" |
| #include "base/files/file_path.h" |
| #include "base/functional/bind.h" |
| #include "base/notimplemented.h" |
| #include "base/types/optional_util.h" |
| #include "chrome/services/speech/audio_source_fetcher_impl.h" |
| #include "chrome/services/speech/buildflags/buildflags.h" |
| #include "chrome/services/speech/cros_speech_recognition_recognizer_impl.h" |
| #include "components/live_caption/pref_names.h" |
| #include "components/soda/constants.h" |
| #include "components/soda/soda_installer.h" |
| #include "components/user_prefs/user_prefs.h" |
| #include "content/public/browser/browser_context.h" |
| #include "content/public/browser/browser_task_traits.h" |
| #include "content/public/browser/browser_thread.h" |
| #include "content/public/browser/storage_partition.h" |
| #include "media/base/media_switches.h" |
| #include "media/mojo/mojom/speech_recognition.mojom-shared.h" |
| #include "media/mojo/mojom/speech_recognition.mojom.h" |
| #include "media/mojo/mojom/speech_recognition_service.mojom.h" |
| #include "services/network/public/cpp/shared_url_loader_factory.h" |
| |
| #if BUILDFLAG(ENABLE_SERVER_BASED_RECOGNITION_RECOGNIZER) |
| #include "chrome/services/speech/internal/server_based_recognition_recognizer.h" |
| #endif // BUILDFLAG(ENABLE_SERVER_BASED_RECOGNITION_RECOGNIZER) |
| |
| namespace speech { |
| |
| namespace { |
| |
// Message passed to mojo::ReportBadMessage() when a request carries
// SpeechRecognitionOptions that this service refuses to handle.
// NOTE: the identifier spells "Recogntion" (sic); the spelling is kept because
// the functions in this file refer to the constant by this exact name.
constexpr char kInvalidSpeechRecogntionOptions[] =
    "Invalid SpeechRecognitionOptions provided";
| |
| void PopulateFilePaths( |
| base::FilePath& binary_path, |
| base::flat_map<std::string, base::FilePath>& config_paths) { |
| speech::SodaInstaller* soda_installer = speech::SodaInstaller::GetInstance(); |
| |
| binary_path = soda_installer->GetSodaBinaryPath(); |
| for (const auto& language_code : soda_installer->InstalledLanguages()) { |
| config_paths[GetLanguageName(language_code)] = |
| soda_installer->GetLanguagePath(GetLanguageName(language_code)); |
| } |
| } |
| |
| std::unique_ptr<CrosSpeechRecognitionRecognizerImpl> |
| CreateCrosSpeechRecognitionRecognizer( |
| mojo::PendingRemote<media::mojom::SpeechRecognitionRecognizerClient> client, |
| media::mojom::SpeechRecognitionOptionsPtr options, |
| const base::FilePath& binary_path, |
| const base::flat_map<std::string, base::FilePath>& config_paths, |
| const std::string& primary_language_name, |
| const bool mask_offensive_words) { |
| return std::make_unique<CrosSpeechRecognitionRecognizerImpl>( |
| std::move(client), std::move(options), binary_path, config_paths, |
| primary_language_name, mask_offensive_words); |
| } |
| |
| } // namespace |
| |
| CrosSpeechRecognitionService::CrosSpeechRecognitionService( |
| content::BrowserContext* context) |
| : ChromeSpeechRecognitionService(context) { |
| cros_speech_recognition_recognizer_cb_ = |
| base::BindRepeating(CreateCrosSpeechRecognitionRecognizer); |
| } |
| |
| CrosSpeechRecognitionService::~CrosSpeechRecognitionService() = default; |
| |
| void CrosSpeechRecognitionService::BindSpeechRecognitionContext( |
| mojo::PendingReceiver<media::mojom::SpeechRecognitionContext> receiver) { |
| speech_recognition_contexts_.Add(this, std::move(receiver)); |
| } |
| |
| void CrosSpeechRecognitionService::BindAudioSourceSpeechRecognitionContext( |
| mojo::PendingReceiver<media::mojom::AudioSourceSpeechRecognitionContext> |
| receiver) { |
| audio_source_speech_recognition_contexts_.Add(this, std::move(receiver)); |
| } |
| |
| void CrosSpeechRecognitionService::BindRecognizer( |
| mojo::PendingReceiver<media::mojom::SpeechRecognitionRecognizer> receiver, |
| mojo::PendingRemote<media::mojom::SpeechRecognitionRecognizerClient> client, |
| media::mojom::SpeechRecognitionOptionsPtr options, |
| BindRecognizerCallback callback) { |
| // This binding is used by LiveCaption and it can't be server based |
| // recognition. |
| if (options->is_server_based || |
| options->recognizer_client_type != |
| media::mojom::RecognizerClientType::kLiveCaption) { |
| mojo::ReportBadMessage(kInvalidSpeechRecogntionOptions); |
| return; |
| } |
| |
| base::FilePath binary_path; |
| base::flat_map<std::string, base::FilePath> config_paths; |
| // The options should have locale set, but if they don't, pull from the prefs. |
| std::string language_name; |
| if (options->language) { |
| language_name = options->language.value(); |
| } else { |
| PrefService* profile_prefs = user_prefs::UserPrefs::Get(context()); |
| language_name = prefs::GetLiveCaptionLanguageCode(profile_prefs); |
| } |
| |
| PopulateFilePaths(binary_path, config_paths); |
| |
| // TODO(crbug.com/40924425): Implement offensive word mask on ChromeOS so that |
| // mask_offensive_words is not hard-coded. |
| CrosSpeechRecognitionRecognizerImpl::Create( |
| std::move(receiver), std::move(client), std::move(options), binary_path, |
| config_paths, language_name, /* mask_offensive_words= */ false); |
| std::move(callback).Run( |
| CrosSpeechRecognitionRecognizerImpl::IsMultichannelSupported()); |
| } |
| |
| void CrosSpeechRecognitionService::BindWebSpeechRecognizer( |
| mojo::PendingReceiver<media::mojom::SpeechRecognitionSession> |
| session_receiver, |
| mojo::PendingRemote<media::mojom::SpeechRecognitionSessionClient> |
| session_client, |
| mojo::PendingReceiver<media::mojom::SpeechRecognitionAudioForwarder> |
| audio_forwarder, |
| int channel_count, |
| int sample_rate, |
| media::mojom::SpeechRecognitionOptionsPtr options, |
| bool continuous) { |
| NOTIMPLEMENTED(); |
| } |
| |
| void CrosSpeechRecognitionService::BindAudioSourceFetcher( |
| mojo::PendingReceiver<media::mojom::AudioSourceFetcher> fetcher_receiver, |
| mojo::PendingRemote<media::mojom::SpeechRecognitionRecognizerClient> client, |
| media::mojom::SpeechRecognitionOptionsPtr options, |
| BindRecognizerCallback callback) { |
| if (!options->is_server_based) { |
| base::FilePath binary_path; |
| base::flat_map<std::string, base::FilePath> config_paths; |
| PopulateFilePaths(binary_path, config_paths); |
| |
| // The options should have locale set, but if they don't, pull from the |
| // prefs. |
| std::string language_name; |
| if (options->language) { |
| language_name = options->language.value(); |
| } else { |
| PrefService* profile_prefs = user_prefs::UserPrefs::Get(context()); |
| language_name = prefs::GetLiveCaptionLanguageCode(profile_prefs); |
| } |
| // `mask_offensive_words` is always true for |
| // `RecognizerClientType::kSchoolTools`. |
| // TODO(crbug.com/40924425): Implement offensive word mask on ChromeOS for |
| // live caption as well so that mask_offensive_words is set according to the |
| // settings for `RecognizerClientType::kLiveCaption`. |
| bool mask_offensive_words = |
| options->recognizer_client_type == |
| media::mojom::RecognizerClientType::kSchoolTools; |
| // CrosSpeechRecognitionService runs on browser UI thread. |
| // Create AudioSourceFetcher on browser IO thread to avoid UI jank. |
| // Note that its CrosSpeechRecognitionRecognizer must also run |
| // on the IO thread. If CrosSpeechRecognitionService is moved away from |
| // browser UI thread, we can call AudioSourceFetcherImpl::Create directly. |
| content::GetIOThreadTaskRunner({})->PostTask( |
| FROM_HERE, |
| base::BindOnce( |
| &CrosSpeechRecognitionService:: |
| CreateAudioSourceFetcherForOnDeviceRecognitionOnIOThread, |
| weak_factory_.GetWeakPtr(), std::move(fetcher_receiver), |
| std::move(client), std::move(options), binary_path, config_paths, |
| language_name, mask_offensive_words)); |
| std::move(callback).Run( |
| CrosSpeechRecognitionRecognizerImpl::IsMultichannelSupported()); |
| return; |
| } |
| #if BUILDFLAG(ENABLE_SERVER_BASED_RECOGNITION_RECOGNIZER) |
| if (!ash::features::IsInternalServerSideSpeechRecognitionEnabled()) { |
| // A request is made for a service that has not been enabled. |
| mojo::ReportBadMessage(kInvalidSpeechRecogntionOptions); |
| return; |
| } |
| content::GetIOThreadTaskRunner({})->PostTask( |
| FROM_HERE, |
| base::BindOnce( |
| &CrosSpeechRecognitionService:: |
| CreateAudioSourceFetcherForServerBasedRecognitionOnIOThread, |
| weak_factory_.GetWeakPtr(), std::move(fetcher_receiver), |
| std::move(client), std::move(options), |
| context() |
| ->GetDefaultStoragePartition() |
| ->GetURLLoaderFactoryForBrowserProcessIOThread())); |
| std::move(callback).Run(/*is_multichannel_supported=*/false); |
| return; |
| #else |
| mojo::ReportBadMessage(kInvalidSpeechRecogntionOptions); |
| #endif // BUILDFLAG(ENABLE_SERVER_BASED_RECOGNITION_RECOGNIZER) |
| } |
| |
| void CrosSpeechRecognitionService:: |
| SetCreateCrosSpeechRecognitionRecognizerCbForTesting( |
| CreateCrosSpeechRecognitionRecognizerCb callback) { |
| cros_speech_recognition_recognizer_cb_ = std::move(callback); |
| } |
| |
| void CrosSpeechRecognitionService:: |
| CreateAudioSourceFetcherForOnDeviceRecognitionOnIOThread( |
| mojo::PendingReceiver<media::mojom::AudioSourceFetcher> |
| fetcher_receiver, |
| mojo::PendingRemote<media::mojom::SpeechRecognitionRecognizerClient> |
| client, |
| media::mojom::SpeechRecognitionOptionsPtr options, |
| const base::FilePath& binary_path, |
| const base::flat_map<std::string, base::FilePath>& config_paths, |
| const std::string& primary_language_name, |
| const bool mask_offensive_words) { |
| DCHECK_CURRENTLY_ON(content::BrowserThread::IO); |
| DCHECK(!options->is_server_based); |
| AudioSourceFetcherImpl::Create( |
| std::move(fetcher_receiver), |
| cros_speech_recognition_recognizer_cb_.Run( |
| std::move(client), std::move(options), binary_path, config_paths, |
| primary_language_name, mask_offensive_words), |
| CrosSpeechRecognitionRecognizerImpl::IsMultichannelSupported(), |
| /*is_server_based=*/false); |
| } |
| |
| void CrosSpeechRecognitionService:: |
| CreateAudioSourceFetcherForServerBasedRecognitionOnIOThread( |
| mojo::PendingReceiver<media::mojom::AudioSourceFetcher> |
| fetcher_receiver, |
| mojo::PendingRemote<media::mojom::SpeechRecognitionRecognizerClient> |
| client, |
| media::mojom::SpeechRecognitionOptionsPtr options, |
| std::unique_ptr<network::PendingSharedURLLoaderFactory> |
| pending_loader_factory) { |
| #if BUILDFLAG(ENABLE_SERVER_BASED_RECOGNITION_RECOGNIZER) |
| DCHECK_CURRENTLY_ON(content::BrowserThread::IO); |
| DCHECK(options->is_server_based); |
| AudioSourceFetcherImpl::Create( |
| std::move(fetcher_receiver), |
| std::make_unique<ServerBasedRecognitionRecognizer>( |
| std::move(client), std::move(options), |
| network::SharedURLLoaderFactory::Create( |
| std::move(pending_loader_factory))), |
| /*is_multichannel_supported=*/false, /*is_server_based=*/true); |
| #endif // BUILDFLAG(ENABLE_SERVER_BASED_RECOGNITION_RECOGNIZER) |
| } |
| |
| } // namespace speech |