// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#import "content/browser/speech/tts_mac.h"

#import <AVFAudio/AVFAudio.h>
#import <AppKit/AppKit.h>
#include <objc/runtime.h>

#include <algorithm>
#include <string>

#include "base/apple/foundation_util.h"
#include "base/functional/bind.h"
#include "base/memory/raw_ptr.h"
#include "base/no_destructor.h"
#include "base/strings/sys_string_conversions.h"
#include "base/values.h"
#include "content/public/browser/tts_controller.h"

namespace {

constexpr int kNoLength = -1;
constexpr char kNoError[] = "";

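// Returns the cached list of voices. The cache starts out empty, is filled
// lazily by Voices() below, and is cleared whenever the application becomes
// active again, since the user may have changed the available voices or the
// default voice while this app was in the background.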
std::vector<content::VoiceData>& VoicesRef() {
  static base::NoDestructor<std::vector<content::VoiceData>> voices([]() {
    [NSNotificationCenter.defaultCenter
        addObserverForName:NSApplicationWillBecomeActiveNotification
                    object:nil
                     queue:nil
                usingBlock:^(NSNotification* notification) {
                  // The user might have switched to System Settings or some
                  // other app to change voices or locale settings. Avoid a
                  // stale cache by forcing a rebuild of the voices vector
                  // after the app becomes active.
                  VoicesRef().clear();
                }];
    return std::vector<content::VoiceData>();
  }());

  return *voices;
}

AVSpeechSynthesisVoice* GetSystemDefaultVoice() {
  // This should be
  //
  //   [AVSpeechSynthesisVoice voiceWithLanguage:nil]
  //
  // but that has a bug (https://crbug.com/1484940#c9, FB13197951). In short,
  // while passing nil to -[AVSpeechSynthesisVoice voiceWithLanguage:] does
  // indeed return "the default voice for the system's language and region",
  // that's not necessarily the voice that the user selected in System Settings
  // > Accessibility > Spoken Content, and that user voice selection is the
  // only one that matters. The two workarounds below (attempts 1 and 2) behave
  // correctly.
  NSUserDefaults* accessibility_defaults =
      [[NSUserDefaults alloc] initWithSuiteName:@"com.apple.Accessibility"];

  // SpokenContentDefaultVoiceSelectionsByLanguage is an array whose first
  // element is a system language code and whose second element is a dictionary
  // of voice selection details for that language.
  //
  // SpokenContentDefaultVoiceSelectionsByLanguage structure:
  //   @[
  //     @"en",  // System language code (NSString).
  //     @{
  //       @"_type": @"Speech.VoiceSelection",  // Type identifier (NSString).
  //       @"_version": @0,  // Voice format version (NSNumber).
  //       @"boundLanguage": @"en",  // Language bound to this voice (NSString).
  //       @"voiceId":
  //           @"com.apple.voice.compact.en-IE.Moira"  // Unique ID (NSString).
  //     }
  //   ]
  NSArray* spoken_default_voice_settings = [accessibility_defaults
      arrayForKey:@"SpokenContentDefaultVoiceSelectionsByLanguage"];

  AVSpeechSynthesisVoice* voice = nil;

  // Attempt 1: Get the user-selected voice from accessibility defaults.
  //
  // Process `spoken_default_voice_settings` only if the voice selection data
  // (expected second value in SpokenContentDefaultVoiceSelectionsByLanguage)
  // is present. This is a precautionary check.
  if (spoken_default_voice_settings.count > 1) {
    NSDictionary* selected_voice_settings = spoken_default_voice_settings[1];
    NSString* selected_voice_id = selected_voice_settings[@"voiceId"];
    voice = [AVSpeechSynthesisVoice voiceWithIdentifier:selected_voice_id];
  }

  // Attempt 2: Get the user-selected voice from the deprecated
  // NSSpeechSynthesizer API.
  if (!voice) {
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
    NSString* default_voice_identifier = NSSpeechSynthesizer.defaultVoice;
#pragma clang diagnostic pop
    voice =
        [AVSpeechSynthesisVoice voiceWithIdentifier:default_voice_identifier];
  }

  // Fall back to the default voice for the system's language and location if
  // we are unable to get the user-selected voice. This is the next
  // most-specific voice preference that we are able to retrieve using
  // supported APIs.
  if (!voice) {
    return [AVSpeechSynthesisVoice voiceWithLanguage:nil];
  }

  return voice;
}

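// Returns the voice list exposed through TtsPlatformImplMac::GetVoices(),
// rebuilding the cached vector in VoicesRef() if it has been cleared. The
// system default voice, if found, is moved to the front of the list.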
std::vector<content::VoiceData>& Voices() {
  std::vector<content::VoiceData>& voices = VoicesRef();
  if (!voices.empty()) {
    return voices;
  }

  NSMutableArray* av_speech_voices =
      [[AVSpeechSynthesisVoice.speechVoices sortedArrayUsingDescriptors:@[
        [NSSortDescriptor sortDescriptorWithKey:@"name" ascending:YES]
      ]] mutableCopy];
  AVSpeechSynthesisVoice* default_voice = GetSystemDefaultVoice();
  if (default_voice) {
    [av_speech_voices removeObject:default_voice];
    [av_speech_voices insertObject:default_voice atIndex:0];
  }

  // For the case of multiple voices with the same name but of a different
  // language, the old API (NSSpeechSynthesizer) would append locale
  // information to the names, while this current API does not. Because
  // returning a bunch of voices with the same name isn't helpful, count how
  // often each name is used, so that later on, locale information can be
  // appended if necessary for disambiguation.
  NSMutableDictionary<NSString*, NSNumber*>* name_counts =
      [NSMutableDictionary dictionary];
  for (AVSpeechSynthesisVoice* av_speech_voice in av_speech_voices) {
    NSString* voice_name = av_speech_voice.name;
    if (!voice_name) {
      // AVSpeechSynthesisVoice.name is not a nullable property, but there are
      // crashes (https://crbug.com/1459235) where -setObject:forKeyedSubscript:
      // is being passed a nil key, and the only place that happens in this
      // function is below.
      continue;
    }
    if (NSNumber* count = name_counts[voice_name]) {
      name_counts[voice_name] = @(count.intValue + 1);
    } else {
      name_counts[voice_name] = @1;
    }
  }

  voices.reserve(av_speech_voices.count);
  for (AVSpeechSynthesisVoice* av_speech_voice in av_speech_voices) {
    NSString* voice_name = av_speech_voice.name;
    if (!voice_name) {
      // AVSpeechSynthesisVoice.name is not a nullable property, but there are
      // crashes (https://crbug.com/1459235) where it seems like it's returning
      // nil. Without a name, a voice is useless, so skip it.
      continue;
    }

    voices.emplace_back();
    content::VoiceData& data = voices.back();

    if (name_counts[voice_name].intValue > 1) {
      // The language property on a voice is a BCP 47 code (e.g. "en-US") while
      // an NSLocale locale identifier isn't (e.g. "en_US"). However, using the
      // BCP 47 code as if it were a locale identifier works just fine (tested
      // back to 10.15).
      NSString* localized_language = [NSLocale.autoupdatingCurrentLocale
          localizedStringForLocaleIdentifier:av_speech_voice.language];
      voice_name = [NSString
          stringWithFormat:@"%@ (%@)", voice_name, localized_language];
    }

    data.native = true;
    data.native_voice_identifier =
        base::SysNSStringToUTF8(av_speech_voice.identifier);
    data.name = base::SysNSStringToUTF8(voice_name);
    data.lang = base::SysNSStringToUTF8(av_speech_voice.language);

    data.events.insert(content::TTS_EVENT_START);
    data.events.insert(content::TTS_EVENT_END);
    data.events.insert(content::TTS_EVENT_WORD);
    data.events.insert(content::TTS_EVENT_PAUSE);
    data.events.insert(content::TTS_EVENT_RESUME);
  }

  return voices;
}

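// Wraps `utterance_string` in an AVSpeechUtterance and attaches `utterance_id`
// to it as an associated object, so that the delegate callbacks below, which
// only receive the AVSpeechUtterance, can map it back to the TtsController
// utterance id. @selector(identifier) is used purely as a unique key for the
// association.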
AVSpeechUtterance* MakeUtterance(int utterance_id,
                                 const std::string& utterance_string) {
  AVSpeechUtterance* utterance = [AVSpeechUtterance
      speechUtteranceWithString:base::SysUTF8ToNSString(utterance_string)];
  objc_setAssociatedObject(utterance, @selector(identifier), @(utterance_id),
                           OBJC_ASSOCIATION_RETAIN);
  return utterance;
}

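// Returns the utterance id attached by MakeUtterance(), or
// TtsPlatformImplMac::kInvalidUtteranceId if no id is attached.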
int GetUtteranceId(AVSpeechUtterance* utterance) {
  NSNumber* identifier = base::apple::ObjCCast<NSNumber>(
      objc_getAssociatedObject(utterance, @selector(identifier)));
  if (identifier) {
    return identifier.intValue;
  }
  return TtsPlatformImplMac::kInvalidUtteranceId;
}

}  // namespace

// static
content::TtsPlatformImpl* content::TtsPlatformImpl::GetInstance() {
  return TtsPlatformImplMac::GetInstance();
}

TtsPlatformImplMac::~TtsPlatformImplMac() = default;

bool TtsPlatformImplMac::PlatformImplSupported() {
  return true;
}

bool TtsPlatformImplMac::PlatformImplInitialized() {
  return true;
}

void TtsPlatformImplMac::Speak(
    int utterance_id,
    const std::string& utterance,
    const std::string& lang,
    const content::VoiceData& voice,
    const content::UtteranceContinuousParameters& params,
    base::OnceCallback<void(bool)> on_speak_finished) {
  // Parse SSML and process speech.
  // TODO(crbug.com/40273591): AVSpeechUtterance has an initializer
  // -initWithSSMLRepresentation:. Should that be used instead?
  content::TtsController::GetInstance()->StripSSML(
      utterance, base::BindOnce(&TtsPlatformImplMac::ProcessSpeech,
                                base::Unretained(this), utterance_id, lang,
                                voice, params, std::move(on_speak_finished)));
}

void TtsPlatformImplMac::ProcessSpeech(
    int utterance_id,
    const std::string& lang,
    const content::VoiceData& voice,
    const content::UtteranceContinuousParameters& params,
    base::OnceCallback<void(bool)> on_speak_finished,
    const std::string& parsed_utterance) {
  utterance_ = parsed_utterance;
  paused_ = false;
  utterance_id_ = utterance_id;

  AVSpeechUtterance* speech_utterance =
      MakeUtterance(utterance_id, parsed_utterance);
  if (!speech_utterance) {
    std::move(on_speak_finished).Run(false);
    return;
  }

  speech_utterance.voice = [AVSpeechSynthesisVoice
      voiceWithIdentifier:base::SysUTF8ToNSString(
                              voice.native_voice_identifier)];

  if (params.rate >= 0.0) {
    // The two relevant APIs have different ranges:
    // - Web Speech API is [.1, 10] with default 1
    // - AVSpeechSynthesizer is [0, 1] with default .5
    //
    // Speeds in the Web Speech API other than 1 (the default rate) are meant
    // to be multiples of the default speaking rate.
    //
    // The mapping of AVSpeechSynthesizer speeds was done experimentally, using
    // the fourth paragraph of _A Tale of Two Cities_. With the "Samantha"
    // voice, AVSpeechUtteranceDefaultSpeechRate takes about 80s to read the
    // paragraph, while AVSpeechUtteranceMaximumSpeechRate takes about 20s.
    // Therefore, map
    //
    //   1 → AVSpeechUtteranceDefaultSpeechRate
    //   4 → AVSpeechUtteranceMaximumSpeechRate
    //
    // and cap anything higher.
    //
    // References:
    //
    // https://developer.mozilla.org/en-US/docs/Web/API/SpeechSynthesisUtterance/rate
    // https://github.com/WebKit/WebKit/blob/main/Source/WebCore/platform/cocoa/PlatformSpeechSynthesizerCocoa.mm
    // ^ This is the WebKit implementation. It appears to have a bug in
    //   scaling, where a Web Speech API rate of 2 is scaled to
    //   AVSpeechUtteranceMaximumSpeechRate and the value passed to the
    //   AVSpeechSynthesizer goes up from there. A bug was filed about this:
    //   https://bugs.webkit.org/show_bug.cgi?id=258587

    float rate = params.rate;
    if (rate < 1) {
      // If a slower than normal rate is requested, scale the default speech
      // rate down proportionally.
      rate *= AVSpeechUtteranceDefaultSpeechRate;
    } else {
      // Scale the AVSpeech rate headroom proportionally to match the excess
      // above 1 in the Web Speech API, capping at a Web Speech API value of 4.
      const float kWebSpeechDefault = 1;
      const float kWebSpeechMaxSupported = 4;
      const float kAVSpeechRateHeadroom = AVSpeechUtteranceMaximumSpeechRate -
                                          AVSpeechUtteranceDefaultSpeechRate;
      const float excess = rate - kWebSpeechDefault;
      const float capped_excess =
          std::min(excess, (kWebSpeechMaxSupported - kWebSpeechDefault));
      const float headroom_proportion =
          capped_excess / (kWebSpeechMaxSupported - kWebSpeechDefault);
      rate = AVSpeechUtteranceDefaultSpeechRate +
             headroom_proportion * kAVSpeechRateHeadroom;
    }

    speech_utterance.rate = rate;
  }

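  // Note: the Web Speech API defines pitch as [0, 2] with a default of 1,
  // while AVSpeechUtterance.pitchMultiplier is documented as [0.5, 2] with a
  // default of 1. The value is passed through unchanged.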
  if (params.pitch >= 0.0) {
    speech_utterance.pitchMultiplier = params.pitch;
  }

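  // Both the Web Speech API volume and AVSpeechUtterance.volume use a [0, 1]
  // range with a default of 1, so no conversion is needed.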
  if (params.volume >= 0.0) {
    speech_utterance.volume = params.volume;
  }

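  // -speakUtterance: enqueues the utterance and returns immediately; progress
  // is reported asynchronously through the ChromeTtsDelegate callbacks below.
  // Report success as soon as the utterance has been queued.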
  [speech_synthesizer_ speakUtterance:speech_utterance];
  std::move(on_speak_finished).Run(true);
}

bool TtsPlatformImplMac::StopSpeaking() {
  [speech_synthesizer_ stopSpeakingAtBoundary:AVSpeechBoundaryImmediate];
  paused_ = false;
  return true;
}

void TtsPlatformImplMac::Pause() {
  if (!paused_) {
    [speech_synthesizer_ pauseSpeakingAtBoundary:AVSpeechBoundaryImmediate];
    paused_ = true;
    content::TtsController::GetInstance()->OnTtsEvent(
        utterance_id_, content::TTS_EVENT_PAUSE, last_char_index_, kNoLength,
        kNoError);
  }
}

void TtsPlatformImplMac::Resume() {
  if (paused_) {
    [speech_synthesizer_ continueSpeaking];
    paused_ = false;
    content::TtsController::GetInstance()->OnTtsEvent(
        utterance_id_, content::TTS_EVENT_RESUME, last_char_index_, kNoLength,
        kNoError);
  }
}

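// Note: AVSpeechSynthesizer's `speaking` property also reports YES while
// speech is merely paused, so a true return value here does not imply that
// audio is currently playing.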
bool TtsPlatformImplMac::IsSpeaking() {
  return speech_synthesizer_.speaking;
}

void TtsPlatformImplMac::GetVoices(
    std::vector<content::VoiceData>* outVoices) {
  *outVoices = Voices();
}

void TtsPlatformImplMac::OnSpeechEvent(int utterance_id,
                                       content::TtsEventType event_type,
                                       int char_index,
                                       int char_length,
                                       const std::string& error_message) {
  // Don't send events from an utterance that's already completed.
  if (utterance_id != utterance_id_) {
    return;
  }

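  // For the END event, report the character index as the end of the utterance
  // text rather than the index supplied by the delegate callback.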
  if (event_type == content::TTS_EVENT_END) {
    char_index = utterance_.size();
  }

  content::TtsController::GetInstance()->OnTtsEvent(
      utterance_id_, event_type, char_index, char_length, error_message);
  last_char_index_ = char_index;
}

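// AVSpeechSynthesizer holds its delegate weakly, so the ChromeTtsDelegate is
// kept alive by `delegate_` for the lifetime of this (never-destructed)
// singleton.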
TtsPlatformImplMac::TtsPlatformImplMac()
    : speech_synthesizer_([[AVSpeechSynthesizer alloc] init]),
      delegate_([[ChromeTtsDelegate alloc] initWithPlatformImplMac:this]) {
  speech_synthesizer_.delegate = delegate_;
}

// static
TtsPlatformImplMac* TtsPlatformImplMac::GetInstance() {
  static base::NoDestructor<TtsPlatformImplMac> tts_platform;
  return tts_platform.get();
}

// static
std::vector<content::VoiceData>& TtsPlatformImplMac::VoicesRefForTesting() {
  return VoicesRef();
}

@implementation ChromeTtsDelegate {
  raw_ptr<TtsPlatformImplMac> _ttsImplMac;  // weak.
}

- (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac {
  if ((self = [super init])) {
    _ttsImplMac = ttsImplMac;
  }
  return self;
}

- (void)speechSynthesizer:(AVSpeechSynthesizer*)synthesizer
    didStartSpeechUtterance:(AVSpeechUtterance*)utterance {
  _ttsImplMac->OnSpeechEvent(GetUtteranceId(utterance),
                             content::TTS_EVENT_START, /*char_index=*/0,
                             kNoLength, kNoError);
}

- (void)speechSynthesizer:(AVSpeechSynthesizer*)synthesizer
    didFinishSpeechUtterance:(AVSpeechUtterance*)utterance {
  _ttsImplMac->OnSpeechEvent(GetUtteranceId(utterance), content::TTS_EVENT_END,
                             /*char_index=*/0, kNoLength, kNoError);
}

- (void)speechSynthesizer:(AVSpeechSynthesizer*)synthesizer
    willSpeakRangeOfSpeechString:(NSRange)characterRange
                       utterance:(AVSpeechUtterance*)utterance {
  // Ignore bogus ranges. The Mac speech synthesizer is a bit buggy and
  // occasionally returns a number way out of range.
  if (characterRange.location > utterance.speechString.length ||
      characterRange.length == 0) {
    return;
  }
  _ttsImplMac->OnSpeechEvent(GetUtteranceId(utterance), content::TTS_EVENT_WORD,
                             characterRange.location, characterRange.length,
                             kNoError);
}

@end