| // Copyright 2018 The Chromium OS Authors. All rights reserved. |
// Use of this source code is governed by the GPL license that can be
| // found in the LICENSE file. |
| |
| /** |
| * @fileoverview eSpeak text-to-speech extension. |
| * |
| * There are three components to the extension: |
| * |
| * The open-source eSpeak-NG engine is compiled using Emscripten into a |
| * WebAssembly module that's run in a web worker. The interface to it is |
| * stored in this.tts_. This class sends it asynchronous queries for |
| * a list of voices and commands to start generating speech. Callbacks |
| * from the engine queue up events like word callbacks, and generate |
| * audio data. |
| * |
| * The implementation of the chrome.ttsEngine API is how we get |
| * commands to speak and stop. We also use this API to publish the |
| * current set of enabled voices. |
| * |
 * Finally, the audio data generated by the engine is output using
 * a private mojo interface. This is an intermediate solution that will
 * serve as motivation for a new chrome.TtsEngine API.
| */ |
class EspeakTtsEngine {
  constructor() {
    /**
     * The callback provided by a call to chrome.ttsEngine.onSpeak, which is
     * our channel to send status updates about the progress of speech.
     */
    this.ttsEngineApiCallback_ = null;

    /**
     * The string containing the text we're currently speaking.
     */
    this.utterance_ = null;

    /**
     * An id to track the current utterance, used e.g. to ensure audio data
     * for previous utterances is ignored.
     */
    this.utteranceId_ = 0;

    /**
     * The handle to the WASM Espeak-NG module.
     */
    this.tts_ = null;

    /**
     * Sample rate requested for Espeak to generate audio. Populated from the
     * playback stream in finishInitialization_().
     */
    this.sampleRate_;

    /**
     * The size, in samples, of each audio frame. When using an
     * AudioWorkletProcessor this is always 128 samples. Populated from the
     * playback stream in finishInitialization_().
     */
    this.bufferLen_;

    /**
     * The current buffer of audio samples from the engine. When each
     * buffer fills up it's sent to the audio worklet processor thread.
     */
    this.currentBuffer_ = null;

    /**
     * The current zero-based index into |this.currentBuffer_| where the
     * next audio sample should be written.
     */
    this.currentBufferIndex_ = 0;

    /**
     * Whether or not we're currently playing audio.
     * This is not set to true until after we've got at least one
     * buffer worth of samples from the engine and sent it to be played.
     */
    this.playing_ = false;

    /**
     * The voice data for the languages that are supported by eSpeak
     * and enabled by the user. The elements of this array are in the
     * format needed by the chrome.ttsEngine API.
     */
    this.ttsEngineApiVoices_ = [];

    /**
     * The data about each enabled language in eSpeak's format.
     * Used when we get a speech request that specifies a language
     * code but not a specific complete voice name.
     */
    this.internalVoiceInfos_ = [];

    /**
     * The data about every supported eSpeak language, in eSpeak's
     * format. Used by the options page to present all languages to
     * the user and let them choose which ones to enable.
     */
    this.langInfoArray_ = [];

    /**
     * Function to call when langInfoArray_ is populated.
     */
    this.pendingGetLangInfoArrayCallback_ = null;

    /**
     * The eSpeak-internal name of the default voice to use when no
     * metadata is provided at all.
     */
    this.defaultEspeakVoiceName_ = null;

    /**
     * The timestamp when speech started. Used to fire events like
     * word callbacks.
     */
    this.startTime_ = null;

    /**
     * Callback to run after initialization (e.g. a speak request that
     * arrived before we were ready).
     */
    this.pendingCallback_ = null;

    /**
     * True if initialization is done.
     */
    this.initialized_ = false;

    // Initialize the WASM module and call updateVoices when it's ready.
    // updateVoices() eventually calls finishInitialization_(), which
    // sets up the audio playback stream.
    this.tts_ = new eSpeakNG('js/espeakng.worker.js', () => {
      this.updateVoices();
    });

    // Start listening to chrome.ttsEngine requests.
    chrome.ttsEngine.onSpeak.addListener(this.onSpeak.bind(this));
    chrome.ttsEngine.onStop.addListener(this.onStop.bind(this));
  }

  /**
   * Return the array of all languages supported by eSpeak. Called by
   * the options page. If the list isn't loaded yet, the callback is
   * saved and invoked from updateVoices() once the list arrives.
   *
   * @param {function(Array)} callback Receives the language info array.
   */
  getLangInfoArray(callback) {
    if (this.langInfoArray_.length > 0) {
      callback(this.langInfoArray_);
      return;
    }

    this.pendingGetLangInfoArrayCallback_ = callback;
  }

  /**
   * Function called on startup and when the set of enabled voices
   * may have changed. Queries the engine for its voice list, filters
   * it down to the enabled languages, and finishes initialization once
   * every language has been checked.
   */
  updateVoices() {
    console.log('updateVoices');
    this.tts_.list_voices((langInfoArray) => {
      this.langInfoArray_ = langInfoArray;
      // Remove Japanese because the voice cannot pronounce many characters.
      for (let i = 0; i < this.langInfoArray_.length; ++i) {
        if (this.langInfoArray_[i].identifier === 'jpx/ja') {
          this.langInfoArray_.splice(i, 1);
          break;
        }
      }
      this.ttsEngineApiVoices_ = [];
      this.internalVoiceInfos_ = [];
      this.langsRemaining_ = this.langInfoArray_.length;
      if (this.langsRemaining_ === 0) {
        // No languages at all: the per-language callbacks below will never
        // fire, so finish initialization directly rather than hanging.
        this.finishInitialization_();
      }
      this.langInfoArray_.forEach((langInfo) => {
        isEspeakLanguageEnabled(langInfo, (enabled) => {
          if (enabled) {
            const voiceName = 'eSpeak ' + langInfo.name;
            const ttsEngineApiVoice = {
              voiceName: voiceName,
              lang: langInfo.languages[0].name,
              remote: false,
              eventTypes: ['start', 'end', 'word', 'sentence', 'error']
            };
            this.ttsEngineApiVoices_.push(ttsEngineApiVoice);

            const internalVoiceInfo = {
              voiceName: voiceName,
              espeakVoiceName: langInfo.name,
              languages: langInfo.languages
            };
            this.internalVoiceInfos_.push(internalVoiceInfo);
          }

          this.langsRemaining_--;
          if (this.langsRemaining_ === 0) {
            this.finishInitialization_();
          }
        });
      });
      if (this.pendingGetLangInfoArrayCallback_) {
        this.pendingGetLangInfoArrayCallback_(this.langInfoArray_);
        this.pendingGetLangInfoArrayCallback_ = null;
      }
    });
  }

  /**
   * Called after asynchronously getting the list of languages
   * supported by eSpeak and filtering it to only the ones
   * currently enabled and converting that to the format expected
   * by the chrome.ttsEngine API.
   *
   * Calls chrome.ttsEngine.updateVoices to advertise the set of
   * voices currently enabled, and updates the default voice to
   * be used if someone requests speech without specifying any
   * other metadata.
   */
  finishUpdatingVoices_() {
    console.log('finishUpdatingVoices_');
    chrome.ttsEngine.updateVoices(this.ttsEngineApiVoices_);
    console.log('Loaded ' + this.ttsEngineApiVoices_.length + ' voices');

    this.defaultEspeakVoiceName_ =
        this.getBestEspeakVoice('', navigator.language);
  }

  /**
   * Finish initialization by initializing the playback system.
   * This is the final step, then we can start speaking. Records the
   * stream's sample rate and buffer size, publishes the voices, and
   * runs any speak request that was queued while initializing.
   */
  finishInitialization_() {
    chrome.mojoPrivate.requireAsync('chromeos.tts.stream_factory')
        .then(factory => {
          factory.createPlaybackTtsStream().then(result => {
            this.ttsStream_ = result.stream;
            this.sampleRate_ = result.sampleRate;
            this.bufferLen_ = result.bufferSize;

            this.finishUpdatingVoices_();

            // Initialization is now complete.
            this.initialized_ = true;
            if (this.pendingCallback_) {
              this.pendingCallback_();
              this.pendingCallback_ = null;
            }
          });
        });
  }

  /**
   * Called by the client to stop speech.
   */
  onStop() {
    this.ttsStream_.stop();
    this.onStopInternal();
  }

  /**
   * Clears all in-progress speech state without touching the stream.
   */
  onStopInternal() {
    this.pendingCallback_ = null;

    this.playing_ = false;
    this.ttsEngineApiCallback_ = null;
  }

  /**
   * Called by the client to start speech synthesis.
   *
   * @param {string} utterance The utterance to say.
   * @param {object} options The options affecting the speech, like language,
   *     pitch, rate, etc.
   * @param {function(object)} callback The function to receive messages from
   *     the engine.
   */
  onSpeak(utterance, options, callback) {
    if (this.ttsStream_ && !this.ttsStream_.ptr.isBound()) {
      // The browser-side stream went away; shut down this context.
      window.close();
      return;
    }

    console.log('Will speak: "' + utterance + '" lang="' + options.lang + '"');
    this.utteranceId_++;
    this.pendingCallback_ = null;

    if (!this.initialized_) {
      // We got a call to speak before we're initialized. Enqueue this until
      // we're ready.
      this.pendingCallback_ =
          this.onSpeak.bind(this, utterance, options, callback);
      return;
    }

    this.ttsEngineApiCallback_ = callback;
    this.utterance_ = utterance;

    const espeakVoiceName =
        this.getBestEspeakVoice(options.voiceName, options.lang);
    this.tts_.set_voice(espeakVoiceName);

    this.tts_.set_systemSampleRate(this.sampleRate_);

    // Chrome TTS rates range from 0.1 to 10.0, with 1.0 as the default.
    // eSpeak rates range from 80 to 450, with 175 as the default.
    const rate = Math.min(Math.max(Math.floor(options.rate * 175), 80), 450);
    this.tts_.set_rate(rate);

    // Chrome TTS pitches range from 0.0 to 2.0, with 1.0 as the default.
    // eSpeak pitches range from 0 to 99, with 50 as the default.
    const pitch = Math.min(Math.max(Math.floor(options.pitch * 50), 0), 99);
    this.tts_.set_pitch(pitch);

    const volume = Math.min(Math.max(options.volume, 0.0), 1.0);
    this.ttsStream_.setVolume(volume);
    const owner = this;
    this.ttsStream_.play().then(ttsEventObserver => {
      // Bridge mojo playback events to chrome.ttsEngine events.
      new mojo.Binding(chromeos.tts.mojom.TtsEventObserver, new class {
        onStart() {
          owner.scheduleTimepointEvent(0, 'start');
        }

        onTimepoint(charIndex) {
          owner.scheduleTimepointEvent(charIndex, 'word');
        }

        onEnd() {
          owner.scheduleTimepointEvent(owner.utterance_.length, 'end');
        }

        onError() {}
      }(), ttsEventObserver.eventObserver);
    });

    // Capture the id so late audio from a superseded utterance is dropped.
    const utteranceId = this.utteranceId_;
    const handleEvents = (samples, events) => {
      if (this.utteranceId_ !== utteranceId) {
        return;
      }

      // An undefined events list is how the native code reliably signals
      // the end of synthesis; 'end' type events can still be followed by
      // audio data.
      const isEnd = events === undefined;
      let charIndex = -1;

      if (events) {
        // Only the text position of the most recent event is consumed; it
        // is attached to the next audio buffer sent to the stream.
        for (const event of events) {
          charIndex = event.text_position;
        }
      }
      this.processSamples(new Float32Array(samples), charIndex, isEnd);
    };

    this.tts_.synthesize(utterance, handleEvents);
  }

  /**
   * Given a voice name and language, determine the best eSpeak voice
   * to use. Sometimes a partial match needs to be used, for example
   * because the language requested is just 'en', but eSpeak has
   * 'en-US', 'en-GB', etc.
   *
   * @param {string|undefined} desiredVoiceName Full chrome.ttsEngine voice
   *     name (e.g. 'eSpeak English'), if any.
   * @param {string|undefined} desiredLang Language code, if any.
   * @return {?string} The eSpeak-internal voice name, suitable for
   *     passing to this.tts_.set_voice().
   */
  getBestEspeakVoice(desiredVoiceName, desiredLang) {
    let exactMatchEspeakVoiceName = null;
    let langMatchEspeakVoiceName = null;
    let langMatch = '';
    // options.lang is optional in the chrome.ttsEngine API; treat a missing
    // language as "no language match possible" rather than throwing.
    const wantLang = (desiredLang || '').toLowerCase();
    this.internalVoiceInfos_.forEach((voice) => {
      if (desiredVoiceName === voice.voiceName) {
        exactMatchEspeakVoiceName = voice.espeakVoiceName;
      }
      voice.languages.forEach((lang) => {
        if (wantLang === lang.name.toLowerCase() &&
            lang.name.length > langMatch.length) {
          langMatch = lang.name;
          // Return the eSpeak-internal name, consistent with the exact-match
          // case above; the chrome-facing voiceName ('eSpeak ...') is not a
          // valid engine voice.
          langMatchEspeakVoiceName = voice.espeakVoiceName;
        }
      });
    });

    if (exactMatchEspeakVoiceName) {
      return exactMatchEspeakVoiceName;
    }

    if (langMatchEspeakVoiceName) {
      return langMatchEspeakVoiceName;
    }

    return this.defaultEspeakVoiceName_;
  }

  /**
   * Called when we get samples back from the WebAssembly TTS engine.
   * Fills up buffers with samples, and as each buffer fills, sends it to be
   * played.
   *
   * If these are the first samples for a new utterance, marks playback
   * as started.
   *
   * @param {Float32Array} samples Audio samples from the engine.
   * @param {number} charIndex Text position to attach to the next buffer,
   *     or -1 for none.
   * @param {boolean} isEnd True if this is the final batch of samples.
   */
  processSamples(samples, charIndex, isEnd) {
    if (!this.ttsEngineApiCallback_) {
      // Speech was stopped; drop the audio.
      return;
    }

    let i = 0;
    const len = samples.length;

    let didSendBuffers = false;
    while (i < len) {
      const chunkLen =
          Math.min(this.bufferLen_ - this.currentBufferIndex_, len - i);
      if (!this.currentBuffer_) {
        this.currentBuffer_ = new Float32Array(this.bufferLen_);
      }
      this.currentBuffer_.set(
          samples.subarray(i, i + chunkLen), this.currentBufferIndex_);
      i += chunkLen;
      this.currentBufferIndex_ += chunkLen;
      if (this.currentBufferIndex_ === this.bufferLen_) {
        this.ttsStream_.sendAudioBuffer(this.currentBuffer_, charIndex, isEnd);
        didSendBuffers = true;
        // The text position is only attached to one buffer per batch.
        charIndex = -1;

        this.currentBufferIndex_ = 0;
        this.currentBuffer_ = null;
      }
    }

    // Push the final, partially-filled buffer.
    if (isEnd && this.currentBufferIndex_ > 0) {
      this.ttsStream_.sendAudioBuffer(this.currentBuffer_, charIndex, isEnd);
      didSendBuffers = true;
      this.currentBufferIndex_ = 0;
      this.currentBuffer_ = null;
    }

    // Record playback start on the first buffer sent.
    if (didSendBuffers && !this.playing_) {
      this.playing_ = true;
      this.startTime_ = new Date();
    }
  }

  /**
   * Schedules an event to be fired indicating progress of speech synthesis.
   *
   * @param {number} textPosition Zero-based character index; negative
   *     positions are ignored.
   * @param {string} eventType A chrome.ttsEngine event type, e.g. 'start',
   *     'word', 'end'.
   */
  scheduleTimepointEvent(textPosition, eventType) {
    if (!this.ttsEngineApiCallback_) {
      return;
    }

    if (textPosition < 0) {
      return;
    }
    this.ttsEngineApiCallback_({'type': eventType, 'charIndex': textPosition});
  }
}
| |
// In 'split' manifest mode, the extension system runs two copies of the
// extension: one in an incognito context and one not. In guest mode, the
// extension system runs only the incognito copy. To prevent doubling of
// this extension, start the engine in exactly one context.
const manifest =
    /** @type {{incognito: (string|undefined)}} */ (
        chrome.runtime.getManifest());
const isDuplicateContext =
    manifest.incognito == 'split' && !chrome.extension.inIncognitoContext;
if (isDuplicateContext) {
  window.close();
} else {
  window.engine = new EspeakTtsEngine();
}