| // Copyright 2020 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| import 'chrome://resources/polymer/v3_0/iron-pages/iron-pages.js'; |
| import 'chrome://resources/polymer/v3_0/iron-selector/iron-selector.js'; |
| import 'chrome://resources/cr_elements/cr_button/cr_button.m.js'; |
| import 'chrome://resources/cr_elements/cr_icon_button/cr_icon_button.m.js'; |
| |
| import {PolymerElement} from 'chrome://resources/polymer/v3_0/polymer/polymer_bundled.min.js'; |
| |
| import {loadTimeData} from './i18n_setup.js'; |
| import {PageHandlerRemote} from './new_tab_page.mojom-webui.js'; |
| import {NewTabPageProxy} from './new_tab_page_proxy.js'; |
| import {getTemplate} from './voice_search_overlay.html.js'; |
| import {WindowProxy} from './window_proxy.js'; |
| |
/**
 * Confidence an interim transcript must exceed to be treated as "confident
 * enough". The speech API reports confidence as a number between 0 and 1,
 * where higher means more certain.
 */
const RECOGNITION_CONFIDENCE_THRESHOLD = 0.5;

/**
 * Maximum number of recognized characters before the query is
 * force-submitted. Characters of non-confident transcripts count as well.
 */
const QUERY_LENGTH_LIMIT = 120;

/**
 * How long (in milliseconds) the UI stays open without any interaction before
 * it gives up.
 */
const IDLE_TIMEOUT_MS = 8000;

/**
 * How long (in milliseconds) an error stays on screen before the UI closes.
 * This is the short variant, used when no click-target is present.
 */
const ERROR_TIMEOUT_SHORT_MS = 9000;

/**
 * How long (in milliseconds) an error stays on screen before the UI closes.
 * This is the long variant, used when a click-target is present.
 */
const ERROR_TIMEOUT_LONG_MS = 24000;

/** Minimum transition time (in milliseconds) for the volume rings. */
const VOLUME_ANIMATION_DURATION_MIN_MS = 170;

/**
 * Range (in milliseconds) of the transition time for the volume rings; it is
 * the span above the minimum within which a transition time may fall.
 */
const VOLUME_ANIMATION_DURATION_RANGE_MS = 10;
| |
/**
 * The set of controller states. Values are assigned consecutively starting at
 * -1, so the first listening state (STARTED) is 0.
 */
enum State {
  /** Initial state, before voice recognition has been set up. */
  UNINITIALIZED = -1,
  /** Speech recognition has started but no audio has been captured yet. */
  STARTED,
  /**
   * The Web Speech API is capturing audio but has not recognized any speech
   * yet. The UI indicates that audio is being captured.
   */
  AUDIO_RECEIVED,
  /**
   * The Web Speech API has recognized speech but no transcripts have come
   * back yet. The UI indicates that audio is being captured and pulsates the
   * audio button.
   */
  SPEECH_RECEIVED,
  /**
   * Speech was recognized and text transcripts have been reported back. The
   * UI indicates that audio is being captured and shows the transcripts
   * received so far.
   */
  RESULT_RECEIVED,
  /**
   * Speech recognition failed because the Web Speech API reported an error
   * (or a no-match error); a timeout may have occurred as well. The UI shows
   * the error message.
   */
  ERROR_RECEIVED,
  /**
   * A final search query has been received but the UI has not redirected yet.
   * The UI shows the final query.
   */
  RESULT_FINAL,
}
| |
/**
 * Action the user can perform while using voice search. This enum must match
 * the numbering for NewTabPageVoiceAction in enums.xml. These values are
 * persisted to logs. Entries should not be renumbered, removed or reused.
 */
export enum Action {
  ACTIVATE_SEARCH_BOX = 0,
  ACTIVATE_KEYBOARD = 1,
  CLOSE_OVERLAY = 2,
  QUERY_SUBMITTED = 3,
  SUPPORT_LINK_CLICKED = 4,
  TRY_AGAIN_LINK = 5,
  TRY_AGAIN_MIC_BUTTON = 6,
}

/**
 * Errors that can occur while using voice search. This enum must match the
 * numbering for NewTabPageVoiceError in enums.xml. These values are persisted
 * to logs. Entries should not be renumbered, removed or reused.
 */
export enum Error {
  ABORTED = 0,
  AUDIO_CAPTURE = 1,
  BAD_GRAMMAR = 2,
  LANGUAGE_NOT_SUPPORTED = 3,
  NETWORK = 4,
  NO_MATCH = 5,
  NO_SPEECH = 6,
  NOT_ALLOWED = 7,
  OTHER = 8,
  SERVICE_NOT_ALLOWED = 9,
}

/**
 * Returns the size of a numeric enum, i.e. its largest value plus one.
 *
 * A numeric TypeScript enum object holds both the forward mapping
 * (name -> value) and the reverse mapping (value -> name), so
 * `Object.keys(e).length` is twice the number of members. Only the numeric
 * values are considered here.
 * @param enumObject The runtime object of a numeric enum.
 * @return The exclusive upper bound of the enum's values.
 */
function getEnumSize(enumObject: object): number {
  const numericValues = Object.values(enumObject).filter(
      (value): value is number => typeof value === 'number');
  return Math.max(...numericValues) + 1;
}

/**
 * Records a voice search user action in the 'NewTabPage.VoiceActions'
 * enumeration histogram.
 * @param action The action performed by the user.
 */
export function recordVoiceAction(action: Action) {
  chrome.metricsPrivate.recordEnumerationValue(
      'NewTabPage.VoiceActions', action, getEnumSize(Action));
}
| |
/**
 * Translates an error code reported by the webkit speech recognition API into
 * the matching member of the Error enum.
 * @param webkitError The error string received from the webkit speech
 *     recognition API.
 * @return The corresponding Error value, or Error.OTHER for any string that
 *     is not one of the known codes.
 */
function toError(webkitError: string): Error {
  // Built on each call; errors are rare, so the cost is irrelevant.
  const errorsByCode = new Map<string, Error>([
    ['aborted', Error.ABORTED],
    ['audio-capture', Error.AUDIO_CAPTURE],
    ['language-not-supported', Error.LANGUAGE_NOT_SUPPORTED],
    ['network', Error.NETWORK],
    ['no-speech', Error.NO_SPEECH],
    ['not-allowed', Error.NOT_ALLOWED],
    ['service-not-allowed', Error.SERVICE_NOT_ALLOWED],
    ['bad-grammar', Error.BAD_GRAMMAR],
  ]);
  // `??` rather than `||`: Error.ABORTED is 0 and must not fall through.
  return errorsByCode.get(webkitError) ?? Error.OTHER;
}
| |
/**
 * Picks how long an error message stays visible, depending on which error
 * occurred.
 * @param error An error from the Error enum.
 * @return ERROR_TIMEOUT_LONG_MS for AUDIO_CAPTURE, NO_SPEECH, NOT_ALLOWED and
 *     NO_MATCH; ERROR_TIMEOUT_SHORT_MS for every other error.
 */
function getErrorTimeout(error: Error): number {
  const longTimeoutErrors: Error[] = [
    Error.AUDIO_CAPTURE,
    Error.NO_SPEECH,
    Error.NOT_ALLOWED,
    Error.NO_MATCH,
  ];
  return longTimeoutErrors.includes(error) ? ERROR_TIMEOUT_LONG_MS :
                                             ERROR_TIMEOUT_SHORT_MS;
}
| |
// Declares the vendor-prefixed speech recognition constructor on `window` so
// that `new window.webkitSpeechRecognition()` type-checks.
// TODO(crbug.com/570968): Remove when bug is fixed.
declare global {
  interface Window {
    webkitSpeechRecognition: typeof SpeechRecognition;
  }
}
| |
// Typed access to the elements the class looks up through `this.$`.
export interface VoiceSearchOverlayElement {
  $: {
    dialog: HTMLDialogElement,
    micContainer: HTMLElement,
    micVolume: HTMLElement,
  };
}

// Overlay that lets the user perform voice searches.
export class VoiceSearchOverlayElement extends PolymerElement {
  static get is() {
    return 'ntp-voice-search-overlay';
  }

  static get template() {
    return getTemplate();
  }

  static get properties() {
    return {
      interimResult_: String,
      finalResult_: String,

      state_: {
        type: Number,
        value: State.UNINITIALIZED,
      },

      error_: Number,

      helpUrl_: {
        type: String,
        readOnly: true,
        value: `https://support.google.com/chrome/?` +
            `p=ui_voice_search&hl=${window.navigator.language}`,
      },

      micVolumeLevel_: {
        type: Number,
        value: 0,
      },

      micVolumeDuration_: {
        type: Number,
        value: VOLUME_ANIMATION_DURATION_MIN_MS,
      },
    };
  }

  // Concatenated interim transcripts whose confidence does not exceed
  // RECOGNITION_CONFIDENCE_THRESHOLD.
  private interimResult_: string;
  // The final transcript, or the concatenated interim transcripts whose
  // confidence exceeds RECOGNITION_CONFIDENCE_THRESHOLD. This is the text that
  // gets submitted as the query.
  private finalResult_: string;
  private state_: State;
  // Last error passed to onError_() (other than ABORTED).
  private error_: Error;
  private helpUrl_: string;
  private micVolumeLevel_: number;
  private micVolumeDuration_: number;

  private pageHandler_: PageHandlerRemote;
  private voiceRecognition_: SpeechRecognition;
  // Id of the pending idle or error timer; both kinds share this slot, so
  // scheduling one cancels the other.
  private timerId_: number|null = null;

  constructor() {
    super();
    this.pageHandler_ = NewTabPageProxy.getInstance().handler;
    this.voiceRecognition_ = new window.webkitSpeechRecognition();
    this.voiceRecognition_.continuous = false;
    this.voiceRecognition_.interimResults = true;
    this.voiceRecognition_.lang = window.navigator.language;
    this.voiceRecognition_.onaudiostart = this.onAudioStart_.bind(this);
    this.voiceRecognition_.onspeechstart = this.onSpeechStart_.bind(this);
    this.voiceRecognition_.onresult = this.onResult_.bind(this);
    this.voiceRecognition_.onend = this.onEnd_.bind(this);
    this.voiceRecognition_.onerror = (e) => {
      this.onError_(toError(e.error));
    };
    this.voiceRecognition_.onnomatch = () => {
      this.onError_(Error.NO_MATCH);
    };
  }

  /** Opens the dialog modally and starts recognition once attached. */
  override connectedCallback() {
    super.connectedCallback();
    this.$.dialog.showModal();
    this.start();
  }

  /** Starts speech recognition and arms the idle timer. */
  private start() {
    this.voiceRecognition_.start();
    this.state_ = State.STARTED;
    this.resetIdleTimer_();
  }

  /** Aborts recognition and notifies listeners through a 'close' event. */
  private onOverlayClose_() {
    this.voiceRecognition_.abort();
    this.dispatchEvent(new Event('close'));
  }

  /** Closes the dialog and records the CLOSE_OVERLAY action. */
  private onOverlayClick_() {
    this.$.dialog.close();
    recordVoiceAction(Action.CLOSE_OVERLAY);
  }

  /**
   * Handles <ENTER> or <SPACE> to trigger a query if we have recognized
   * speech, and <ESCAPE> to close the overlay.
   */
  private onOverlayKeydown_(e: KeyboardEvent) {
    if (['Enter', ' '].includes(e.key) && this.finalResult_) {
      this.onFinalResult_();
    } else if (e.key === 'Escape') {
      this.onOverlayClick_();
    }
  }

  /**
   * Handles <ENTER> or <SPACE> to simulate click.
   */
  private onLinkKeydown_(e: KeyboardEvent) {
    if (!['Enter', ' '].includes(e.key)) {
      return;
    }
    // Otherwise, we may trigger overlay-wide keyboard shortcuts.
    e.stopPropagation();
    // Otherwise, we open the link twice.
    e.preventDefault();
    (e.target as HTMLElement).click();
  }

  /** Records that the support link was clicked. */
  private onLearnMoreClick_() {
    recordVoiceAction(Action.SUPPORT_LINK_CLICKED);
  }

  /** Restarts recognition from the "try again" link. */
  private onTryAgainClick_(e: Event) {
    // Otherwise, we close the overlay.
    e.stopPropagation();
    this.start();
    recordVoiceAction(Action.TRY_AGAIN_LINK);
  }

  /**
   * Restarts recognition from the mic button. Only acts while a NO_MATCH
   * error is being displayed; in every other situation the click is left
   * alone.
   */
  private onMicClick_(e: Event) {
    if (this.state_ !== State.ERROR_RECEIVED ||
        this.error_ !== Error.NO_MATCH) {
      return;
    }
    // Otherwise, we close the overlay.
    e.stopPropagation();
    this.start();
    recordVoiceAction(Action.TRY_AGAIN_MIC_BUTTON);
  }

  /** Replaces any pending timer with a fresh idle timer. */
  private resetIdleTimer_() {
    WindowProxy.getInstance().clearTimeout(this.timerId_);
    this.timerId_ = WindowProxy.getInstance().setTimeout(
        this.onIdleTimeout_.bind(this), IDLE_TIMEOUT_MS);
  }

  /**
   * Runs when nothing happened for IDLE_TIMEOUT_MS: submits whatever has been
   * confidently recognized, or otherwise aborts with a NO_MATCH error.
   */
  private onIdleTimeout_() {
    if (this.state_ === State.RESULT_FINAL) {
      // Waiting for query redirect.
      return;
    }
    if (this.finalResult_) {
      // Query what we recognized so far.
      this.onFinalResult_();
      return;
    }
    this.voiceRecognition_.abort();
    this.onError_(Error.NO_MATCH);
  }

  /**
   * Replaces any pending timer with one that closes the dialog.
   * @param duration Delay in milliseconds before the dialog is closed.
   */
  private resetErrorTimer_(duration: number) {
    WindowProxy.getInstance().clearTimeout(this.timerId_);
    this.timerId_ = WindowProxy.getInstance().setTimeout(() => {
      this.$.dialog.close();
    }, duration);
  }

  private onAudioStart_() {
    this.resetIdleTimer_();
    this.state_ = State.AUDIO_RECEIVED;
  }

  private onSpeechStart_() {
    this.resetIdleTimer_();
    this.state_ = State.SPEECH_RECEIVED;
    this.animateVolume_();
  }

  /**
   * Processes a recognition result: a final result is submitted right away,
   * interim results are split into confident and non-confident text.
   */
  private onResult_(e: SpeechRecognitionEvent) {
    this.resetIdleTimer_();

    switch (this.state_) {
      case State.STARTED:
        // Network bugginess (neither onaudiostart nor onspeechstart was
        // received); catch up on both.
        this.onAudioStart_();
        this.onSpeechStart_();
        break;
      case State.AUDIO_RECEIVED:
        // Network bugginess (onspeechstart was not received); catch up.
        this.onSpeechStart_();
        break;
      case State.SPEECH_RECEIVED:
      case State.RESULT_RECEIVED:
        // Normal, expected states for processing results.
        break;
      default:
        // Not expecting results in any other states.
        return;
    }

    const results = e.results;
    if (results.length === 0) {
      return;
    }
    this.state_ = State.RESULT_RECEIVED;
    this.interimResult_ = '';
    this.finalResult_ = '';

    const finalResult = results[e.resultIndex];
    // Process final results.
    if (finalResult.isFinal) {
      this.finalResult_ = finalResult[0].transcript;
      this.onFinalResult_();
      return;
    }

    // Process interim results.
    for (let j = 0; j < results.length; j++) {
      const result = results[j][0];
      if (result.confidence > RECOGNITION_CONFIDENCE_THRESHOLD) {
        this.finalResult_ += result.transcript;
      } else {
        this.interimResult_ += result.transcript;
      }
    }

    // Force-stop long queries. The limit applies to everything recognized so
    // far: the confident text in finalResult_ plus the non-confident text in
    // interimResult_.
    if (this.finalResult_.length + this.interimResult_.length >
        QUERY_LENGTH_LIMIT) {
      this.onFinalResult_();
    }
  }

  /**
   * Submits finalResult_ as a search query by navigating to the search URL.
   * Reports a NO_MATCH error instead when there is nothing to submit.
   */
  private onFinalResult_() {
    if (!this.finalResult_) {
      this.onError_(Error.NO_MATCH);
      return;
    }
    this.state_ = State.RESULT_FINAL;
    const searchParams = new URLSearchParams();
    searchParams.append('q', this.finalResult_);
    // Add a parameter to indicate that this request is a voice search.
    searchParams.append('gs_ivs', '1');
    // Build the query URL.
    const queryUrl =
        new URL('/search', loadTimeData.getString('googleBaseUrl'));
    queryUrl.search = searchParams.toString();
    recordVoiceAction(Action.QUERY_SUBMITTED);
    WindowProxy.getInstance().navigate(queryUrl.href);
  }

  /**
   * Handles the end of recognition. Ending in any state other than
   * ERROR_RECEIVED or RESULT_FINAL is turned into an error chosen by how far
   * recognition had progressed.
   */
  private onEnd_() {
    switch (this.state_) {
      case State.STARTED:
        this.onError_(Error.AUDIO_CAPTURE);
        return;
      case State.AUDIO_RECEIVED:
        this.onError_(Error.NO_SPEECH);
        return;
      case State.SPEECH_RECEIVED:
      case State.RESULT_RECEIVED:
        this.onError_(Error.NO_MATCH);
        return;
      case State.ERROR_RECEIVED:
      case State.RESULT_FINAL:
        return;
      default:
        this.onError_(Error.OTHER);
        return;
    }
  }

  /**
   * Records the error in the 'NewTabPage.VoiceErrors' histogram and, unless it
   * is ABORTED, switches to the error state and arms the error timer.
   */
  private onError_(error: Error) {
    // A numeric TypeScript enum object also contains reverse-mapping keys
    // ('0', '1', ...), so Object.keys(Error).length would be twice the number
    // of members. Count only the numeric values to get the real enum size.
    const errorEnumSize =
        Object.values(Error).filter(value => typeof value === 'number').length;
    chrome.metricsPrivate.recordEnumerationValue(
        'NewTabPage.VoiceErrors', error, errorEnumSize);
    if (error === Error.ABORTED) {
      // We are in the process of closing voice search.
      return;
    }
    this.error_ = error;
    this.state_ = State.ERROR_RECEIVED;
    this.resetErrorTimer_(getErrorTimeout(error));
  }

  /**
   * Drives the volume animation. While speech or results are being received,
   * assigns a new level and duration from WindowProxy's random() and
   * reschedules itself after that duration; in any other state it resets both
   * values and stops.
   */
  private animateVolume_() {
    this.micVolumeLevel_ = 0;
    this.micVolumeDuration_ = VOLUME_ANIMATION_DURATION_MIN_MS;
    if (this.state_ !== State.SPEECH_RECEIVED &&
        this.state_ !== State.RESULT_RECEIVED) {
      return;
    }
    this.micVolumeLevel_ = WindowProxy.getInstance().random();
    this.micVolumeDuration_ = Math.round(
        VOLUME_ANIMATION_DURATION_MIN_MS +
        WindowProxy.getInstance().random() *
            VOLUME_ANIMATION_DURATION_RANGE_MS);
    WindowProxy.getInstance().setTimeout(
        this.animateVolume_.bind(this), this.micVolumeDuration_);
  }

  /** Maps the current state to the name of the text to display. */
  private getText_(): string {
    switch (this.state_) {
      case State.STARTED:
        return 'waiting';
      case State.AUDIO_RECEIVED:
      case State.SPEECH_RECEIVED:
        return 'speak';
      case State.RESULT_RECEIVED:
      case State.RESULT_FINAL:
        return 'result';
      case State.ERROR_RECEIVED:
        return 'error';
      default:
        return 'none';
    }
  }

  /** Maps the current error to the name of the error text to display. */
  private getErrorText_(): string {
    switch (this.error_) {
      case Error.NO_SPEECH:
        return 'no-speech';
      case Error.AUDIO_CAPTURE:
        return 'audio-capture';
      case Error.NETWORK:
        return 'network';
      case Error.NOT_ALLOWED:
      case Error.SERVICE_NOT_ALLOWED:
        return 'not-allowed';
      case Error.LANGUAGE_NOT_SUPPORTED:
        return 'language-not-supported';
      case Error.NO_MATCH:
        return 'no-match';
      case Error.ABORTED:
      case Error.OTHER:
      default:
        return 'other';
    }
  }

  /** Maps the current error to the name of the link to offer, if any. */
  private getErrorLink_(): string {
    switch (this.error_) {
      case Error.NO_SPEECH:
      case Error.AUDIO_CAPTURE:
        return 'learn-more';
      case Error.NOT_ALLOWED:
      case Error.SERVICE_NOT_ALLOWED:
        return 'details';
      case Error.NO_MATCH:
        return 'try-again';
      default:
        return 'none';
    }
  }

  /** Maps the current state to the CSS class of the mic. */
  private getMicClass_(): string {
    switch (this.state_) {
      case State.AUDIO_RECEIVED:
        return 'listening';
      case State.SPEECH_RECEIVED:
      case State.RESULT_RECEIVED:
        return 'receiving';
      default:
        return '';
    }
  }
}
| |
// Associates the tag name with the element class for typed DOM lookups.
declare global {
  interface HTMLElementTagNameMap {
    'ntp-voice-search-overlay': VoiceSearchOverlayElement;
  }
}

// Registers the element under the tag name returned by its static `is` getter.
customElements.define(VoiceSearchOverlayElement.is, VoiceSearchOverlayElement);