/*
* Copyright (C) 2012 Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "third_party/blink/renderer/modules/speech/speech_recognition.h"
#include <algorithm>
#include "build/build_config.h"
#include "third_party/blink/renderer/core/dom/document.h"
#include "third_party/blink/renderer/core/frame/local_frame.h"
#include "third_party/blink/renderer/modules/speech/speech_recognition_controller.h"
#include "third_party/blink/renderer/modules/speech/speech_recognition_error.h"
#include "third_party/blink/renderer/modules/speech/speech_recognition_event.h"
#include "third_party/blink/renderer/platform/bindings/exception_state.h"

namespace blink {
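
// Creates a garbage-collected SpeechRecognition for |context|, using the
// document's frame to look up the per-frame SpeechRecognitionController.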
SpeechRecognition* SpeechRecognition::Create(ExecutionContext* context) {
Document& document = To<Document>(*context);
return MakeGarbageCollected<SpeechRecognition>(document.GetFrame(), context);
}
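
// Starts a recognition session: binds this object as the Mojo session client,
// requests a session interface, and hands the configured grammars, language,
// continuous/interim flags, and maximum number of alternatives to the
// browser-side controller. Throws InvalidStateError if already started.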
void SpeechRecognition::start(ExceptionState& exception_state) {
if (!controller_)
return;
if (started_) {
exception_state.ThrowDOMException(DOMExceptionCode::kInvalidStateError,
"recognition has already started.");
return;
}
final_results_.clear();
mojom::blink::SpeechRecognitionSessionClientPtrInfo session_client;
binding_.Bind(mojo::MakeRequest(&session_client),
GetExecutionContext()->GetInterfaceInvalidator());
binding_.set_connection_error_handler(WTF::Bind(
&SpeechRecognition::OnConnectionError, WrapWeakPersistent(this)));
mojom::blink::SpeechRecognitionSessionRequest session_request =
MakeRequest(&session_, GetExecutionContext()->GetInterfaceInvalidator());
controller_->Start(std::move(session_request), std::move(session_client),
*grammars_, lang_, continuous_, interim_results_,
max_alternatives_);
started_ = true;
}
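
// Implements stop(): asks the active session to stop capturing audio. No-op
// if there is no session or a stop/abort is already in flight.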
void SpeechRecognition::stopFunction() {
if (!controller_)
return;
if (started_ && !stopping_) {
stopping_ = true;
session_->StopCapture();
}
}
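
// Implements abort(): asks the active session to abort recognition outright.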
void SpeechRecognition::abort() {
if (!controller_)
return;
if (started_ && !stopping_) {
stopping_ = true;
session_->Abort();
}
}
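
// Receives a batch of recognition results from the browser. Final results are
// accumulated in |final_results_| across calls, while provisional (interim)
// results are only included in the result event dispatched to script.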
void SpeechRecognition::ResultRetrieved(
WTF::Vector<mojom::blink::SpeechRecognitionResultPtr> results) {
auto* it = std::stable_partition(
results.begin(), results.end(),
[](const auto& result) { return !result->is_provisional; });
wtf_size_t provisional_count = static_cast<wtf_size_t>(results.end() - it);
// Add the new results to the previous final results.
HeapVector<Member<SpeechRecognitionResult>> aggregated_results =
std::move(final_results_);
aggregated_results.ReserveCapacity(aggregated_results.size() +
results.size());
for (const auto& result : results) {
HeapVector<Member<SpeechRecognitionAlternative>> alternatives;
alternatives.ReserveInitialCapacity(result->hypotheses.size());
for (const auto& hypothesis : result->hypotheses) {
alternatives.push_back(SpeechRecognitionAlternative::Create(
hypothesis->utterance, hypothesis->confidence));
}
aggregated_results.push_back(SpeechRecognitionResult::Create(
std::move(alternatives), !result->is_provisional));
}
// |aggregated_results| now contains the following (in the given order):
//
// (1) previous final results from |final_results_|
// (2) new final results from |results|
// (3) new provisional results from |results|
// |final_results_| = (1) + (2).
HeapVector<Member<SpeechRecognitionResult>> new_final_results;
new_final_results.ReserveInitialCapacity(aggregated_results.size() -
provisional_count);
new_final_results.AppendRange(aggregated_results.begin(),
aggregated_results.end() - provisional_count);
final_results_ = std::move(new_final_results);
// We dispatch an event with (1) + (2) + (3).
DispatchEvent(*SpeechRecognitionEvent::CreateResult(
aggregated_results.size() - results.size(),
std::move(aggregated_results)));
}
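
// Reports a recognition error to script: a "no match" result is surfaced as a
// nomatch event, anything else as an error event.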
void SpeechRecognition::ErrorOccurred(
mojom::blink::SpeechRecognitionErrorPtr error) {
if (error->code == mojom::blink::SpeechRecognitionErrorCode::kNoMatch) {
DispatchEvent(*SpeechRecognitionEvent::CreateNoMatch(nullptr));
} else {
// TODO(primiano): message?
DispatchEvent(*SpeechRecognitionError::Create(error->code, String()));
}
}
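
// The following callbacks mirror the browser-side session state and are
// surfaced to script as the corresponding Web Speech API events
// (start, audiostart, soundstart/speechstart, speechend/soundend, audioend).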
void SpeechRecognition::Started() {
DispatchEvent(*Event::Create(event_type_names::kStart));
}

void SpeechRecognition::AudioStarted() {
DispatchEvent(*Event::Create(event_type_names::kAudiostart));
}

void SpeechRecognition::SoundStarted() {
DispatchEvent(*Event::Create(event_type_names::kSoundstart));
DispatchEvent(*Event::Create(event_type_names::kSpeechstart));
}

void SpeechRecognition::SoundEnded() {
DispatchEvent(*Event::Create(event_type_names::kSpeechend));
DispatchEvent(*Event::Create(event_type_names::kSoundend));
}

void SpeechRecognition::AudioEnded() {
DispatchEvent(*Event::Create(event_type_names::kAudioend));
}
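
// Called when the session is over (normally or after an error/abort): resets
// the session state, closes the Mojo binding, and fires the 'end' event.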
void SpeechRecognition::Ended() {
started_ = false;
stopping_ = false;
session_.reset();
binding_.Close();
DispatchEvent(*Event::Create(event_type_names::kEnd));
}

const AtomicString& SpeechRecognition::InterfaceName() const {
return event_target_names::kSpeechRecognition;
}

ExecutionContext* SpeechRecognition::GetExecutionContext() const {
return ContextLifecycleObserver::GetExecutionContext();
}
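
// The execution context is going away; drop the controller so no new
// sessions can be started.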
void SpeechRecognition::ContextDestroyed(ExecutionContext*) {
controller_ = nullptr;
}
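
// Keep the wrapper alive while a session is running so pending events can
// still reach script.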
bool SpeechRecognition::HasPendingActivity() const {
return started_;
}
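
// On Android, abort recognition when the page is hidden.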
void SpeechRecognition::PageVisibilityChanged() {
#if defined(OS_ANDROID)
if (!GetPage()->IsPageVisible())
abort();
#endif
}
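
// The Mojo connection to the browser was lost; report it to script as a
// network error and end the session.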
void SpeechRecognition::OnConnectionError() {
ErrorOccurred(mojom::blink::SpeechRecognitionError::New(
mojom::blink::SpeechRecognitionErrorCode::kNetwork,
mojom::blink::SpeechAudioErrorDetails::kNone));
Ended();
}

SpeechRecognition::SpeechRecognition(LocalFrame* frame,
ExecutionContext* context)
: ContextLifecycleObserver(context),
PageVisibilityObserver(frame ? frame->GetPage() : nullptr),
grammars_(SpeechGrammarList::Create()), // FIXME: The spec is not clear
// on the default value for the
// grammars attribute.
continuous_(false),
interim_results_(false),
max_alternatives_(1),
controller_(SpeechRecognitionController::From(frame)),
started_(false),
stopping_(false),
binding_(this) {}

SpeechRecognition::~SpeechRecognition() = default;

void SpeechRecognition::Trace(blink::Visitor* visitor) {
visitor->Trace(grammars_);
visitor->Trace(controller_);
visitor->Trace(final_results_);
EventTargetWithInlineData::Trace(visitor);
ContextLifecycleObserver::Trace(visitor);
PageVisibilityObserver::Trace(visitor);
}

}  // namespace blink