// blob: 03d836114b34613e1796a8b81138d0cafde2bb5b [file] [log] [blame]
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/speech/speech_recognizer_fsm.h"
#include "base/notreached.h"
#include "components/speech/audio_buffer.h"
#include "media/mojo/mojom/audio_data.mojom.h"
#include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "media/mojo/mojom/speech_recognition_result.mojom.h"
namespace speech {
SpeechRecognizerFsm::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
: event(event_value),
engine_error(media::mojom::SpeechRecognitionErrorCode::kNone,
media::mojom::SpeechAudioErrorDetails::kNone) {}
SpeechRecognizerFsm::FSMEventArgs::FSMEventArgs(const FSMEventArgs& other)
: event(other.event),
audio_data(other.audio_data ? other.audio_data->Clone() : nullptr),
audio_chunk(other.audio_chunk),
recognition_context(other.recognition_context),
engine_error(other.engine_error) {
engine_results = mojo::Clone(other.engine_results);
}
// Destructor, defaulted here in the implementation file.
SpeechRecognizerFsm::FSMEventArgs::~FSMEventArgs() = default;
// Transition table of the recognizer FSM. Picks the handler that matches the
// current |state_| and the event carried by |event_args|, invokes it with
// |event_args| and returns the handler's result as the next state.
//
// Each inner switch names its events explicitly and has no `default` label;
// a state/event pair that matches none of the labels ends up in NOTREACHED().
SpeechRecognizerFsm::FSMState
SpeechRecognizerFsm::ExecuteTransitionAndGetNextState(
    const FSMEventArgs& event_args) {
  switch (state_) {
    case STATE_IDLE:
      switch (event_args.event) {
        case EVENT_PREPARE:
          return PrepareRecognition(event_args);
        case EVENT_START:
          return NotFeasible(event_args);
        case EVENT_ABORT:
        case EVENT_STOP_CAPTURE:
          return AbortSilently(event_args);
        case EVENT_UPDATE_RECOGNITION_CONTEXT:
          return UpdateRecognitionContext(event_args);
        // Corner cases: queued messages that are dispatched late.
        case EVENT_AUDIO_DATA:
        case EVENT_ENGINE_RESULT:
        case EVENT_ENGINE_ERROR:
        case EVENT_AUDIO_ERROR:
          return DoNothing(event_args);
      }
      break;

    case STATE_PREPARING:
      switch (event_args.event) {
        case EVENT_START:
          return StartRecording(event_args);
        case EVENT_PREPARE:
          return NotFeasible(event_args);
        case EVENT_ABORT:
        case EVENT_STOP_CAPTURE:
          return AbortSilently(event_args);
        case EVENT_UPDATE_RECOGNITION_CONTEXT:
          return UpdateRecognitionContext(event_args);
        // Corner cases: queued messages that are dispatched late.
        case EVENT_AUDIO_DATA:
        case EVENT_ENGINE_RESULT:
        case EVENT_ENGINE_ERROR:
        case EVENT_AUDIO_ERROR:
          return DoNothing(event_args);
      }
      break;

    case STATE_STARTING:
      switch (event_args.event) {
        case EVENT_AUDIO_DATA:
          return StartRecognitionEngine(event_args);
        case EVENT_ENGINE_RESULT:
          // An engine result is processed only when the event also carries
          // audio data; otherwise the transition is rejected.
          // NOTE(review): condition preserved from the original code; confirm
          // that gating on |audio_data| is the intended check here.
          return event_args.audio_data ? ProcessIntermediateResult(event_args)
                                       : NotFeasible(event_args);
        case EVENT_STOP_CAPTURE:
          return AbortSilently(event_args);
        case EVENT_PREPARE:
        case EVENT_START:
          return NotFeasible(event_args);
        case EVENT_UPDATE_RECOGNITION_CONTEXT:
          return UpdateRecognitionContext(event_args);
        case EVENT_ABORT:
        case EVENT_ENGINE_ERROR:
        case EVENT_AUDIO_ERROR:
          return AbortWithError(event_args);
      }
      break;

    case STATE_ESTIMATING_ENVIRONMENT:
      switch (event_args.event) {
        case EVENT_AUDIO_DATA:
          return WaitEnvironmentEstimationCompletion(event_args);
        case EVENT_ENGINE_RESULT:
          return ProcessIntermediateResult(event_args);
        case EVENT_STOP_CAPTURE:
          return StopCaptureAndWaitForResult(event_args);
        case EVENT_PREPARE:
        case EVENT_START:
          return NotFeasible(event_args);
        case EVENT_UPDATE_RECOGNITION_CONTEXT:
          return UpdateRecognitionContext(event_args);
        case EVENT_ABORT:
        case EVENT_ENGINE_ERROR:
        case EVENT_AUDIO_ERROR:
          return AbortWithError(event_args);
      }
      break;

    case STATE_WAITING_FOR_SPEECH:
      switch (event_args.event) {
        case EVENT_AUDIO_DATA:
          return DetectUserSpeechOrTimeout(event_args);
        case EVENT_ENGINE_RESULT:
          return ProcessIntermediateResult(event_args);
        case EVENT_STOP_CAPTURE:
          return StopCaptureAndWaitForResult(event_args);
        case EVENT_PREPARE:
        case EVENT_START:
          return NotFeasible(event_args);
        case EVENT_UPDATE_RECOGNITION_CONTEXT:
          return UpdateRecognitionContext(event_args);
        case EVENT_ABORT:
        case EVENT_ENGINE_ERROR:
        case EVENT_AUDIO_ERROR:
          return AbortWithError(event_args);
      }
      break;

    case STATE_RECOGNIZING:
      switch (event_args.event) {
        case EVENT_AUDIO_DATA:
          return DetectEndOfSpeech(event_args);
        case EVENT_ENGINE_RESULT:
          return ProcessIntermediateResult(event_args);
        case EVENT_STOP_CAPTURE:
          return StopCaptureAndWaitForResult(event_args);
        case EVENT_PREPARE:
        case EVENT_START:
          return NotFeasible(event_args);
        case EVENT_UPDATE_RECOGNITION_CONTEXT:
          return UpdateRecognitionContext(event_args);
        case EVENT_ABORT:
        case EVENT_ENGINE_ERROR:
        case EVENT_AUDIO_ERROR:
          return AbortWithError(event_args);
      }
      break;

    case STATE_WAITING_FINAL_RESULT:
      switch (event_args.event) {
        case EVENT_ENGINE_RESULT:
          return ProcessFinalResult(event_args);
        // Further capture-related events are ignored in this state.
        case EVENT_STOP_CAPTURE:
        case EVENT_AUDIO_DATA:
          return DoNothing(event_args);
        case EVENT_PREPARE:
        case EVENT_START:
          return NotFeasible(event_args);
        case EVENT_UPDATE_RECOGNITION_CONTEXT:
          return UpdateRecognitionContext(event_args);
        case EVENT_ABORT:
        case EVENT_ENGINE_ERROR:
        case EVENT_AUDIO_ERROR:
          return AbortWithError(event_args);
      }
      break;

    case STATE_ENDED:
      // Every event is ignored in this state.
      return DoNothing(event_args);
  }
  NOTREACHED();
}
} // namespace speech