// Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Pico specific implementation of the TtsEngine interface defined in
// tts_engine.h.

#include "pico/pico_tts_engine.h"

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include "base/string_number_conversions.h"

#include "log.h"

// Evaluates X once and compares the result against PICO_OK.  On mismatch it
// logs the source line number and makes the *enclosing function* return
// TTS_FAILURE, so it may only be used inside functions returning tts_result.
// The macro ends in a bare `else` so that the semicolon the caller writes
// after `FAILERR(...)` becomes the (empty) else branch and the whole
// expansion is a single if/else statement.
#define FAILERR(X) \
  if (PICO_OK != (X)) { \
    LOG(ERROR) << "Fail line " << __LINE__; \
    return TTS_FAILURE; \
  } \
  else

using std::string;

namespace speech_synthesis {

// Keys under which the rate, pitch and volume settings are stored in
// properties_ (see SetRate(), SetPitch() and SetVolume()).
const char* PROP_RATE = "rate";
const char* PROP_PITCH = "pitch";
const char* PROP_VOLUME = "volume";

// Size, in bytes, of the buffer that Init() allocates and hands to
// pico_initialize().
const int PICO_MEM_SIZE = 2500000;
// Name under which InitVoice() registers the voice definition with the Pico
// system and from which it creates the engine.
const pico_Char * PICO_VOICE_NAME =
    reinterpret_cast<const pico_Char *>("PicoVoice");

// Creates an engine that is not yet initialized: every Pico handle is NULL
// and no voice is selected.  Init() must be called before synthesizing.
//
// base_path: prefix prepended to the voice list file name and to the
//     lingware file names when they are loaded.
PicoTtsEngine::PicoTtsEngine(const std::string& base_path)
    : base_path_(base_path),
      mem_area_(NULL),
      system_(NULL),
      engine_(NULL),
      ta_resource_(NULL),
      sg_resource_(NULL),
      receiver_(NULL) {
  // Without this, SetVoice() or GetSampleRate() called before Init() would
  // read an indeterminate index.  Assigned in the body (rather than in the
  // initializer list) so it does not depend on the member declaration order.
  current_voice_index_ = -1;
}

// Releases everything Init() acquired by running the regular shutdown path.
PicoTtsEngine::~PicoTtsEngine() {
  // The status returned by Shutdown() is of no use during destruction.
  static_cast<void>(Shutdown());
}

// Tears down everything that belongs to the currently loaded voice: the
// engine together with its voice definition, and both lingware resources.
// The Pico system handle and its memory area are left alone, so another
// voice can be loaded afterwards.
void PicoTtsEngine::CleanResources(void) {
  // No voice is current once the resources below are gone.
  current_voice_index_ = -1;

  if (engine_ != NULL) {
    pico_disposeEngine(system_, &engine_);
    pico_releaseVoiceDefinition(system_, PICO_VOICE_NAME);
    engine_ = NULL;
  }

  if (ta_resource_ != NULL) {
    pico_unloadResource(system_, &ta_resource_);
    ta_resource_ = NULL;
  }

  if (sg_resource_ != NULL) {
    pico_unloadResource(system_, &sg_resource_);
    sg_resource_ = NULL;
  }
}

// Loads the lingware of the voice at |voice_index|, registers a voice
// definition named PICO_VOICE_NAME that refers to it and creates the Pico
// engine for that definition.
//
// voice_index: index into voices_; must be in [0, GetVoiceCount()).
//
// Returns TTS_SUCCESS and records |voice_index| as the current voice when
// every Pico call succeeds, TTS_FAILURE otherwise.  Resources that were
// loaded before a failure stay in ta_resource_ / sg_resource_ and are
// unloaded by the next CleanResources().
tts_result PicoTtsEngine::InitVoice(int voice_index) {
  if (voice_index < 0 || voice_index >= GetVoiceCount()) {
    LOG(INFO) << "Voice index out of range: " << voice_index;
    return TTS_FAILURE;
  }
  const PicoTtsVoice * voice = &voices_[voice_index];

  pico_Char ta_resource_name[PICO_MAX_RESOURCE_NAME_SIZE];
  pico_Char sg_resource_name[PICO_MAX_RESOURCE_NAME_SIZE];

  // The strings must outlive the pointers derived from them below.
  string tafile = base_path_ + voice->ta_lingware;
  string sgfile = base_path_ + voice->sg_lingware;
  const pico_Char *ta_filename =
      reinterpret_cast<const pico_Char *>(tafile.c_str());
  const pico_Char *sg_filename =
      reinterpret_cast<const pico_Char *>(sgfile.c_str());

  FAILERR(pico_loadResource(system_, ta_filename, &ta_resource_));
  FAILERR(pico_loadResource(system_, sg_filename, &sg_resource_));
  FAILERR(pico_getResourceName(system_, ta_resource_,
      reinterpret_cast<char *>(ta_resource_name)));
  FAILERR(pico_getResourceName(system_, sg_resource_,
      reinterpret_cast<char *>(sg_resource_name)));
  FAILERR(pico_createVoiceDefinition(system_, PICO_VOICE_NAME));

  // From here on the voice definition exists.  CleanResources() releases it
  // only when an engine exists, so a failure below has to release it here;
  // otherwise the next attempt to create a definition with the same name
  // would be rejected.
  int status = pico_addResourceToVoiceDefinition(
      system_, PICO_VOICE_NAME, ta_resource_name);
  if (PICO_OK == status) {
    status = pico_addResourceToVoiceDefinition(
        system_, PICO_VOICE_NAME, sg_resource_name);
  }
  if (PICO_OK == status) {
    // The result of engine creation must be checked as well: reporting
    // success with no engine would make every later synthesis call fail.
    status = pico_newEngine(system_, PICO_VOICE_NAME, &engine_);
  }
  if (PICO_OK != status) {
    LOG(ERROR) << "Fail line " << __LINE__;
    pico_releaseVoiceDefinition(system_, PICO_VOICE_NAME);
    engine_ = NULL;
    return TTS_FAILURE;
  }

  current_voice_index_ = voice_index;
  return TTS_SUCCESS;
}

// Initializes the TTS engine: loads the voice list from
// <base_path>tts_support.xml, allocates the Pico memory area, initializes
// the Pico system and loads the first voice as the default one.
//
// Returns TTS_SUCCESS on success and TTS_FAILURE if the allocation, the
// Pico initialization or loading the default voice fails.  Whatever was
// acquired before a failure is released by Shutdown().
tts_result PicoTtsEngine::Init() {
  LOG(INFO) << "Start.";

  // A repeated Init() would otherwise overwrite mem_area_ and system_ and
  // leak what they referred to.
  if (mem_area_ != NULL || system_ != NULL) {
    Shutdown();
  }

  LoadVoices(base_path_ + "tts_support.xml");
  mem_area_ = malloc(PICO_MEM_SIZE);
  if (!mem_area_) {
    LOG(ERROR) << "Failed to allocate memory for Pico system";
    return TTS_FAILURE;
  }

  FAILERR(pico_initialize(mem_area_, PICO_MEM_SIZE, &system_));

  // Set the first language in the data file as the default.  InitVoice()
  // returns a tts_result, so it is compared against TTS_SUCCESS rather than
  // being passed through FAILERR, which compares against PICO_OK.
  if (TTS_SUCCESS != InitVoice(0)) {
    LOG(ERROR) << "Failed to initialize the default voice";
    return TTS_FAILURE;
  }
  LOG(INFO) << "Init done.";
  return TTS_SUCCESS;
}

// Releases the per-voice resources first, then terminates the Pico system
// and finally frees the memory area handed to it.  Safe to call when
// nothing has been initialized; always returns TTS_SUCCESS.
tts_result PicoTtsEngine::Shutdown() {
  CleanResources();

  if (system_ != NULL) {
    pico_terminate(&system_);
    system_ = NULL;
  }

  if (mem_area_ != NULL) {
    free(mem_area_);
    mem_area_ = NULL;
  }

  return TTS_SUCCESS;
}

// Aborts any synthesis in progress by performing a full reset of the Pico
// engine.
//
// Returns TTS_SUCCESS when there is no engine (nothing to stop) or when the
// reset succeeds, and TTS_FAILURE when Pico reports an error.
tts_result PicoTtsEngine::Stop() {
  if (engine_ == NULL) {
    // No voice is loaded, so there is nothing to reset.
    return TTS_SUCCESS;
  }
  // TODO(fergus): use PICO_RESET_SOFT here instead?
  if (PICO_OK != pico_resetEngine(engine_, PICO_RESET_FULL)) {
    LOG(ERROR) << "Failed to reset the Pico engine";
    return TTS_FAILURE;
  }
  return TTS_SUCCESS;
}

// Returns how many voices are currently held in voices_.
int PicoTtsEngine::GetVoiceCount() {
  const size_t voice_count = voices_.size();
  return static_cast<int>(voice_count);
}

// Returns the description of the voice at |voice_index|, or NULL when the
// index is outside [0, GetVoiceCount()).  The returned pointer refers to an
// element of voices_.
const TtsVoice* PicoTtsEngine::GetVoiceInfo(int voice_index) {
  if (voice_index < 0 || voice_index >= GetVoiceCount()) {
    return NULL;
  }
  return &voices_[voice_index];
}

// Makes the voice at |voice_index| the current one.  When that voice is
// already current nothing is reloaded; otherwise the resources of the old
// voice are released and the result of loading the new one is returned.
tts_result PicoTtsEngine::SetVoice(int voice_index) {
  if (current_voice_index_ == voice_index) {
    return TTS_SUCCESS;
  }
  CleanResources();
  return InitVoice(voice_index);
}

// Stores the object whose Receive() is invoked with synthesized audio.  The
// pointer is only stored here, not dereferenced.
void PicoTtsEngine::SetReceiver(TtsDataReceiver* receiver) {
  this->receiver_ = receiver;
}

// Stores |value| for an already known property.
//
// property: name of the property; must already be present in properties_.
// value: new value, copied into properties_.
//
// Returns TTS_SUCCESS when the value was stored, TTS_PROPERTY_UNSUPPORTED
// when |property| is NULL or not present in properties_, and
// TTS_VALUE_INVALID when |value| is NULL.
tts_result PicoTtsEngine::SetProperty(const char *property, const char *value) {
  // Building a std::string from a NULL pointer is undefined behavior, so
  // NULL arguments are rejected before they reach the map.
  if (property == NULL) {
    return TTS_PROPERTY_UNSUPPORTED;
  }
  // A single lookup instead of count() followed by operator[].
  std::map<string, string>::iterator entry = properties_.find(property);
  if (entry == properties_.end()) {
    return TTS_PROPERTY_UNSUPPORTED;
  }
  if (value == NULL) {
    return TTS_VALUE_INVALID;
  }
  entry->second = value;
  return TTS_SUCCESS;
}

// Stores a numeric property value.  The value is truncated towards zero to
// an integer and stored in its decimal string form.
//
// Returns whatever the string overload returns, so an unsupported property
// is reported to the caller instead of being masked as TTS_SUCCESS.
tts_result PicoTtsEngine::SetProperty(const char *property, float value) {
  // The narrowing is spelled out; base::IntToString() takes an int.
  const int truncated_value = static_cast<int>(value);
  return SetProperty(property, base::IntToString(truncated_value).c_str());
}

// Sets the speaking rate.
//
// rate: multiplier in [0, 5]; values below 0.2 are raised to 0.2.  The
//     value is stored as a percentage (rate * 100).
//
// Returns TTS_VALUE_INVALID when |rate| is outside [0, 5] or is NaN,
// otherwise the result of storing the property.
tts_result PicoTtsEngine::SetRate(float rate) {
  // Written as a negated in-range test so that NaN, for which every
  // comparison is false, is rejected too; converting NaN to an integer
  // further down would be undefined behavior.
  if (!(rate >= 0 && rate <= 5)) {
    return TTS_VALUE_INVALID;
  }
  rate = rate < 0.2 ? 0.2 : rate;
  return SetProperty(PROP_RATE, rate * 100);
}

// Sets the voice pitch.
//
// pitch: multiplier in [0, 2]; values below 0.5 are raised to 0.5.  The
//     value is stored as a percentage (pitch * 100).
//
// Returns TTS_VALUE_INVALID when |pitch| is outside [0, 2] or is NaN,
// otherwise the result of storing the property.
tts_result PicoTtsEngine::SetPitch(float pitch) {
  // Written as a negated in-range test so that NaN, for which every
  // comparison is false, is rejected too; converting NaN to an integer
  // further down would be undefined behavior.
  if (!(pitch >= 0 && pitch <= 2)) {
    return TTS_VALUE_INVALID;
  }
  pitch = pitch < 0.5 ? 0.5 : pitch;
  return SetProperty(PROP_PITCH, pitch * 100);
}

// Sets the output volume.
//
// volume: multiplier in [0, 5], stored as a percentage (volume * 100).
//
// Returns TTS_VALUE_INVALID when |volume| is outside [0, 5] or is NaN,
// otherwise the result of storing the property.
tts_result PicoTtsEngine::SetVolume(float volume) {
  // Written as a negated in-range test so that NaN, for which every
  // comparison is false, is rejected too; converting NaN to an integer
  // further down would be undefined behavior.
  if (!(volume >= 0 && volume <= 5)) {
    return TTS_VALUE_INVALID;
  }
  return SetProperty(PROP_VOLUME, volume * 100);
}

// Looks up the current value of a property.
//
// property: name of the property to read.
// value: if not NULL, receives a pointer to the stored value.  The pointer
//     refers to the string held inside properties_, so it is only usable
//     until that entry is modified.
//
// Returns TTS_SUCCESS when the property exists, TTS_PROPERTY_UNSUPPORTED
// when it does not or when |property| is NULL.
tts_result PicoTtsEngine::GetProperty(const char *property,
    const char **value) {
  // Building a std::string from a NULL pointer is undefined behavior.
  if (property == NULL) {
    return TTS_PROPERTY_UNSUPPORTED;
  }
  std::map<string, string>::const_iterator iter =
      properties_.find(property);
  if (iter == properties_.end()) {
    return TTS_PROPERTY_UNSUPPORTED;
  }
  if (value != NULL) {
    (*value) = iter->second.c_str();
  }
  return TTS_SUCCESS;
}

int PicoTtsEngine::GetSampleRate() {
  return voices_[current_voice_index_].sample_rate;
}

// Synthesizes |text| with the current voice and property settings.
//
// text: NUL-terminated text to speak.
// audio_buffer: scratch buffer that receives the samples of each chunk
//     before they are handed to the receiver.
// audio_buffer_size: capacity of |audio_buffer| in samples.
// out_total_samples: if not NULL, receives the number of samples produced
//     over the whole call.
//
// The text, wrapped in the property tags built by AppendProperties(), is
// fed to Pico piece by piece; after each piece the audio that became
// available is drained with GetAudioFromTts().
//
// Returns TTS_SUCCESS once all text was consumed, TTS_FAILURE when |text| is
// NULL, no engine exists or Pico rejects the text, or the failing result of
// GetAudioFromTts().
tts_result PicoTtsEngine::SynthesizeText(const char* text,
                                         int16_t* audio_buffer,
                                         int audio_buffer_size,
                                         int* out_total_samples) {
  if (out_total_samples != NULL) {
    *out_total_samples = 0;
  }

  // Appending a NULL C string to a std::string is undefined behavior.
  if (text == NULL || engine_ == NULL) {
    LOG(ERROR) << "SynthesizeText called without text or without an engine";
    return TTS_FAILURE;
  }

  string synth_text;
  AppendProperties(text, &synth_text);

  // pico_putTextUtf8() takes the size as a pico_Int16.  Passing the full
  // remaining length would overflow that type for long input, so every call
  // offers at most this many bytes; the loop advances by however many Pico
  // actually consumed.
  const int kMaxChunkBytes = 32767;

  const pico_Char* text_ptr =
      reinterpret_cast<const pico_Char*>(synth_text.c_str());
  // +1 so that the terminating NUL is passed to Pico as well.
  const int text_buffer_len = static_cast<int>(synth_text.size()) + 1;
  int text_pos = 0;
  while (text_pos < text_buffer_len) {
    const int remaining = text_buffer_len - text_pos;
    const pico_Int16 chunk_size = static_cast<pico_Int16>(
        remaining > kMaxChunkBytes ? kMaxChunkBytes : remaining);

    pico_Int16 text_bytes_consumed = 0;
    if (PICO_OK != pico_putTextUtf8(
            engine_, text_ptr, chunk_size, &text_bytes_consumed)) {
      return TTS_FAILURE;
    }

    int out_samples = 0;
    tts_result result = GetAudioFromTts(
        audio_buffer, audio_buffer_size, &out_samples);
    // Samples produced before a failure are still counted.
    if (out_total_samples != NULL) {
      *out_total_samples += out_samples;
    }

    if (result != TTS_SUCCESS) {
      return result;
    }

    text_pos += text_bytes_consumed;
    text_ptr += text_bytes_consumed;
  }
  return TTS_SUCCESS;
}

// This method appends the SSML tags for the supported properties if their
// values are different from the default values.
void PicoTtsEngine::AppendProperties(const char *text, string *synth_text) {
  int rate_level_ = floor(atof(properties_[PROP_RATE].c_str()));
  int pitch_level_ = floor(atof(properties_[PROP_PITCH].c_str()));
  int volume_level_ = floor(atof(properties_[PROP_VOLUME].c_str()));

  if (rate_level_ < PICO_MIN_RATE || rate_level_ > PICO_MAX_RATE) {
    LOG(WARNING) << "Rate is outside the allowed range.";
  }
  if (pitch_level_ < PICO_MIN_PITCH || pitch_level_ > PICO_MAX_PITCH) {
    LOG(WARNING) << "Pitch is outside the allowed range.";
  }
  if (volume_level_ < PICO_MIN_VOL || volume_level_ > PICO_MAX_VOL) {
    LOG(WARNING) << "Volume is outside the allowed range.";
  }

  *synth_text = "";

  // Append opening tags
  if (rate_level_ != PICO_DEF_RATE) {
    *synth_text += "<speed level='" + properties_[PROP_RATE] + "'>";
  }
  if (pitch_level_ != PICO_DEF_PITCH) {
    *synth_text += "<pitch level='" + properties_[PROP_PITCH] + "'>";
  }
  if (volume_level_ != PICO_DEF_VOL) {
    *synth_text += "<volume level='" + properties_[PROP_VOLUME] + "'>";
  }
  // Append text
  *synth_text += text;
  // Append closing tags in the reverse order
  if (volume_level_ != PICO_DEF_VOL) {
    *synth_text += "</volume>";
  }
  if (pitch_level_ != PICO_DEF_PITCH) {
    *synth_text += "</pitch>";
  }
  if (rate_level_ != PICO_DEF_RATE) {
    *synth_text += "</speed>";
  }
}

// max_iterations_without_apparent_progress is a hack to prevent infinite
// loops: it bounds how many consecutive pico_getData() calls in
// GetAudioFromTts() may return zero bytes while the engine still reports
// PICO_STEP_BUSY.  Once the bound is exceeded GetAudioFromTts() gives up and
// returns TTS_FAILURE.
// This needs to be more than 200 to pass simple tests such as hello world.
// TODO(fergus): we should fix the underlying bug <http://b/2501315> in the
// //third_party/pico sources, and then delete all the code relating to
// max_iterations_without_apparent_progress.
int PicoTtsEngine::max_iterations_without_apparent_progress = 10000;

// Drains the audio Pico has ready for the text fed so far.  Every non-empty
// chunk is passed to the receiver (if one is set) with TTS_SYNTH_PENDING;
// after the engine stops reporting PICO_STEP_BUSY a zero-length
// TTS_SYNTH_DONE packet is sent, unless the receiver asked to halt.
//
// audio_buffer: buffer each chunk is written to before it is forwarded.
// audio_buffer_size: capacity of |audio_buffer| in samples.
// out_total_samples: if not NULL, receives the number of samples produced
//     by this call, also when the call fails.
//
// Returns TTS_FAILURE for an unusable buffer, when no voice is loaded, on a
// Pico step error, on a receiver error, on an unexpected data type or when
// the engine makes no progress for more than
// max_iterations_without_apparent_progress iterations; TTS_SUCCESS
// otherwise.
tts_result PicoTtsEngine::GetAudioFromTts(int16_t* audio_buffer,
                                          int audio_buffer_size,
                                          int* out_total_samples) {
  // Callers read the count even on failure, so it is always written.
  if (out_total_samples != NULL) {
    *out_total_samples = 0;
  }
  if (audio_buffer == NULL || audio_buffer_size <= 0) {
    LOG(ERROR) << "Invalid audio buffer";
    return TTS_FAILURE;
  }
  // current_voice_index_ is -1 while no voice is loaded; indexing voices_
  // with it would read out of bounds.
  if (current_voice_index_ < 0 || current_voice_index_ >= GetVoiceCount()) {
    LOG(ERROR) << "No voice is currently loaded";
    return TTS_FAILURE;
  }

  // pico_getData() takes the buffer size in bytes as a pico_Int16.  Offer at
  // most as many whole samples as fit into that type, so that a large buffer
  // does not overflow the size.
  const int kMaxBufferSamples =
      32767 / static_cast<int>(sizeof(pico_Int16));
  const int usable_samples = audio_buffer_size < kMaxBufferSamples
      ? audio_buffer_size : kMaxBufferSamples;
  const pico_Int16 buffer_size_bytes =
      static_cast<pico_Int16>(usable_samples * sizeof(pico_Int16));
  int8_t* const buffer_ptr = reinterpret_cast<int8_t *>(audio_buffer);
  const uint32_t sample_rate = voices_[current_voice_index_].sample_rate;

  int total_samples_output = 0;
  int status = PICO_STEP_BUSY;
  tts_callback_status callback_status = TTS_CALLBACK_CONTINUE;
  pico_Int16 data_type = PICO_DATA_PCM_16BIT;
  int iterations_without_apparent_progress = 0;
  while (1) {
    pico_Int16 bytes_received = 0;
    data_type = 0;

    status = pico_getData(engine_, buffer_ptr, buffer_size_bytes,
        &bytes_received, &data_type);

    if (status != PICO_STEP_ERROR && bytes_received > 0) {
      if (data_type != PICO_DATA_PCM_16BIT) {
        // Reported as a failure by the data_type check after the loop.
        break;
      }

      int samples_output = bytes_received / sizeof(const pico_Int16);
      total_samples_output += samples_output;

      // Forward the chunk.  Only non-empty chunks reach this point.
      if (receiver_) {
        callback_status =
            receiver_->Receive(sample_rate, 1, audio_buffer,
                               samples_output, TTS_SYNTH_PENDING);
        if (callback_status != TTS_CALLBACK_CONTINUE) {
          break;
        }
      }
    }
    if (status != PICO_STEP_BUSY) {
      break;
    }
    if (bytes_received == 0) {
      iterations_without_apparent_progress++;
      if (iterations_without_apparent_progress >
          max_iterations_without_apparent_progress) {
        break;
      }
    } else {
      iterations_without_apparent_progress = 0;
    }
  }

  if (out_total_samples != NULL) {
    *out_total_samples = total_samples_output;
  }

  if (status == PICO_STEP_ERROR ||
      callback_status == TTS_CALLBACK_ERROR ||
      data_type != PICO_DATA_PCM_16BIT ||
      iterations_without_apparent_progress >
      max_iterations_without_apparent_progress) {
    return TTS_FAILURE;
  }

  // Send a zero-length packet to tell the destination receiver that we're
  // done.  The receiver is optional (the loop above tolerates its absence),
  // so it must be checked here as well before being dereferenced.
  if (callback_status != TTS_CALLBACK_HALT && receiver_ != NULL) {
    callback_status = receiver_->Receive(sample_rate, 1, NULL, 0,
                                         TTS_SYNTH_DONE);
    if (callback_status == TTS_CALLBACK_ERROR) {
      return TTS_FAILURE;
    }
  }

  return TTS_SUCCESS;
}

}  // namespace speech_synthesis
