blob: dd5df7a0314a3d57c5c4f368ca6259349799202c [file] [log] [blame] [edit]
/*
* pico.c - Speech Dispatcher SVOX pico output module
*
* A SVOX pico output module
*
* Copyright (C) 2010 Brailcom, o.p.s.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1, or (at your option)
* any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdio.h>
#include <sys/types.h>
#include <signal.h>
#include <string.h>
#include <glib.h>
#include <picoapi.h>
#include "spd_audio.h"
#include <speechd_types.h>
#include "module_utils.h"
#define MODULE_NAME "pico"
#define MODULE_VERSION "0.2"
DECLARE_DEBUG();
#define MIN_OUTBUF_SIZE (1600)
#define MAX_OUTBUF_SIZE (3200)
#define PICO_MEM_SIZE (10000000)
#define PICO_VOICE_SPEED_MIN (20)
#define PICO_VOICE_SPEED_MAX (500)
#define PICO_VOICE_SPEED_DEFAULT (100)
#define PICO_VOICE_PITCH_MIN (50)
#define PICO_VOICE_PITCH_MAX (200)
#define PICO_VOICE_PITCH_DEFAULT (100)
#define PICO_VOICE_VOLUME_MIN (0)
#define PICO_VOICE_VOLUME_MAX (500)
#define PICO_VOICE_VOLUME_DEFAULT (100)
static pico_System picoSystem;
static pico_Resource picoTaResource;
static pico_Resource picoSgResource;
static pico_Engine picoEngine;
static pico_Char *picoInp;
static const char *PICO_LINGWARE_PATH = "/usr/share/pico/lang/";
static const int PICO_SAMPLE_RATE = 16000;
static const char *picoInternalTaLingware[] = {
"en-US_ta.bin",
"en-GB_ta.bin",
"de-DE_ta.bin",
"es-ES_ta.bin",
"fr-FR_ta.bin",
"it-IT_ta.bin"
};
static const char *picoInternalSgLingware[] = {
"en-US_lh0_sg.bin",
"en-GB_kh0_sg.bin",
"de-DE_gl0_sg.bin",
"es-ES_zl0_sg.bin",
"fr-FR_nk0_sg.bin",
"it-IT_cm0_sg.bin"
};
static const SPDVoice pico_voices[] = {
{"samantha", "en-US", NULL},
{"serena", "en-GB", NULL},
{"sabrina", "de-DE", NULL},
{"isabel", "es-ES", NULL},
{"virginie", "fr-FR", NULL},
{"silvia", "it-IT", NULL}
};
static const SPDVoice *pico_voices_list[] = {
&pico_voices[0],
&pico_voices[1],
&pico_voices[2],
&pico_voices[3],
&pico_voices[4],
&pico_voices[5],
NULL
};
enum states { STATE_IDLE, STATE_PLAY, STATE_PAUSE, STATE_STOP, STATE_CLOSE };
static enum states pico_state;
/* Module configuration options */
MOD_OPTION_1_STR(PicoLingwarePath)
static int pico_set_rate(signed int value)
{
int speed;
if (value < 0)
speed = PICO_VOICE_SPEED_MIN + (value - (-100))
* (PICO_VOICE_SPEED_DEFAULT - PICO_VOICE_SPEED_MIN)
/ (0 - (-100));
else
speed = PICO_VOICE_SPEED_DEFAULT + (value - 0)
* (PICO_VOICE_SPEED_MAX - PICO_VOICE_SPEED_DEFAULT)
/ (100 - 0);
return speed;
}
static int pico_set_volume(signed int value)
{
int volume;
volume = PICO_VOICE_VOLUME_MIN + (value - (-100))
* (PICO_VOICE_VOLUME_DEFAULT - PICO_VOICE_VOLUME_MIN)
/ (100 - (-100));
return volume;
}
static int pico_set_pitch(signed int value)
{
int pitch;
if (value < 0)
pitch = PICO_VOICE_PITCH_MIN + (value - (-100))
* (PICO_VOICE_PITCH_DEFAULT - PICO_VOICE_PITCH_MIN)
/ (0 - (-100));
else
pitch = PICO_VOICE_PITCH_DEFAULT + (value - 0)
* (PICO_VOICE_PITCH_MAX - PICO_VOICE_PITCH_DEFAULT)
/ (100 - 0);
return pitch;
}
static int pico_process_tts(void)
{
pico_Int16 bytes_sent, bytes_recv, text_remaining, out_data_type;
pico_Int16 bytes_stored;
int ret, getstatus;
short outbuf[MAX_OUTBUF_SIZE];
pico_Retstring outMessage;
AudioTrack track;
#if defined(BYTE_ORDER) && (BYTE_ORDER == BIG_ENDIAN)
AudioFormat format = SPD_AUDIO_BE;
#else
AudioFormat format = SPD_AUDIO_LE;
#endif
pico_Char *buf = picoInp;
text_remaining = strlen((const char *)buf) + 1;
DBG(MODULE_NAME ": Text: %s\n", picoInp);
/* synthesis loop */
while (text_remaining) {
/* Process server events in case we were told to stop in between */
module_process(STDIN_FILENO, 0);
/* Feed the text into the engine. */
if ((ret = pico_putTextUtf8(picoEngine, buf, text_remaining,
&bytes_sent))) {
pico_getSystemStatusMessage(picoSystem, ret,
outMessage);
DBG(MODULE_NAME ": Cannot put Text (%i): %s\n", ret,
outMessage);
return -1;
}
DBG(MODULE_NAME ": Sent %d bytes\n", bytes_sent);
text_remaining -= bytes_sent;
buf += bytes_sent;
bytes_stored = 0;
do {
/* Retrieve the samples and add them to the buffer.
SVOX pico TTS sample rate is 16K */
getstatus = pico_getData(picoEngine,
(void *)outbuf + bytes_stored,
MAX_OUTBUF_SIZE - bytes_stored,
&bytes_recv,
&out_data_type);
if ((getstatus != PICO_STEP_BUSY)
&& (getstatus != PICO_STEP_IDLE)) {
pico_getSystemStatusMessage(picoSystem,
getstatus,
outMessage);
DBG(MODULE_NAME ": Cannot get Data (%i): %s\n",
getstatus, outMessage);
return -1;
}
bytes_stored += bytes_recv;
if (bytes_stored >= MIN_OUTBUF_SIZE
|| PICO_STEP_BUSY != getstatus) {
track.num_samples = bytes_stored / 2;
track.samples = outbuf;
track.num_channels = 1;
track.sample_rate = PICO_SAMPLE_RATE;
track.bits = 16;
DBG(MODULE_NAME
": Sending %i samples to audio.",
track.num_samples);
module_tts_output_server(&track, format);
bytes_stored = 0;
}
if (pico_state != STATE_PLAY) {
text_remaining = 0;
break;
}
} while (PICO_STEP_BUSY == getstatus);
}
g_free(picoInp);
picoInp = NULL;
return 0;
}
/* Public functions */
int module_load(void)
{
INIT_SETTINGS_TABLES();
MOD_OPTION_1_INT_REG(Debug, 0);
MOD_OPTION_1_STR_REG(PicoLingwarePath, PICO_LINGWARE_PATH);
return 0;
}
int pico_init_voice(int voice_index)
{
int ret;
pico_Retstring outMessage;
pico_Char picoTaFileName[PICO_MAX_DATAPATH_NAME_SIZE +
PICO_MAX_FILE_NAME_SIZE];
pico_Char picoSgFileName[PICO_MAX_DATAPATH_NAME_SIZE +
PICO_MAX_FILE_NAME_SIZE];
pico_Retstring picoTaResourceName;
pico_Retstring picoSgResourceName;
/* Load the text analysis Lingware resource file. */
strcpy((char *)picoTaFileName, PicoLingwarePath);
strcat((char *)picoTaFileName,
(const char *)picoInternalTaLingware[voice_index]);
if ((ret =
pico_loadResource(picoSystem, picoTaFileName, &picoTaResource))) {
pico_getSystemStatusMessage(picoSystem, ret, outMessage);
DBG(MODULE_NAME
": Cannot load TA Lingware resource file (%i): %s\n", ret,
outMessage);
return -1;
}
/* Load the signal generation Lingware resource file. */
strcpy((char *)picoSgFileName, PicoLingwarePath);
strcat((char *)picoSgFileName,
(const char *)picoInternalSgLingware[voice_index]);
if ((ret =
pico_loadResource(picoSystem, picoSgFileName, &picoSgResource))) {
pico_getSystemStatusMessage(picoSystem, ret, outMessage);
DBG(MODULE_NAME
": Cannot load SG Lingware resource file (%i): %s\n", ret,
outMessage);
return -1;
}
/* Get the text analysis resource name. */
if ((ret = pico_getResourceName(picoSystem, picoTaResource,
picoTaResourceName))) {
pico_getSystemStatusMessage(picoSystem, ret, outMessage);
DBG(MODULE_NAME
": Cannot get TA resource name (%i): %s\n", ret, outMessage);
return -1;
}
/* Get the signal generation resource name. */
if ((ret = pico_getResourceName(picoSystem, picoSgResource,
picoSgResourceName))) {
pico_getSystemStatusMessage(picoSystem, ret, outMessage);
DBG(MODULE_NAME
": Cannot get SG resource name (%i): %s\n", ret, outMessage);
return -1;
}
/* Create a voice definition. */
if ((ret = pico_createVoiceDefinition(picoSystem, (const pico_Char *)
pico_voices[voice_index].name))) {
pico_getSystemStatusMessage(picoSystem, ret, outMessage);
DBG(MODULE_NAME
": Cannot create voice definition (%i): %s\n", ret,
outMessage);
return -1;
}
/* Add the text analysis resource to the voice. */
if ((ret = pico_addResourceToVoiceDefinition(picoSystem,
(const pico_Char *)
pico_voices
[voice_index].name,
(pico_Char *)
picoTaResourceName))) {
pico_getSystemStatusMessage(picoSystem, ret, outMessage);
DBG(MODULE_NAME
": Cannot add TA resource to the voice (%i): %s\n",
ret, outMessage);
return -1;
}
/* Add the signal generation resource to the voice. */
if ((ret = pico_addResourceToVoiceDefinition(picoSystem,
(const pico_Char *)
pico_voices
[voice_index].name,
(pico_Char *)
picoSgResourceName))) {
pico_getSystemStatusMessage(picoSystem, ret, outMessage);
DBG(MODULE_NAME
": Cannot add SG resource to the voice (%i): %s\n",
ret, outMessage);
return -1;
}
return 0;
}
int module_init(char **status_info)
{
int ret, i;
pico_Retstring outMessage;
void *pmem;
module_audio_set_server();
pmem = g_malloc(PICO_MEM_SIZE);
if ((ret = pico_initialize(pmem, PICO_MEM_SIZE, &picoSystem))) {
pico_getSystemStatusMessage(picoSystem, ret, outMessage);
*status_info = g_strdup_printf(MODULE_NAME
": Cannot initialize (%i): %s\n",
ret, outMessage);
g_free(pmem);
return -1;
}
/* load resource for all language, probably need only one */
for (i = 0; i < sizeof(pico_voices) / sizeof(SPDVoice); i++) {
if (0 != pico_init_voice(i)) {
g_free(pmem);
*status_info = g_strdup_printf(MODULE_NAME
": fail init voice (%s)\n",
pico_voices[i].name);
return -1;
}
}
/* Create a new Pico engine, english default */
if ((ret = pico_newEngine(picoSystem,
(const pico_Char *)pico_voices[0].name,
&picoEngine))) {
pico_getSystemStatusMessage(picoSystem, ret, outMessage);
*status_info = g_strdup_printf(MODULE_NAME
": Cannot create a new pico engine (%i): %s\n",
ret, outMessage);
return -1;
}
*status_info = g_strdup(MODULE_NAME ": Initialized successfully.");
pico_state = STATE_IDLE;
return 0;
}
SPDVoice **module_list_voices(void)
{
return (SPDVoice **)pico_voices_list;
}
int pico_set_synthesis_voice(char *voice_name)
{
int ret;
pico_Retstring outMessage;
DBG(MODULE_NAME ": setting voice %s", voice_name);
if (picoEngine && (ret = pico_disposeEngine(picoSystem, &picoEngine))) {
pico_getSystemStatusMessage(picoSystem, ret, outMessage);
DBG(MODULE_NAME
": Cannot dispose pico engine (%i): %s\n", ret, outMessage);
return 0;
}
/* Create a new Pico engine */
if ((ret = pico_newEngine(picoSystem, (const pico_Char *)voice_name,
&picoEngine))) {
pico_getSystemStatusMessage(picoSystem, ret, outMessage);
DBG(MODULE_NAME
": Cannot create a new pico engine (%i): %s\n", ret,
outMessage);
return 0;
}
return 1;
}
static void pico_set_language(char *lang)
{
int i;
DBG(MODULE_NAME ": setting language %s", lang);
/* get voice name based on language */
for (i = 0; i < sizeof(pico_voices) / sizeof(SPDVoice); i++) {
if (!strcasecmp(pico_voices[i].language, lang)) {
if (pico_set_synthesis_voice(pico_voices[i].name))
return;
}
}
/* get voice name based on main part of language */
for (i = 0; i < sizeof(pico_voices) / sizeof(SPDVoice); i++) {
if (!strncasecmp(pico_voices[i].language, lang, 2)) {
if (pico_set_synthesis_voice(pico_voices[i].name))
return;
}
}
/* Try to fallback to english */
pico_set_synthesis_voice(pico_voices[0].name);
return;
}
void pico_set_synthesis_voice_fallback(char *voice_name)
{
if (!pico_set_synthesis_voice(voice_name))
/* Try to fallback to language */
pico_set_language(msg_settings.voice.language);
}
void module_speak_sync(const char *data, size_t bytes, SPDMessageType msgtype)
{
int value;
static pico_Char *tmp;
if (pico_state != STATE_IDLE) {
DBG(MODULE_NAME
": module still speaking state = %d", pico_state);
module_speak_error();
return;
}
/* Setting speech parameters. */
/* Set language first, since that sets the voice */
UPDATE_STRING_PARAMETER(voice.language, pico_set_language);
/* Then set the voice if needed */
UPDATE_STRING_PARAMETER(voice.name, pico_set_synthesis_voice_fallback);
/* UPDATE_PARAMETER(voice_type, pico_set_voice); */
picoInp = (pico_Char *) module_strip_ssml(data);
value = pico_set_rate(msg_settings.rate);
if (PICO_VOICE_SPEED_DEFAULT != value) {
tmp = picoInp;
picoInp = (pico_Char *)
g_strdup_printf("<speed level='%d'>%s</speed>", value, tmp);
g_free(tmp);
}
value = pico_set_volume(msg_settings.volume);
if (PICO_VOICE_VOLUME_DEFAULT != value) {
tmp = picoInp;
picoInp = (pico_Char *)
g_strdup_printf("<volume level='%d'>%s</volume>", value,
tmp);
g_free(tmp);
}
value = pico_set_pitch(msg_settings.pitch);
if (PICO_VOICE_PITCH_DEFAULT != value) {
tmp = picoInp;
picoInp = (pico_Char *)
g_strdup_printf("<pitch level='%d'>%s</pitch>", value, tmp);
g_free(tmp);
}
/* TODO: use a generic engine for SPELL, CHAR, KEY */
/* switch (msgtype) {
case SPD_MSGTYPE_CHAR:
case SPD_MSGTYPE_KEY:
case SPD_MSGTYPE_TEXT:
case SPD_MSGTYPE_SOUND_ICON:
default:
DBG(MODULE_NAME
": msgtype = %d", msgtype);
break;
}
*/
pico_state = STATE_PLAY;
module_speak_ok();
DBG(MODULE_NAME ": Sending to TTS engine");
module_report_event_begin();
if (0 != pico_process_tts()) {
DBG(MODULE_NAME ": ERROR in TTS");
}
module_report_event_end();
pico_state = STATE_IDLE;
}
int module_stop(void)
{
pico_Status ret;
pico_Retstring outMessage;
if (pico_state != STATE_PLAY) {
DBG(MODULE_NAME ": STOP called when not in PLAY state");
return -1;
}
pico_state = STATE_STOP;
/* reset Pico engine. */
if ((ret = pico_resetEngine(picoEngine, PICO_RESET_SOFT))) {
pico_getSystemStatusMessage(picoSystem, ret, outMessage);
DBG(MODULE_NAME
": Cannot reset pico engine (%i): %s\n", ret, outMessage);
return -1;
}
return 0;
}
size_t module_pause(void)
{
pico_Status ret;
pico_Retstring outMessage;
if (pico_state != STATE_PLAY) {
DBG(MODULE_NAME ": PAUSE called when not in PLAY state");
return -1;
}
pico_state = STATE_PAUSE;
/* reset Pico engine. */
if ((ret = pico_resetEngine(picoEngine, PICO_RESET_SOFT))) {
pico_getSystemStatusMessage(picoSystem, ret, outMessage);
DBG(MODULE_NAME
": Cannot reset pico engine (%i): %s\n", ret, outMessage);
return -1;
}
return 0;
}
int module_close(void)
{
pico_state = STATE_CLOSE;
if (picoSystem) {
pico_terminate(&picoSystem);
picoSystem = NULL;
}
return 0;
}