blob: 5993380e4953049b1c702fcb8f95e3cfd67b6275 [file] [log] [blame]
/*
* open_jtalk.c - Speech Dispatcher backend for Open JTalk
*
* Copyright (C) 2020-2021, 2024-2025 Samuel Thibault <samuel.thibault@ens-lyon.org>
* Copyright (C) 2023 Chinamu Kawano <tinaxd@protonmail.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY Samuel Thibault AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <string.h>
#include <assert.h>
#include <limits.h>
#include <sys/stat.h>
#include "spd_module_main.h"
#include "module_utils.h"
#define MODULE_NAME "open_jtalk"
#define MODULE_VERSION "0.1"
#define WAV_START_BITS_PER_SAMPLE 34
#define WAV_START_NUM_CHANNELS 22
#define WAV_START_SAMPLE_RATE 24
#define WAV_START_SIZE_OF_SAMPLES 40
#define WAV_START_SAMPLES 44
DECLARE_DEBUG();
MOD_OPTION_1_STR(OpenjtalkDictionaryDirectory);
MOD_OPTION_1_STR(OpenjtalkVoice);
int module_load(void)
{
INIT_SETTINGS_TABLES();
REGISTER_DEBUG();
MOD_OPTION_1_STR_REG(OpenjtalkDictionaryDirectory,
"/var/lib/mecab/dic/open-jtalk/naist-jdic");
MOD_OPTION_1_STR_REG(OpenjtalkVoice,
"/usr/share/hts-voice/nitech-jp-atr503-m001/nitech_jp_atr503_m001.htsvoice");
DBG("OpenjtalkDictionaryDirectory: %s", OpenjtalkDictionaryDirectory);
DBG("OpenjtalkVoice: %s", OpenjtalkVoice);
module_audio_set_server();
return 0;
}
int module_init(char **msg)
{
fprintf(stderr, "initializing\n");
/* Check configured data */
if (access(OpenjtalkVoice, R_OK) != 0)
{
fprintf(stderr, "cannot access %s\n", OpenjtalkVoice);
perror("open");
return -1;
}
if (access(OpenjtalkDictionaryDirectory, R_OK|X_OK) != 0)
{
fprintf(stderr, "cannot access %s\n", OpenjtalkDictionaryDirectory);
perror("open");
return -1;
}
/* Check that Open JTalk works */
char *cmd;
if (asprintf(&cmd,
"echo hello | open_jtalk -x %s -m %s -ow /dev/null",
OpenjtalkDictionaryDirectory, OpenjtalkVoice) < 0) {
DBG("failed to construct command line");
return -1;
}
if (system(cmd) != 0) {
fprintf(stderr, "Open JTalk failed\n");
return -1;
}
*msg = strdup("ok!");
return 0;
}
SPDVoice **module_list_voices(void)
{
/* TODO: Allow users to choose htsvoice */
SPDVoice **ret = malloc(2 * sizeof(*ret));
ret[0] = malloc(sizeof(*(ret[0])));
ret[0]->name = strdup("Default");
ret[0]->language = strdup("ja");
ret[0]->variant = NULL;
ret[1] = NULL;
return ret;
}
void module_speak_sync(const char *data, size_t bytes, SPDMessageType msgtype)
{
module_speak_ok();
DBG("speaking '%s'", data);
module_report_event_begin();
/* Strip SSML (Open JTalk does not support it.) */
char *plain_data = module_strip_ssml(data);
char template[] = "/tmp/speechd-openjtalk-XXXXXX";
umask(0077);
int tmp_fd = mkstemp(template);
if (tmp_fd == -1) {
DBG("temporary .wav file creation failed");
goto FINISH;
}
/* do not need fd */
close(tmp_fd);
/* construct command line */
char *cmd;
if (asprintf(&cmd,
"open_jtalk -x %s -m %s -ow %s",
OpenjtalkDictionaryDirectory, OpenjtalkVoice,
template) == -1) {
DBG("failed to construct command line");
goto TEMPLATE_FINISH;
}
FILE *oj_fp = popen(cmd, "w");
free(cmd);
if (oj_fp == NULL) {
DBG("failed to execute open_jtalk");
goto TEMPLATE_FINISH;
}
fprintf(oj_fp, "%s", plain_data);
/* wait for finish */
int status = pclose(oj_fp);
if (status != 0) {
DBG("open_jtalk exited with non-zero code");
goto TEMPLATE_FINISH;
}
/* play the output wav */
DBG("output to %s", template);
AudioTrack track = {
.bits = 0,
.num_channels = 0,
.sample_rate = 0,
.num_samples = 0,
.samples = NULL
};
AudioFormat format = SPD_AUDIO_LE;
FILE *audio_fp = fopen(template, "rb");
if (audio_fp == NULL) {
DBG("failed to open wav file");
goto TEMPLATE_FINISH;
}
DBG("opened wav file");
fseek(audio_fp, WAV_START_BITS_PER_SAMPLE, SEEK_SET);
size_t ret = fread(&track.bits, 2, 1, audio_fp);
if (ret != 1) {
DBG("failed to read track.bits");
goto FP_FINISH;
}
DBG("read track.bits");
fseek(audio_fp, WAV_START_NUM_CHANNELS, SEEK_SET);
ret = fread(&track.num_channels, 2, 1, audio_fp);
if (ret != 1) {
DBG("failed to read track.num_channels");
goto FP_FINISH;
}
DBG("read track.num_channels");
fseek(audio_fp, WAV_START_SAMPLE_RATE, SEEK_SET);
ret = fread(&track.sample_rate, 4, 1, audio_fp);
if (ret != 1) {
DBG("failed to read track.sample_rate");
goto FP_FINISH;
}
DBG("read track.sample_rate");
fseek(audio_fp, WAV_START_SIZE_OF_SAMPLES, SEEK_SET);
ret = fread(&track.num_samples, 4, 1, audio_fp);
if (ret != 1) {
DBG("failed to read track.num_samples");
goto FP_FINISH;
}
track.num_samples =
track.num_samples / (track.num_channels) / (track.bits / 8);
DBG("read track.num_samples");
DBG("bits: %d num_channels: %d sample_rate: %d num_samples: %d",
track.bits, track.num_channels, track.sample_rate,
track.num_samples);
fseek(audio_fp, WAV_START_SAMPLES, SEEK_SET);
track.samples =
malloc(track.num_samples * track.num_channels * track.bits / 8);
ret =
fread(track.samples, track.bits / 8,
track.num_samples * track.num_channels, audio_fp);
if (ret != track.num_samples * track.num_channels) {
DBG("failed to read track.samples");
free(track.samples);
goto FP_FINISH;
}
DBG("read track.samples");
module_tts_output_server(&track, format);
free(track.samples);
FP_FINISH:
fclose(audio_fp);
TEMPLATE_FINISH:
unlink(template);
FINISH:
free(plain_data);
module_report_event_end();
}
size_t module_pause(void)
{
/* not supported */
DBG("pausing (not supported)");
return -1;
}
int module_stop(void)
{
/* not supported */
DBG("stopping (not supported)");
return 0;
}
int module_close(void)
{
DBG("closing");
return 0;
}