src/modules/openjtalk.c - external/github.com/brailcom/speechd - Git at Google

 /*
  * open_jtalk.c - Speech Dispatcher backend for Open JTalk
  *
  * Copyright (C) 2020-2021, 2024-2025 Samuel Thibault <samuel.thibault@ens-lyon.org>
  * Copyright (C) 2023 Chinamu Kawano <tinaxd@protonmail.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY Samuel Thibault AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */

 #define _GNU_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <unistd.h>
 #include <string.h>
 #include <assert.h>
 #include <limits.h>
 #include <sys/stat.h>

 #include "spd_module_main.h"
 #include "module_utils.h"

 #define MODULE_NAME "open_jtalk"
 #define MODULE_VERSION "0.1"

 #define WAV_START_BITS_PER_SAMPLE 34
 #define WAV_START_NUM_CHANNELS 22
 #define WAV_START_SAMPLE_RATE 24
 #define WAV_START_SIZE_OF_SAMPLES 40
 #define WAV_START_SAMPLES 44

 DECLARE_DEBUG();

 MOD_OPTION_1_STR(OpenjtalkDictionaryDirectory);
 MOD_OPTION_1_STR(OpenjtalkVoice);

 int module_load(void)
 {
 	INIT_SETTINGS_TABLES();
 	REGISTER_DEBUG();

 	MOD_OPTION_1_STR_REG(OpenjtalkDictionaryDirectory,
 			     "/var/lib/mecab/dic/open-jtalk/naist-jdic");
 	MOD_OPTION_1_STR_REG(OpenjtalkVoice,
 			     "/usr/share/hts-voice/nitech-jp-atr503-m001/nitech_jp_atr503_m001.htsvoice");

 	DBG("OpenjtalkDictionaryDirectory: %s", OpenjtalkDictionaryDirectory);
 	DBG("OpenjtalkVoice: %s", OpenjtalkVoice);

 	module_audio_set_server();

 	return 0;
 }

 int module_init(char **msg)
 {
 	fprintf(stderr, "initializing\n");

 	/* Check configured data */
 	if (access(OpenjtalkVoice, R_OK) != 0)
 	{
 		fprintf(stderr, "cannot access %s\n", OpenjtalkVoice);
 		perror("open");
 		return -1;
 	}

 	if (access(OpenjtalkDictionaryDirectory, R_OK|X_OK) != 0)
 	{
 		fprintf(stderr, "cannot access %s\n", OpenjtalkDictionaryDirectory);
 		perror("open");
 		return -1;
 	}

 	/* Check that Open JTalk works */
 	char *cmd;
 	if (asprintf(&cmd,
 		     "echo hello | open_jtalk -x %s -m %s -ow /dev/null",
 		     OpenjtalkDictionaryDirectory, OpenjtalkVoice) < 0) {
 		DBG("failed to construct command line");
 		return -1;
 	}

 	if (system(cmd) != 0) {
 		fprintf(stderr, "Open JTalk failed\n");
 		return -1;
 	}

 	*msg = strdup("ok!");

 	return 0;
 }

 SPDVoice **module_list_voices(void)
 {
 	/* TODO: Allow users to choose htsvoice */
 	SPDVoice **ret = malloc(2 * sizeof(*ret));

 	ret[0] = malloc(sizeof(*(ret[0])));
 	ret[0]->name = strdup("Default");
 	ret[0]->language = strdup("ja");
 	ret[0]->variant = NULL;

 	ret[1] = NULL;

 	return ret;
 }

 void module_speak_sync(const char *data, size_t bytes, SPDMessageType msgtype)
 {
 	module_speak_ok();

 	DBG("speaking '%s'", data);

 	module_report_event_begin();

 	/* Strip SSML (Open JTalk does not support it.) */
 	char *plain_data = module_strip_ssml(data);

 	char template[] = "/tmp/speechd-openjtalk-XXXXXX";
 	umask(0077);
 	int tmp_fd = mkstemp(template);
 	if (tmp_fd == -1) {
 		DBG("temporary .wav file creation failed");
 		goto FINISH;
 	}

 	/* do not need fd */
 	close(tmp_fd);

 	/* construct command line */
 	char *cmd;
 	if (asprintf(&cmd,
 		     "open_jtalk -x %s -m %s -ow %s",
 		     OpenjtalkDictionaryDirectory, OpenjtalkVoice,
 		     template) == -1) {
 		DBG("failed to construct command line");
 		goto TEMPLATE_FINISH;
 	}

 	FILE *oj_fp = popen(cmd, "w");
 	free(cmd);
 	if (oj_fp == NULL) {
 		DBG("failed to execute open_jtalk");
 		goto TEMPLATE_FINISH;
 	}

 	fprintf(oj_fp, "%s", plain_data);

 	/* wait for finish */
 	int status = pclose(oj_fp);
 	if (status != 0) {
 		DBG("open_jtalk exited with non-zero code");
 		goto TEMPLATE_FINISH;
 	}

 	/* play the output wav */
 	DBG("output to %s", template);

 	AudioTrack track = {
 		.bits = 0,
 		.num_channels = 0,
 		.sample_rate = 0,
 		.num_samples = 0,
 		.samples = NULL
 	};
 	AudioFormat format = SPD_AUDIO_LE;

 	FILE *audio_fp = fopen(template, "rb");
 	if (audio_fp == NULL) {
 		DBG("failed to open wav file");
 		goto TEMPLATE_FINISH;
 	}
 	DBG("opened wav file");

 	fseek(audio_fp, WAV_START_BITS_PER_SAMPLE, SEEK_SET);
 	size_t ret = fread(&track.bits, 2, 1, audio_fp);
 	if (ret != 1) {
 		DBG("failed to read track.bits");
 		goto FP_FINISH;
 	}
 	DBG("read track.bits");

 	fseek(audio_fp, WAV_START_NUM_CHANNELS, SEEK_SET);
 	ret = fread(&track.num_channels, 2, 1, audio_fp);
 	if (ret != 1) {
 		DBG("failed to read track.num_channels");
 		goto FP_FINISH;
 	}
 	DBG("read track.num_channels");

 	fseek(audio_fp, WAV_START_SAMPLE_RATE, SEEK_SET);
 	ret = fread(&track.sample_rate, 4, 1, audio_fp);
 	if (ret != 1) {
 		DBG("failed to read track.sample_rate");
 		goto FP_FINISH;
 	}
 	DBG("read track.sample_rate");

 	fseek(audio_fp, WAV_START_SIZE_OF_SAMPLES, SEEK_SET);
 	ret = fread(&track.num_samples, 4, 1, audio_fp);
 	if (ret != 1) {
 		DBG("failed to read track.num_samples");
 		goto FP_FINISH;
 	}
 	track.num_samples =
 	    track.num_samples / (track.num_channels) / (track.bits / 8);
 	DBG("read track.num_samples");
 	DBG("bits: %d num_channels: %d sample_rate: %d num_samples: %d",
 	    track.bits, track.num_channels, track.sample_rate,
 	    track.num_samples);

 	fseek(audio_fp, WAV_START_SAMPLES, SEEK_SET);
 	track.samples =
 	    malloc(track.num_samples * track.num_channels * track.bits / 8);
 	ret =
 	    fread(track.samples, track.bits / 8,
 		  track.num_samples * track.num_channels, audio_fp);
 	if (ret != track.num_samples * track.num_channels) {
 		DBG("failed to read track.samples");
 		free(track.samples);
 		goto FP_FINISH;
 	}
 	DBG("read track.samples");

 	module_tts_output_server(&track, format);

 	free(track.samples);

 FP_FINISH:
 	fclose(audio_fp);

 TEMPLATE_FINISH:
 	unlink(template);

 FINISH:
 	free(plain_data);

 	module_report_event_end();
 }

 size_t module_pause(void)
 {
 	/* not supported */
 	DBG("pausing (not supported)");

 	return -1;
 }

 int module_stop(void)
 {
 	/* not supported */
 	DBG("stopping (not supported)");

 	return 0;
 }

 int module_close(void)
 {
 	DBG("closing");

 	return 0;
 }
	/*
	* open_jtalk.c - Speech Dispatcher backend for Open JTalk
	*
	* Copyright (C) 2020-2021, 2024-2025 Samuel Thibault <samuel.thibault@ens-lyon.org>
	* Copyright (C) 2023 Chinamu Kawano <tinaxd@protonmail.com>
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	*
	* THIS SOFTWARE IS PROVIDED BY Samuel Thibault AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*/

	#define _GNU_SOURCE
	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>
	#include <unistd.h>
	#include <string.h>
	#include <assert.h>
	#include <limits.h>
	#include <sys/stat.h>

	#include "spd_module_main.h"
	#include "module_utils.h"

	#define MODULE_NAME "open_jtalk"
	#define MODULE_VERSION "0.1"

	#define WAV_START_BITS_PER_SAMPLE 34
	#define WAV_START_NUM_CHANNELS 22
	#define WAV_START_SAMPLE_RATE 24
	#define WAV_START_SIZE_OF_SAMPLES 40
	#define WAV_START_SAMPLES 44

	DECLARE_DEBUG();

	MOD_OPTION_1_STR(OpenjtalkDictionaryDirectory);
	MOD_OPTION_1_STR(OpenjtalkVoice);

	int module_load(void)
	{
	INIT_SETTINGS_TABLES();
	REGISTER_DEBUG();

	MOD_OPTION_1_STR_REG(OpenjtalkDictionaryDirectory,
	"/var/lib/mecab/dic/open-jtalk/naist-jdic");
	MOD_OPTION_1_STR_REG(OpenjtalkVoice,
	"/usr/share/hts-voice/nitech-jp-atr503-m001/nitech_jp_atr503_m001.htsvoice");

	DBG("OpenjtalkDictionaryDirectory: %s", OpenjtalkDictionaryDirectory);
	DBG("OpenjtalkVoice: %s", OpenjtalkVoice);

	module_audio_set_server();

	return 0;
	}

	int module_init(char **msg)
	{
	fprintf(stderr, "initializing\n");

	/* Check configured data */
	if (access(OpenjtalkVoice, R_OK) != 0)
	{
	fprintf(stderr, "cannot access %s\n", OpenjtalkVoice);
	perror("open");
	return -1;
	}

	if (access(OpenjtalkDictionaryDirectory, R_OK\|X_OK) != 0)
	{
	fprintf(stderr, "cannot access %s\n", OpenjtalkDictionaryDirectory);
	perror("open");
	return -1;
	}

	/* Check that Open JTalk works */
	char *cmd;
	if (asprintf(&cmd,
	"echo hello \| open_jtalk -x %s -m %s -ow /dev/null",
	OpenjtalkDictionaryDirectory, OpenjtalkVoice) < 0) {
	DBG("failed to construct command line");
	return -1;
	}

	if (system(cmd) != 0) {
	fprintf(stderr, "Open JTalk failed\n");
	return -1;
	}

	*msg = strdup("ok!");

	return 0;
	}

	SPDVoice **module_list_voices(void)
	{
	/* TODO: Allow users to choose htsvoice */
	SPDVoice *ret = malloc(2 sizeof(*ret));

	ret[0] = malloc(sizeof(*(ret[0])));
	ret[0]->name = strdup("Default");
	ret[0]->language = strdup("ja");
	ret[0]->variant = NULL;

	ret[1] = NULL;

	return ret;
	}

	void module_speak_sync(const char *data, size_t bytes, SPDMessageType msgtype)
	{
	module_speak_ok();

	DBG("speaking '%s'", data);

	module_report_event_begin();

	/* Strip SSML (Open JTalk does not support it.) */
	char *plain_data = module_strip_ssml(data);

	char template[] = "/tmp/speechd-openjtalk-XXXXXX";
	umask(0077);
	int tmp_fd = mkstemp(template);
	if (tmp_fd == -1) {
	DBG("temporary .wav file creation failed");
	goto FINISH;
	}

	/* do not need fd */
	close(tmp_fd);

	/* construct command line */
	char *cmd;
	if (asprintf(&cmd,
	"open_jtalk -x %s -m %s -ow %s",
	OpenjtalkDictionaryDirectory, OpenjtalkVoice,
	template) == -1) {
	DBG("failed to construct command line");
	goto TEMPLATE_FINISH;
	}

	FILE *oj_fp = popen(cmd, "w");
	free(cmd);
	if (oj_fp == NULL) {
	DBG("failed to execute open_jtalk");
	goto TEMPLATE_FINISH;
	}

	fprintf(oj_fp, "%s", plain_data);

	/* wait for finish */
	int status = pclose(oj_fp);
	if (status != 0) {
	DBG("open_jtalk exited with non-zero code");
	goto TEMPLATE_FINISH;
	}

	/* play the output wav */
	DBG("output to %s", template);

	AudioTrack track = {
	.bits = 0,
	.num_channels = 0,
	.sample_rate = 0,
	.num_samples = 0,
	.samples = NULL
	};
	AudioFormat format = SPD_AUDIO_LE;

	FILE *audio_fp = fopen(template, "rb");
	if (audio_fp == NULL) {
	DBG("failed to open wav file");
	goto TEMPLATE_FINISH;
	}
	DBG("opened wav file");

	fseek(audio_fp, WAV_START_BITS_PER_SAMPLE, SEEK_SET);
	size_t ret = fread(&track.bits, 2, 1, audio_fp);
	if (ret != 1) {
	DBG("failed to read track.bits");
	goto FP_FINISH;
	}
	DBG("read track.bits");

	fseek(audio_fp, WAV_START_NUM_CHANNELS, SEEK_SET);
	ret = fread(&track.num_channels, 2, 1, audio_fp);
	if (ret != 1) {
	DBG("failed to read track.num_channels");
	goto FP_FINISH;
	}
	DBG("read track.num_channels");

	fseek(audio_fp, WAV_START_SAMPLE_RATE, SEEK_SET);
	ret = fread(&track.sample_rate, 4, 1, audio_fp);
	if (ret != 1) {
	DBG("failed to read track.sample_rate");
	goto FP_FINISH;
	}
	DBG("read track.sample_rate");

	fseek(audio_fp, WAV_START_SIZE_OF_SAMPLES, SEEK_SET);
	ret = fread(&track.num_samples, 4, 1, audio_fp);
	if (ret != 1) {
	DBG("failed to read track.num_samples");
	goto FP_FINISH;
	}
	track.num_samples =
	track.num_samples / (track.num_channels) / (track.bits / 8);
	DBG("read track.num_samples");
	DBG("bits: %d num_channels: %d sample_rate: %d num_samples: %d",
	track.bits, track.num_channels, track.sample_rate,
	track.num_samples);

	fseek(audio_fp, WAV_START_SAMPLES, SEEK_SET);
	track.samples =
	malloc(track.num_samples * track.num_channels * track.bits / 8);
	ret =
	fread(track.samples, track.bits / 8,
	track.num_samples * track.num_channels, audio_fp);
	if (ret != track.num_samples * track.num_channels) {
	DBG("failed to read track.samples");
	free(track.samples);
	goto FP_FINISH;
	}
	DBG("read track.samples");

	module_tts_output_server(&track, format);

	free(track.samples);

	FP_FINISH:
	fclose(audio_fp);

	TEMPLATE_FINISH:
	unlink(template);

	FINISH:
	free(plain_data);

	module_report_event_end();
	}

	size_t module_pause(void)
	{
	/* not supported */
	DBG("pausing (not supported)");

	return -1;
	}

	int module_stop(void)
	{
	/* not supported */
	DBG("stopping (not supported)");

	return 0;
	}

	int module_close(void)
	{
	DBG("closing");

	return 0;
	}