speechd-discuss
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] pico sends audio by chunk


From: Laurent Pelecq
Subject: [PATCH] pico sends audio by chunk
Date: Sun, 2 Feb 2014 18:38:07 +0100 (CET)

Hello,

I can't make the pico module work. It seems that it sends the audio data
in chunks of 128 bytes, but module_tts_output expects the full text all
at once. With ALSA, the device is initialized and uninitialized for each
chunk.

The following patch works for me. It accumulates the samples in a buffer
before playing them.

Regards,

Laurent


---
  src/modules/pico.c | 87 +++++++++++++++++++++++++++++++++++++++---------------
  1 file changed, 63 insertions(+), 24 deletions(-)

diff --git a/src/modules/pico.c b/src/modules/pico.c
index 9df4622..860a73b 100644
--- a/src/modules/pico.c
+++ b/src/modules/pico.c
@@ -46,6 +46,9 @@
  DECLARE_DEBUG();

  #define MAX_OUTBUF_SIZE               (128)
+#define SAMPLE_SIZE            2
+#define MAX_OUTBUF_NUM_SAMPLES (MAX_OUTBUF_SIZE / SAMPLE_SIZE)
+
  #define PICO_MEM_SIZE                 (10000000)

  #define PICO_VOICE_SPEED_MIN          (20)
@@ -162,7 +165,8 @@ static int pico_process_tts(void)
  {
        pico_Int16 bytes_sent, bytes_recv, text_remaining, out_data_type;
        int ret, getstatus;
-       short outbuf[MAX_OUTBUF_SIZE];
+       size_t max_samples, num_samples;
+       short *samples;
        pico_Retstring outMessage;
        AudioTrack track;
  #if defined(BYTE_ORDER) && (BYTE_ORDER == BIG_ENDIAN)
@@ -176,6 +180,10 @@ static int pico_process_tts(void)

        DBG(MODULE_NAME " Text: %s\n", picoInp);

+       max_samples = MAX_OUTBUF_NUM_SAMPLES;
+       num_samples = 0;
+       samples = g_new(short, max_samples);
+
        /* synthesis loop   */
        while (text_remaining) {
                /* Feed the text into the engine.   */
@@ -192,9 +200,16 @@ static int pico_process_tts(void)
                buf += bytes_sent;

                do {
+                       if (num_samples + MAX_OUTBUF_NUM_SAMPLES >= 
max_samples) {
+                               samples = g_renew(short, samples, max_samples * 
2);
+                               if (samples == NULL) {
+                                       return -1;
+                               }
+                               max_samples *= 2;
+                       }
                        /* Retrieve the samples and add them to the buffer.
                           SVOX pico TTS sample rate is 16K */
-                       getstatus = pico_getData(picoEngine, (void *)outbuf,
+                       getstatus = pico_getData(picoEngine, (void *)(samples + 
num_samples),
                                                 MAX_OUTBUF_SIZE, &bytes_recv,
                                                 &out_data_type);
                        if ((getstatus != PICO_STEP_BUSY)
@@ -207,29 +222,32 @@ static int pico_process_tts(void)
                                return -1;
                        }

-                       if (bytes_recv) {
-                               track.num_samples = bytes_recv / 2;
-                               track.samples =
-                                   (short *)g_memdup((gconstpointer) outbuf,
-                                                     bytes_recv);
-                               track.num_channels = 1;
-                               track.sample_rate = PICO_SAMPLE_RATE;
-                               track.bits = 16;
-                               DBG(MODULE_NAME
-                                   ": Sending %i samples to audio.",
-                                   track.num_samples);
-
-                               if (module_tts_output(track, format) < 0) {
-                                       DBG(MODULE_NAME
-                                           "Can't play track for unknown 
reason.");
-                                       return -1;
-                               }
+                       if (bytes_recv > 0) {
+                               num_samples += (bytes_recv / SAMPLE_SIZE);
                        }
                        if (g_atomic_int_get(&pico_state) != STATE_PLAY) {
                                text_remaining = 0;
                                break;
                        }
                } while (PICO_STEP_BUSY == getstatus);
+
+               if (num_samples > 0) {
+                       track.num_samples = num_samples;
+                       track.samples = samples;
+                       track.num_channels = 1;
+                       track.sample_rate = PICO_SAMPLE_RATE;
+                       track.bits = 16;
+                       DBG(MODULE_NAME
+                           ": Sending %i samples to audio.",
+                           track.num_samples);
+
+                       if (module_tts_output(track, format) < 0) {
+                               DBG(MODULE_NAME
+                                   "Can't play track for unknown reason.");
+                               return -1;
+                       }
+               }
+               g_free(samples);
        }

        g_free(picoInp);
@@ -286,7 +304,7 @@ int module_load(void)
  {
        INIT_SETTINGS_TABLES();

-       MOD_OPTION_1_INT_REG(Debug, 0);
+       REGISTER_DEBUG();
        MOD_OPTION_1_STR_REG(PicoLingwarePath, PICO_LINGWARE_PATH);

        return 0;
@@ -303,6 +321,10 @@ int pico_init_voice(int voice_index)
        pico_Char picoTaResourceName[PICO_MAX_RESOURCE_NAME_SIZE];
        pico_Char picoSgResourceName[PICO_MAX_RESOURCE_NAME_SIZE];

+       DBG(MODULE_NAME
+           ": Initializing voice %i: %s.",
+           voice_index,
+           pico_voices[voice_index].name);
        /* Load the text analysis Lingware resource file.   */
        strcpy((char *)picoTaFileName, PicoLingwarePath);
        strcat((char *)picoTaFileName,
@@ -458,6 +480,10 @@ void pico_set_synthesis_voice(char *voice_name)
        int ret;
        pico_Retstring outMessage;

+       DBG(MODULE_NAME
+           ": Set voice '%s'.",
+           voice_name);
+
        /* Create a new Pico engine, english default */
        if ((ret = pico_disposeEngine(picoSystem, &picoEngine))) {
                pico_getSystemStatusMessage(picoSystem, ret, outMessage);
@@ -483,6 +509,9 @@ static void pico_set_language(char *lang)
  {
        int i;

+       DBG(MODULE_NAME
+           ": Set language %s.",
+           lang);
        /* get voice name based on language */
        for (i = 0; i < sizeof(pico_voices) / sizeof(SPDVoice); i++) {
                if (!strcmp(pico_voices[i].language, lang)) {
@@ -497,6 +526,7 @@ int module_speak(char *data, size_t bytes, SPDMessageType 
msgtype)
  {
        int value;
        static pico_Char *tmp;
+       const char *msgtype_name = "UNKNOWN";

        if (g_atomic_int_get(&pico_state) != STATE_IDLE) {
                DBG(MODULE_NAME
@@ -540,17 +570,26 @@ int module_speak(char *data, size_t bytes, SPDMessageType 
msgtype)
                g_free(tmp);
        }

-/*     switch (msgtype) {
+       switch (msgtype) {
                case SPD_MSGTYPE_CHAR:
+                       msgtype_name = "CHAR";
+                       break;
                case SPD_MSGTYPE_KEY:
+                       msgtype_name = "KEY";
+                       break;
                case SPD_MSGTYPE_TEXT:
+                       msgtype_name = "TEXT";
+                       break;
                case SPD_MSGTYPE_SOUND_ICON:
+                       msgtype_name = "SOUND_ICON";
+                       break;
                default:
-                       DBG(MODULE_NAME
-                               ": msgtype = %d", msgtype);
+                       msgtype_name = "UNKNOWN";
                        break;
        }
-*/
+       DBG(MODULE_NAME
+           ": msgtype %s = %d", msgtype_name, msgtype);
+
        g_atomic_int_set(&pico_state, STATE_PLAY);
        sem_post(&pico_play_semaphore);
        return bytes;
-- 
1.8.5.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]