From 9ad34dcb97ff57063e606784e6443953bc1a569d Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Sun, 5 Apr 2026 16:40:04 -0300 Subject: [PATCH 01/26] add imgui speech options --- code/sound/fsspeech.cpp | 172 +++++++++++++++++++++++++++++++++++++--- code/sound/speech.cpp | 15 ++-- 2 files changed, 171 insertions(+), 16 deletions(-) diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp index 65ef525bb3a..da153620c1f 100644 --- a/code/sound/fsspeech.cpp +++ b/code/sound/fsspeech.cpp @@ -10,6 +10,7 @@ #include "osapi/osregistry.h" #include "sound/fsspeech.h" #include "sound/speech.h" +#include "options/Option.h" extern int Cmdline_freespace_no_sound; @@ -30,6 +31,141 @@ const char *FSSpeech_play_id[FSSPEECH_FROM_MAX] = char Speech_buffer[MAX_SPEECH_BUFFER_LEN] = ""; size_t Speech_buffer_len; +static bool ttsingame_change(bool new_val, bool initial) +{ + if (initial) { + return false; + } + FSSpeech_play_from[FSSPEECH_FROM_INGAME] = new_val; + return true; +} + +static bool ttsmulti_change(bool new_val, bool initial) +{ + if (initial) { + return false; + } + FSSpeech_play_from[FSSPEECH_FROM_MULTI] = new_val; + return true; +} + +static bool ttsbriefing_change(bool new_val, bool initial) +{ + if (initial) { + return false; + } + FSSpeech_play_from[FSSPEECH_FROM_BRIEFING] = new_val; + return true; +} + +static bool ttstechroom_change(bool new_val, bool initial) +{ + if (initial) { + return false; + } + FSSpeech_play_from[FSSPEECH_FROM_TECHROOM] = new_val; + return true; +} + +static bool ttsvolume_change(float new_val, bool initial) +{ + if (initial) { + return false; + } + speech_set_volume((unsigned short) new_val); + return true; +} + +static SCP_vector ttsvoice_enumerator() +{ + SCP_vector vals; + auto voices = speech_enumerate_voices(); + for (int i = 0; i < voices.size(); ++i) { + vals.push_back(i); + } + return vals; +} + +static SCP_string ttsvoice_display(int id) +{ + SCP_string out; + auto voices = speech_enumerate_voices(); + sprintf(out, "(%d) %s", id + 1, voices[id].c_str()); + return out; +} + +static bool ttsvoice_change(int id, bool initial) +{ + if (initial) { + return false; + } + speech_set_voice(id); + return true; +} + +static auto SpeechVoiceOption = options::OptionBuilder("Speech.Voice", + std::pair{"TTS Voice", -1}, + std::pair{"The voice used to read text", -1}) + .category(std::make_pair("Audio", 1826)) + .level(options::ExpertLevel::Beginner) + .enumerator(ttsvoice_enumerator) + .display(ttsvoice_display) + .flags({ options::OptionFlags::ForceMultiValueSelection }) + .default_val(0) + .change_listener(ttsvoice_change) + .importance(2) + .finish(); + +static auto SpeechVolumeOption = options::OptionBuilder("Speech.Volume", + std::pair{"TTS Volume", -1}, + std::pair{"Volume used for playing TTS speech", -1}) + .category(std::make_pair("Audio", 1826)) + .range(0.0f, 100.0f) + .default_val(100.0f) + .change_listener(ttsvolume_change) + .importance(1) + .finish(); + +static auto SpeechBriefingOption = options::OptionBuilder("Speech.Briefing", + std::pair{"TTS in briefings", -1}, + std::pair{"Enable or disable TTS in briefings", -1}) + .category(std::make_pair("Audio", 1826)) + .level(options::ExpertLevel::Beginner) + .change_listener(ttsbriefing_change) + .default_val(true) + .importance(0) + .finish(); + +static auto SpeechTechroomOption = options::OptionBuilder("Speech.Techroom", + std::pair{"TTS in techroom", -1}, + std::pair{"Enable or disable TTS in techroom", -1}) + .category(std::make_pair("Audio", 1826)) + .level(options::ExpertLevel::Beginner) + .change_listener(ttstechroom_change) + .default_val(true) + .importance(0) + .finish(); + +static auto SpeechIngameOption = options::OptionBuilder("Speech.Ingame", + std::pair{"TTS in-game", -1}, + std::pair{"Enable or disable TTS in-game", -1}) + .category(std::make_pair("Audio", 1826)) + .level(options::ExpertLevel::Beginner) + .change_listener(ttsingame_change) + .default_val(true) + .importance(0) + .finish(); + +static auto SpeechMultiOption = options::OptionBuilder("Speech.Multi", + std::pair{"TTS in multiplayer", -1}, + std::pair{"Enable or disable TTS in multiplayer", -1}) + .category(std::make_pair("Audio", 1826)) + .level(options::ExpertLevel::Beginner) + .change_listener(ttsmulti_change) + .default_val(true) + .importance(0) + .finish(); + bool fsspeech_init() { if (speech_inited) { @@ -45,18 +181,32 @@ bool fsspeech_init() return false; } - // Get the settings from the registry - for(int i = 0; i < FSSPEECH_FROM_MAX; i++) { - FSSpeech_play_from[i] = - os_config_read_uint(NULL, FSSpeech_play_id[i], 0) ? true : false; - nprintf(("Speech", "Play %s: %s\n", FSSpeech_play_id[i], FSSpeech_play_from[i] ? "true" : "false")); + if (Using_in_game_options) + { + FSSpeech_play_from[FSSPEECH_FROM_TECHROOM] = SpeechTechroomOption->getValue(); + FSSpeech_play_from[FSSPEECH_FROM_BRIEFING] = SpeechBriefingOption->getValue(); + FSSpeech_play_from[FSSPEECH_FROM_INGAME] = SpeechIngameOption->getValue(); + FSSpeech_play_from[FSSPEECH_FROM_MULTI] = SpeechMultiOption->getValue(); + // Early caching of voices names, needed for sapi not to override initial voice selection + speech_enumerate_voices(); + speech_set_volume((unsigned short)SpeechVolumeOption->getValue()); + speech_set_voice(SpeechVoiceOption->getValue()); + } + else + { + // Get the settings from the registry + for (int i = 0; i < FSSPEECH_FROM_MAX; i++) { + FSSpeech_play_from[i] = + os_config_read_uint(NULL, FSSpeech_play_id[i], 0) ? true : false; + nprintf(("Speech", "Play %s: %s\n", FSSpeech_play_id[i], FSSpeech_play_from[i] ? "true" : "false")); + } + + int volume = os_config_read_uint(NULL, "SpeechVolume", 100); + speech_set_volume((unsigned short)volume); + + int voice = os_config_read_uint(NULL, "SpeechVoice", 0); + speech_set_voice(voice); } - - int volume = os_config_read_uint(NULL, "SpeechVolume", 100); - speech_set_volume((unsigned short) volume); - - int voice = os_config_read_uint(NULL, "SpeechVoice", 0); - speech_set_voice(voice); speech_inited = 1; diff --git a/code/sound/speech.cpp b/code/sound/speech.cpp index 7967950ac10..f958f32d1fb 100644 --- a/code/sound/speech.cpp +++ b/code/sound/speech.cpp @@ -66,12 +66,12 @@ #include #include #pragma warning(pop) - #include "globalincs/pstypes.h" #include "utils/unicode.h" #include "speech.h" - +static SCP_vector cached_voices; +static bool voices_cached = false; bool Speech_init = false; bool speech_init() @@ -303,6 +303,9 @@ bool speech_is_speaking() SCP_vector speech_enumerate_voices() { + if (voices_cached) { + return cached_voices; + } #ifdef _WIN32 HRESULT hr = CoCreateInstance( CLSID_SpVoice, @@ -368,9 +371,11 @@ SCP_vector speech_enumerate_voices() } comTokenCategory->Release(); - - Voice_device->Release(); - + //only release the voice_device when getting flags + if (!Speech_init) + Voice_device->Release(); + voices_cached = true; + cached_voices = voices; return voices; #else STUB_FUNCTION; From 839d6b662da27d054b0d481133f7139c893eebe3 Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Sun, 5 Apr 2026 18:47:13 -0300 Subject: [PATCH 02/26] adapt existing windows sapi speech implementation --- code/sound/fsspeech.cpp | 26 +++- code/sound/speech.h | 4 +- code/sound/{speech.cpp => speech_win.cpp} | 157 +++------------------- code/source_groups.cmake | 8 +- 4 files changed, 50 insertions(+), 145 deletions(-) rename code/sound/{speech.cpp => speech_win.cpp} (68%) diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp index da153620c1f..ecc7eb64b1a 100644 --- a/code/sound/fsspeech.cpp +++ b/code/sound/fsspeech.cpp @@ -166,6 +166,22 @@ static auto SpeechMultiOption = options::OptionBuilder("Speech.Multi", .importance(0) .finish(); +void sanitize_text(const char* input, SCP_string& output) { + output.clear(); + bool saw_dollar = false; + for (auto ch : unicode::codepoint_range(input)) { + if (ch == UNICODE_CHAR('$')) { + saw_dollar = true; + continue; + } + else if (saw_dollar) { + saw_dollar = false; + continue; + } + unicode::encode(ch, std::back_inserter(output)); + } +} + bool fsspeech_init() { if (speech_inited) { @@ -225,6 +241,11 @@ void fsspeech_deinit() void fsspeech_play(int type, const char *text) { + if (text == nullptr) { + nprintf(("Speech", "Not playing speech because passed text is null.\n")); + return; + } + if (!speech_inited) { nprintf(("Speech", "Aborting fsspech_play because speech_inited is false.\n")); return; @@ -240,7 +261,10 @@ void fsspeech_play(int type, const char *text) return; } - speech_play(text); + SCP_string sanitized_string; + sanitize_text(text, sanitized_string); + + speech_play(sanitized_string); } void fsspeech_stop() diff --git a/code/sound/speech.h b/code/sound/speech.h index 3f731dd5a7f..e16eeef2a43 100644 --- a/code/sound/speech.h +++ b/code/sound/speech.h @@ -15,7 +15,7 @@ bool speech_init(); void speech_deinit(); -bool speech_play(const char *text); +bool speech_play(const SCP_string& text); bool speech_pause(); bool speech_resume(); bool speech_stop(); @@ -31,7 +31,7 @@ SCP_vector speech_enumerate_voices(); inline bool speech_init() { return false; } inline void speech_deinit() {} -inline bool speech_play(const char* /*text*/) { return false; } +inline bool speech_play(const SCP_string& /*text*/) { return false; } inline bool speech_pause() { return false; } inline bool speech_resume() { return false; } inline bool speech_stop() { return false; } diff --git a/code/sound/speech.cpp b/code/sound/speech_win.cpp similarity index 68% rename from code/sound/speech.cpp rename to code/sound/speech_win.cpp index f958f32d1fb..90698a52417 100644 --- a/code/sound/speech.cpp +++ b/code/sound/speech_win.cpp @@ -5,26 +5,18 @@ * created based on the source. * */ - - - - - #ifndef FS2_SPEECH -#if defined(_WIN32) || defined(__APPLE__) +#if defined(_WIN32) #if NDEBUG #pragma message( "WARNING: You have not compiled speech into this build (use FS2_SPEECH)" ) #endif // NDEBUG -#endif // _WIN32 or __APPLE__ -#elif !defined(__APPLE__) // to end-of-file ... - +#endif // _WIN32 +#else // FS2_SPEECH #ifdef LAUNCHER #include "stdafx.h" #endif //LAUNCHER -#ifdef _WIN32 - // Since we define these ourself we need to undefine them for the sapi header #pragma push_macro("strcpy_s") #pragma push_macro("strncpy_s") @@ -37,10 +29,9 @@ #undef memset #undef memcpy - #include - #include - - #include +#include +#include +#include #pragma pushpop_macro("strcpy_s") #pragma pushpop_macro("strncpy_s") @@ -48,16 +39,7 @@ #pragma pushpop_macro("memset") #pragma pushpop_macro("memcpy") - ISpVoice *Voice_device; -#elif defined(SCP_UNIX) - #include -// #include - - int speech_dev = -1; -// FILE *speech_dev = NULL; -#else - #pragma error( "ERROR: Unknown platform, speech (FS2_SPEECH) is not supported" ) -#endif //_WIN32 +ISpVoice *Voice_device; #pragma warning(push) #pragma warning(disable: 4995) @@ -76,7 +58,6 @@ bool Speech_init = false; bool speech_init() { -#ifdef _WIN32 HRESULT hr = CoCreateInstance( CLSID_SpVoice, NULL, @@ -85,19 +66,6 @@ bool speech_init() (void **)&Voice_device); Speech_init = SUCCEEDED(hr); -#else - - speech_dev = open("/dev/speech", O_WRONLY | O_DIRECT); -// speech_dev = fopen("/dev/speech", "w"); - - if (speech_dev == -1) { -// if (speech_dev == NULL) { - mprintf(("Couldn't open '/dev/speech', turning text-to-speech off...\n")); - return false; - } - - Speech_init = true; -#endif nprintf(("Speech", "Speech init %s\n", Speech_init ? "succeeded!" : "failed!")); return Speech_init; @@ -106,44 +74,22 @@ bool speech_init() void speech_deinit() { if(Speech_init == false) return; - -#ifdef _WIN32 Voice_device->Release(); -#else - close(speech_dev); -// fclose(speech_dev); -#endif } -bool speech_play(const char *text) +bool speech_play(const SCP_string& text) { - nprintf(("Speech", "Attempting to play speech string %s...\n", text)); + nprintf(("Speech", "Attempting to play speech string %s...\n", text.c_str())); if(Speech_init == false) return true; - if (text == NULL) { - nprintf(("Speech", "Not playing speech because passed text is null.\n")); - return false; - } - -#ifdef _WIN32 - SCP_string work_buffer; - - bool saw_dollar = false; - for (auto ch : unicode::codepoint_range(text)) { - if (ch == UNICODE_CHAR('$')) { - // Skip $ escape sequences which appear in briefing text - saw_dollar = true; - continue; - } else if (saw_dollar) { - saw_dollar = false; - continue; - } - unicode::encode(ch, std::back_inserter(work_buffer)); + if (text.empty()) { + nprintf(("Speech", "Not playing speech because passed text is empty.\n")); + return false; } // Determine the needed amount of data - auto num_chars = MultiByteToWideChar(CP_UTF8, 0, work_buffer.c_str(), (int) work_buffer.size(), nullptr, 0); + auto num_chars = MultiByteToWideChar(CP_UTF8, 0, text.c_str(), (int)text.size(), nullptr, 0); if (num_chars <= 0) { // Error @@ -153,7 +99,7 @@ bool speech_play(const char *text) std::wstring wide_string; wide_string.resize(num_chars); - auto err = MultiByteToWideChar(CP_UTF8, 0, work_buffer.c_str(), (int)work_buffer.size(), &wide_string[0], num_chars); + auto err = MultiByteToWideChar(CP_UTF8, 0, text.c_str(), (int)text.size(), &wide_string[0], num_chars); if (err <= 0) { return false; @@ -161,88 +107,33 @@ bool speech_play(const char *text) speech_stop(); return SUCCEEDED(Voice_device->Speak(wide_string.c_str(), SPF_ASYNC, NULL)); -#else - int len = strlen(text); - char Conversion_buffer[MAX_SPEECH_CHAR_LEN]; - - if(len > (MAX_SPEECH_CHAR_LEN - 1)) { - len = MAX_SPEECH_CHAR_LEN - 1; - } - - int count = 0; - for(int i = 0; i < len; i++) { - if(text[i] == '$') { - i++; - continue; - } - - Conversion_buffer[count] = text[i]; - count++; - } - - Conversion_buffer[count] = '\0'; - - if ( write(speech_dev, Conversion_buffer, count) == -1 ) - return false; -// if (fwrite(Conversion_buffer, count, 1, speech_dev)) -// fflush(speech_dev); -// else -// return false; - - return true; -#endif //_WIN32 } bool speech_pause() { if(Speech_init == false) return true; -#ifdef _WIN32 return SUCCEEDED(Voice_device->Pause()); -#else - STUB_FUNCTION; - - return true; -#endif } bool speech_resume() { if(Speech_init == false) return true; -#ifdef _WIN32 return SUCCEEDED(Voice_device->Resume()); -#else - STUB_FUNCTION; - - return true; -#endif } bool speech_stop() { if(Speech_init == false) return true; -#ifdef _WIN32 return SUCCEEDED(Voice_device->Speak( NULL, SPF_PURGEBEFORESPEAK, NULL )); -#else - STUB_FUNCTION; - - return true; -#endif } bool speech_set_volume(unsigned short volume) { -#ifdef _WIN32 return SUCCEEDED(Voice_device->SetVolume(volume)); -#else - STUB_FUNCTION; - - return true; -#endif } bool speech_set_voice(int voice) { -#ifdef _WIN32 HRESULT hr; CComPtr cpVoiceToken; CComPtr cpEnum; @@ -276,17 +167,11 @@ bool speech_set_voice(int voice) count++; } return false; -#else - STUB_FUNCTION; - - return true; -#endif } // Goober5000 bool speech_is_speaking() { -#ifdef _WIN32 HRESULT hr; SPVOICESTATUS pStatus; @@ -294,11 +179,6 @@ bool speech_is_speaking() if (FAILED(hr)) return false; return (pStatus.dwRunningState != SPRS_DONE); -#else - STUB_FUNCTION; - - return false; -#endif } SCP_vector speech_enumerate_voices() @@ -306,7 +186,7 @@ SCP_vector speech_enumerate_voices() if (voices_cached) { return cached_voices; } -#ifdef _WIN32 + HRESULT hr = CoCreateInstance( CLSID_SpVoice, NULL, @@ -377,11 +257,6 @@ SCP_vector speech_enumerate_voices() voices_cached = true; cached_voices = voices; return voices; -#else - STUB_FUNCTION; - - return SCP_vector(); -#endif } -#endif // FS2_SPEECH +#endif // FS2_SPEECH \ No newline at end of file diff --git a/code/source_groups.cmake b/code/source_groups.cmake index 54e6bf58501..e28d696cd82 100644 --- a/code/source_groups.cmake +++ b/code/source_groups.cmake @@ -1619,12 +1619,18 @@ add_file_folder("Sound" sound/rtvoice.h sound/sound.cpp sound/sound.h - sound/speech.cpp sound/speech.h sound/voicerec.cpp sound/voicerec.h ) +if (WIN32) + add_file_folder("Sound" + ${file_root_sound} + sound/speech_win.cpp + ) +endif() + if (APPLE) add_file_folder("Sound" ${file_root_sound} From 618cf58e96ced0d9c07ec003f6192cd5484de095 Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Sun, 5 Apr 2026 18:56:52 -0300 Subject: [PATCH 03/26] adapt existing mac speech integration --- code/sound/{speech.mm => speech_mac.cpp} | 37 ++++++++---------------- code/source_groups.cmake | 2 +- 2 files changed, 13 insertions(+), 26 deletions(-) rename code/sound/{speech.mm => speech_mac.cpp} (81%) diff --git a/code/sound/speech.mm b/code/sound/speech_mac.cpp similarity index 81% rename from code/sound/speech.mm rename to code/sound/speech_mac.cpp index 0cb45534028..17e7e2313d5 100644 --- a/code/sound/speech.mm +++ b/code/sound/speech_mac.cpp @@ -6,7 +6,8 @@ #include "globalincs/pstypes.h" #include "utils/unicode.h" - +static SCP_vector cached_voices; +static bool voices_cached = false; static NSSpeechSynthesizer *synth = nil; static bool Speech_init = false; @@ -36,40 +37,20 @@ void speech_deinit() Speech_init = false; } -bool speech_play(const char *text) +bool speech_play(const SCP_string& text) { if ( !Speech_init ) { return false; } - if ( !text || !strlen(text) ) { - nprintf(("Speech", "Not playing speech because passed text is null.\n")); - return false; - } - - SCP_string work_buffer; - - bool saw_dollar = false; - for (auto ch : unicode::codepoint_range(text)) { - if (ch == UNICODE_CHAR('$')) { - // Skip $ escape sequences which appear in briefing text - saw_dollar = true; - continue; - } else if (saw_dollar) { - saw_dollar = false; - continue; - } - - unicode::encode(ch, std::back_inserter(work_buffer)); - } - - if (work_buffer.empty()) { + if (text.empty()) { + nprintf(("Speech", "Not playing speech because passed text is empty.\n")); return false; } [synth startSpeakingString: [NSString stringWithUTF8String: - work_buffer.c_str() + text.c_str() ] ]; @@ -154,6 +135,10 @@ bool speech_is_speaking() SCP_vector speech_enumerate_voices() { + if (voices_cached) { + return cached_voices; + } + NSArray *voices = [NSSpeechSynthesizer availableVoices]; SCP_vector fsoVoices; @@ -165,6 +150,8 @@ bool speech_is_speaking() fsoVoices.push_back([name UTF8String]); } + voices_cached = true; + cached_voices = fsoVoices; return fsoVoices; } diff --git a/code/source_groups.cmake b/code/source_groups.cmake index e28d696cd82..e0fcce0fd58 100644 --- a/code/source_groups.cmake +++ b/code/source_groups.cmake @@ -1634,7 +1634,7 @@ endif() if (APPLE) add_file_folder("Sound" ${file_root_sound} - sound/speech.mm + sound/speech_mac.cpp ) endif() From 5a94f8762dd17e25812098d66d37a7305b40c451 Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Sun, 5 Apr 2026 19:18:32 -0300 Subject: [PATCH 04/26] add speech linux stubs --- cmake/finder/FindSpeech.cmake | 4 ++ code/sound/speech_linux.cpp | 116 ++++++++++++++++++++++++++++++++++ code/source_groups.cmake | 9 ++- 3 files changed, 126 insertions(+), 3 deletions(-) create mode 100644 code/sound/speech_linux.cpp diff --git a/cmake/finder/FindSpeech.cmake b/cmake/finder/FindSpeech.cmake index b85b5b7fe9a..172f7910137 100644 --- a/cmake/finder/FindSpeech.cmake +++ b/cmake/finder/FindSpeech.cmake @@ -11,6 +11,10 @@ if (WIN32) endif() elseif(APPLE) # it should just work +elseif(UNIX) + # speech-dispatcher + find_package(Speechd REQUIRED) + target_link_libraries(speech INTERFACE Speechd::Speechd) else() message(SEND_ERROR "Text to Speech is not supported on this platform!") endif() diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp new file mode 100644 index 00000000000..406276bf77f --- /dev/null +++ b/code/sound/speech_linux.cpp @@ -0,0 +1,116 @@ +#ifdef FS2_SPEECH +#include +#include "globalincs/pstypes.h" +#include "utils/unicode.h" + +static SCP_vector cached_voices; +static bool voices_cached = false; + +bool speech_init() +{ + if (Speech_init) { + return true; + } + + + Speech_init = true; + return true; +} + +void speech_deinit() +{ + if ( !Speech_init ) { + return; + } + + Speech_init = false; +} + +bool speech_play(const SCP_string& text) +{ + if ( !Speech_init ) { + return false; + } + + if (text.empty()) { + nprintf(("Speech", "Not playing speech because passed text is empty.\n")); + return false; + } + + + return true; +} + +bool speech_pause() +{ + if ( !Speech_init ) { + return false; + } + + + return true; +} + +bool speech_resume() +{ + if ( !Speech_init ) { + return false; + } + + + return true; +} + +bool speech_stop() +{ + if ( !Speech_init ) { + return false; + } + + + return true; +} + +bool speech_set_volume(unsigned short volume) +{ + if ( !Speech_init ) { + return false; + } + + + return true; +} + +bool speech_set_voice(int voice) +{ + if ( !Speech_init ) { + return false; + } + + return true; +} + +bool speech_is_speaking() +{ + if ( !Speech_init ) { + return false; + } + + return false; +} + +SCP_vector speech_enumerate_voices() +{ + if (voices_cached) { + return cached_voices; + } + + SCP_vector fsoVoices; + + + voices_cached = true; + cached_voices = fsoVoices; + return fsoVoices; +} + +#endif \ No newline at end of file diff --git a/code/source_groups.cmake b/code/source_groups.cmake index e0fcce0fd58..a5d2481c5be 100644 --- a/code/source_groups.cmake +++ b/code/source_groups.cmake @@ -1629,13 +1629,16 @@ if (WIN32) ${file_root_sound} sound/speech_win.cpp ) -endif() - -if (APPLE) +elseif (APPLE) add_file_folder("Sound" ${file_root_sound} sound/speech_mac.cpp ) +elseif (UNIX) + add_file_folder("Sound" + ${file_root_sound} + sound/speech_linux.cpp + ) endif() if (FSO_BUILD_WITH_FFMPEG) From 1efe01bae6029d97b61918b4b07e3b832bc62ff7 Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Sun, 5 Apr 2026 22:20:08 -0300 Subject: [PATCH 05/26] add speech support in linux --- CMakeLists.txt | 8 ++--- cmake/finder/FindSpeech.cmake | 8 +++-- code/sound/fsspeech.cpp | 2 +- code/sound/speech_linux.cpp | 61 ++++++++++++++++++++++++++++------- 4 files changed, 58 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 82075aa4d1b..9bf1923e2f0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -74,9 +74,7 @@ IF(RESET_INSTALL_PREFIX) ENDIF(NOT $ENV{FS2PATH} STREQUAL "") ENDIF(RESET_INSTALL_PREFIX) -IF(WIN32 OR APPLE) - OPTION(FSO_USE_SPEECH "Use text-to-speach libraries" ON) -ENDIF(WIN32 OR APPLE) +OPTION(FSO_USE_SPEECH "Use text-to-speach libraries" ON) IF (WIN32) OPTION(FSO_USE_VOICEREC "Enable voice recognition support" ON) @@ -227,9 +225,7 @@ include(package) include(doxygen) # Print used options to log -IF(WIN32 OR APPLE) - message(STATUS "Using text to speech: ${FSO_USE_SPEECH}") -ENDIF() +message(STATUS "Using text to speech: ${FSO_USE_SPEECH}") IF (WIN32) message(STATUS "Using voice recogition: ${FSO_USE_VOICEREC}") message(STATUS "Building FRED2: ${FSO_BUILD_FRED2}") diff --git a/cmake/finder/FindSpeech.cmake b/cmake/finder/FindSpeech.cmake index 172f7910137..0724e0f13af 100644 --- a/cmake/finder/FindSpeech.cmake +++ b/cmake/finder/FindSpeech.cmake @@ -12,9 +12,11 @@ if (WIN32) elseif(APPLE) # it should just work elseif(UNIX) - # speech-dispatcher - find_package(Speechd REQUIRED) - target_link_libraries(speech INTERFACE Speechd::Speechd) + # speech-dispatcher-> libspeechd-dev + find_package(PkgConfig REQUIRED) + pkg_check_modules(SPEECHD REQUIRED speech-dispatcher) + target_include_directories(speech INTERFACE ${SPEECHD_INCLUDE_DIRS}) + target_link_libraries(speech INTERFACE ${SPEECHD_LIBRARIES}) else() message(SEND_ERROR "Text to Speech is not supported on this platform!") endif() diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp index ecc7eb64b1a..0333c9e8564 100644 --- a/code/sound/fsspeech.cpp +++ b/code/sound/fsspeech.cpp @@ -80,7 +80,7 @@ static SCP_vector ttsvoice_enumerator() { SCP_vector vals; auto voices = speech_enumerate_voices(); - for (int i = 0; i < voices.size(); ++i) { + for (size_t i = 0; i < voices.size(); ++i) { vals.push_back(i); } return vals; diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp index 406276bf77f..7f7ccf001c6 100644 --- a/code/sound/speech_linux.cpp +++ b/code/sound/speech_linux.cpp @@ -1,17 +1,24 @@ #ifdef FS2_SPEECH -#include +#include #include "globalincs/pstypes.h" #include "utils/unicode.h" static SCP_vector cached_voices; static bool voices_cached = false; +static bool Speech_init = false; +static SPDConnection* spd = nullptr; bool speech_init() { if (Speech_init) { return true; } - + + spd = spd_open("freespace_open", "main", nullptr, SPD_MODE_SINGLE); + if (!spd) { + mprintf(("Speech: Unable to connect to speech-dispatcher\n")); + return false; + } Speech_init = true; return true; @@ -22,7 +29,7 @@ void speech_deinit() if ( !Speech_init ) { return; } - + spd_close(spd); Speech_init = false; } @@ -37,8 +44,7 @@ bool speech_play(const SCP_string& text) return false; } - - return true; + return (spd_say(spd, SPD_TEXT, text.c_str()) >= 0); } bool speech_pause() @@ -47,7 +53,8 @@ bool speech_pause() return false; } - + spd_pause(spd); + return true; } @@ -57,7 +64,8 @@ bool speech_resume() return false; } - + spd_resume(spd); + return true; } @@ -67,7 +75,8 @@ bool speech_stop() return false; } - + spd_stop(spd); + return true; } @@ -77,7 +86,8 @@ bool speech_set_volume(unsigned short volume) return false; } - + spd_set_volume(spd, volume); + return true; } @@ -86,7 +96,9 @@ bool speech_set_voice(int voice) if ( !Speech_init ) { return false; } - + + spd_set_synthesis_voice(spd, cached_voices[voice].c_str()); + return true; } @@ -107,10 +119,37 @@ SCP_vector speech_enumerate_voices() SCP_vector fsoVoices; + SPDConnection* connection = spd; + if ( !Speech_init ) { + connection = spd_open("fso_voice_list", "client", NULL, SPD_MODE_SINGLE); + if (!connection) { + mprintf(("Speech: Unable to connect to speech-dispatcher\n")); + voices_cached = true; + cached_voices = fsoVoices; + return fsoVoices; + } + } + SPDVoice** voices = spd_list_synthesis_voices(connection); + + for (int i = 0; voices[i] != NULL; i++) { + SCP_string lang = voices[i]->language; + // There are too many we cant add them all + // Only add English voices + if(lang.find("en") == 0) { + SCP_string voiceName; + voiceName = voices[i]->name ? voices[i]->name : "unknown"; + fsoVoices.push_back(voiceName); + } + } + + //spd_free_voices(voices); + if ( !Speech_init ) { + spd_close(connection); + } voices_cached = true; cached_voices = fsoVoices; return fsoVoices; } -#endif \ No newline at end of file +#endif From ecacd4f3d3bb901813d362c022fd4a32f52394fa Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Mon, 6 Apr 2026 20:24:48 -0300 Subject: [PATCH 06/26] Add array checks --- code/sound/fsspeech.cpp | 9 ++++++++- code/sound/speech_linux.cpp | 4 ++++ code/sound/speech_win.cpp | 4 ++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp index 0333c9e8564..d649e2603d6 100644 --- a/code/sound/fsspeech.cpp +++ b/code/sound/fsspeech.cpp @@ -88,8 +88,11 @@ static SCP_vector ttsvoice_enumerator() static SCP_string ttsvoice_display(int id) { - SCP_string out; auto voices = speech_enumerate_voices(); + if (voices.empty() || id < 0 || static_cast(id) >= voices.size()) { + return "No voices loaded"; + } + SCP_string out; sprintf(out, "(%d) %s", id + 1, voices[id].c_str()); return out; } @@ -99,6 +102,10 @@ static bool ttsvoice_change(int id, bool initial) if (initial) { return false; } + auto voices = speech_enumerate_voices(); + if (voices.empty() || id < 0 || static_cast(id) >= voices.size()) { + return false; + } speech_set_voice(id); return true; } diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp index 7f7ccf001c6..b44a59e87c2 100644 --- a/code/sound/speech_linux.cpp +++ b/code/sound/speech_linux.cpp @@ -97,6 +97,10 @@ bool speech_set_voice(int voice) return false; } + if (voice < 0 || static_cast(voice) >= cached_voices.size()) { + return false; + } + spd_set_synthesis_voice(spd, cached_voices[voice].c_str()); return true; diff --git a/code/sound/speech_win.cpp b/code/sound/speech_win.cpp index 90698a52417..5d9605f9395 100644 --- a/code/sound/speech_win.cpp +++ b/code/sound/speech_win.cpp @@ -134,6 +134,10 @@ bool speech_set_volume(unsigned short volume) bool speech_set_voice(int voice) { + if (voice < 0 || static_cast(voice) >= cached_voices.size()) { + return false; + } + HRESULT hr; CComPtr cpVoiceToken; CComPtr cpEnum; From ae8e56bd682e7416b3250fc760b34dce21cd2c64 Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Mon, 6 Apr 2026 20:31:53 -0300 Subject: [PATCH 07/26] Use dlopen for speech-dispatcher --- cmake/finder/FindSpeech.cmake | 6 +- code/sound/speech_linux.cpp | 134 ++++++++++++++++++++++++++++++---- 2 files changed, 120 insertions(+), 20 deletions(-) diff --git a/cmake/finder/FindSpeech.cmake b/cmake/finder/FindSpeech.cmake index 0724e0f13af..f8c28300833 100644 --- a/cmake/finder/FindSpeech.cmake +++ b/cmake/finder/FindSpeech.cmake @@ -12,11 +12,7 @@ if (WIN32) elseif(APPLE) # it should just work elseif(UNIX) - # speech-dispatcher-> libspeechd-dev - find_package(PkgConfig REQUIRED) - pkg_check_modules(SPEECHD REQUIRED speech-dispatcher) - target_include_directories(speech INTERFACE ${SPEECHD_INCLUDE_DIRS}) - target_link_libraries(speech INTERFACE ${SPEECHD_LIBRARIES}) + # uses speech-dispatcher with dlopen else() message(SEND_ERROR "Text to Speech is not supported on this platform!") endif() diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp index b44a59e87c2..f6c4b1ce84b 100644 --- a/code/sound/speech_linux.cpp +++ b/code/sound/speech_linux.cpp @@ -1,7 +1,96 @@ #ifdef FS2_SPEECH -#include +#include #include "globalincs/pstypes.h" #include "utils/unicode.h" +#include "external_dll/externalcode.h" + +// Adapted from libspeechd.h / speechd_types.h +// https://github.com/brailcom/speechd/tree/master/src/api/c + +typedef struct SPDConnection SPDConnection; + +typedef struct { + char *name; + char *language; + char *variant; +} SPDVoice; + +typedef enum { + SPD_MODE_SINGLE = 0, + SPD_MODE_THREADED = 1 +} SPDConnectionMode; + +typedef enum { + SPD_IMPORTANT = 1, + SPD_MESSAGE = 2, + SPD_TEXT = 3, + SPD_NOTIFICATION = 4, + SPD_PROGRESS = 5 +} SPDPriority; + +static void* lib_handle = nullptr; + +typedef SPDConnection* (*pfn_spd_open)(const char*, const char*, const char*, SPDConnectionMode); +typedef void (*pfn_spd_close)(SPDConnection*); +typedef int (*pfn_spd_say)(SPDConnection*, SPDPriority, const char*); +typedef int (*pfn_spd_pause)(SPDConnection*); +typedef int (*pfn_spd_resume)(SPDConnection*); +typedef int (*pfn_spd_stop)(SPDConnection*); +typedef int (*pfn_spd_set_volume)(SPDConnection*, signed int); +typedef int (*pfn_spd_set_synthesis_voice)(SPDConnection*, const char*); +typedef SPDVoice** (*pfn_spd_list_synthesis_voices)(SPDConnection*); +typedef void (*pfn_free_spd_voices)(SPDVoice**); + +static pfn_spd_open p_spd_open = nullptr; +static pfn_spd_close p_spd_close = nullptr; +static pfn_spd_say p_spd_say = nullptr; +static pfn_spd_pause p_spd_pause = nullptr; +static pfn_spd_resume p_spd_resume = nullptr; +static pfn_spd_stop p_spd_stop = nullptr; +static pfn_spd_set_volume p_spd_set_volume = nullptr; +static pfn_spd_set_synthesis_voice p_spd_set_synthesis_voice = nullptr; +static pfn_spd_list_synthesis_voices p_spd_list_synthesis_voices = nullptr; +static pfn_free_spd_voices p_free_spd_voices = nullptr; + +// Load speech-dispatcher with dlopen and load symbols +static bool ensure_speechd_lib() +{ + if (lib_handle) return true; + lib_handle = dlopen("libspeechd.so.3", RTLD_LAZY | RTLD_LOCAL); + if (!lib_handle) { + lib_handle = dlopen("libspeechd.so", RTLD_LAZY | RTLD_LOCAL); + } + + if (!lib_handle) { + mprintf(("Speech: Unable to load libspeechd.so: %s\n", dlerror())); + return false; + } + + // used symbols + p_spd_open = (pfn_spd_open) dlsym(lib_handle, "spd_open"); + p_spd_close = (pfn_spd_close) dlsym(lib_handle, "spd_close"); + p_spd_say = (pfn_spd_say) dlsym(lib_handle, "spd_say"); + p_spd_pause = (pfn_spd_pause) dlsym(lib_handle, "spd_pause"); + p_spd_resume = (pfn_spd_resume) dlsym(lib_handle, "spd_resume"); + p_spd_stop = (pfn_spd_stop) dlsym(lib_handle, "spd_stop"); + p_spd_set_volume = (pfn_spd_set_volume) dlsym(lib_handle, "spd_set_volume"); + p_spd_set_synthesis_voice = (pfn_spd_set_synthesis_voice) dlsym(lib_handle, "spd_set_synthesis_voice"); + p_spd_list_synthesis_voices = (pfn_spd_list_synthesis_voices) dlsym(lib_handle, "spd_list_synthesis_voices"); + p_free_spd_voices = (pfn_free_spd_voices) dlsym(lib_handle, "free_spd_voices"); + + if (!p_spd_open || !p_spd_close || !p_spd_say || !p_spd_pause || + !p_spd_resume || !p_spd_stop || !p_spd_set_volume || + !p_spd_set_synthesis_voice || !p_spd_list_synthesis_voices || !p_free_spd_voices) { + mprintf(("Speech: Unable to load one or more symbols from libspeechd.so: %s\n", dlerror())); + dlclose(lib_handle); + lib_handle = nullptr; + return false; + } + + return true; +} + +// Speech handling starts here static SCP_vector cached_voices; static bool voices_cached = false; @@ -13,8 +102,12 @@ bool speech_init() if (Speech_init) { return true; } - - spd = spd_open("freespace_open", "main", nullptr, SPD_MODE_SINGLE); + + if (!ensure_speechd_lib()) { + return false; + } + + spd = p_spd_open("freespace_open", "main", nullptr, SPD_MODE_SINGLE); if (!spd) { mprintf(("Speech: Unable to connect to speech-dispatcher\n")); return false; @@ -29,8 +122,13 @@ void speech_deinit() if ( !Speech_init ) { return; } - spd_close(spd); + p_spd_close(spd); Speech_init = false; + spd = nullptr; + if (lib_handle) { + dlclose(lib_handle); + lib_handle = nullptr; + } } bool speech_play(const SCP_string& text) @@ -44,7 +142,7 @@ bool speech_play(const SCP_string& text) return false; } - return (spd_say(spd, SPD_TEXT, text.c_str()) >= 0); + return (p_spd_say(spd, SPD_TEXT, text.c_str()) >= 0); } bool speech_pause() @@ -53,7 +151,7 @@ bool speech_pause() return false; } - spd_pause(spd); + p_spd_pause(spd); return true; } @@ -64,7 +162,7 @@ bool speech_resume() return false; } - spd_resume(spd); + p_spd_resume(spd); return true; } @@ -75,7 +173,7 @@ bool speech_stop() return false; } - spd_stop(spd); + p_spd_stop(spd); return true; } @@ -86,7 +184,7 @@ bool speech_set_volume(unsigned short volume) return false; } - spd_set_volume(spd, volume); + p_spd_set_volume(spd, volume); return true; } @@ -100,8 +198,8 @@ bool speech_set_voice(int voice) if (voice < 0 || static_cast(voice) >= cached_voices.size()) { return false; } - - spd_set_synthesis_voice(spd, cached_voices[voice].c_str()); + + p_spd_set_synthesis_voice(spd, cached_voices[voice].c_str()); return true; } @@ -122,10 +220,16 @@ SCP_vector speech_enumerate_voices() } SCP_vector fsoVoices; + + if (!ensure_speechd_lib()) { + voices_cached = true; + cached_voices = fsoVoices; + return fsoVoices; + } SPDConnection* connection = spd; if ( !Speech_init ) { - connection = spd_open("fso_voice_list", "client", NULL, SPD_MODE_SINGLE); + connection = p_spd_open("fso_voice_list", "client", NULL, SPD_MODE_SINGLE); if (!connection) { mprintf(("Speech: Unable to connect to speech-dispatcher\n")); voices_cached = true; @@ -134,7 +238,7 @@ SCP_vector speech_enumerate_voices() } } - SPDVoice** voices = spd_list_synthesis_voices(connection); + SPDVoice** voices = p_spd_list_synthesis_voices(connection); for (int i = 0; voices[i] != NULL; i++) { SCP_string lang = voices[i]->language; @@ -147,9 +251,9 @@ SCP_vector speech_enumerate_voices() } } - //spd_free_voices(voices); + p_free_spd_voices(voices); if ( !Speech_init ) { - spd_close(connection); + p_spd_close(connection); } voices_cached = true; cached_voices = fsoVoices; From 191061d7e483b8fa763bda3cd484e00e276a2c13 Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Mon, 6 Apr 2026 21:42:19 -0300 Subject: [PATCH 08/26] corrrect lib name --- code/sound/speech_linux.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp index f6c4b1ce84b..0e276646159 100644 --- a/code/sound/speech_linux.cpp +++ b/code/sound/speech_linux.cpp @@ -56,7 +56,7 @@ static pfn_free_spd_voices p_free_spd_voices = nullptr; static bool ensure_speechd_lib() { if (lib_handle) return true; - lib_handle = dlopen("libspeechd.so.3", RTLD_LAZY | RTLD_LOCAL); + lib_handle = dlopen("libspeechd.so.2", RTLD_LAZY | RTLD_LOCAL); if (!lib_handle) { lib_handle = dlopen("libspeechd.so", RTLD_LAZY | RTLD_LOCAL); } From fc5a017706f1f7a713dd4abf4f7ae72605954089 Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Mon, 6 Apr 2026 22:38:03 -0300 Subject: [PATCH 09/26] missing includes and static cast --- code/sound/fsspeech.cpp | 2 +- code/sound/speech_linux.cpp | 2 +- code/sound/speech_mac.cpp | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp index d649e2603d6..f25055f5b26 100644 --- a/code/sound/fsspeech.cpp +++ b/code/sound/fsspeech.cpp @@ -80,7 +80,7 @@ static SCP_vector ttsvoice_enumerator() { SCP_vector vals; auto voices = speech_enumerate_voices(); - for (size_t i = 0; i < voices.size(); ++i) { + for (int i = 0; i < static_cast(voices.size()); ++i) { vals.push_back(i); } return vals; diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp index 0e276646159..f1204989fe6 100644 --- a/code/sound/speech_linux.cpp +++ b/code/sound/speech_linux.cpp @@ -2,7 +2,7 @@ #include #include "globalincs/pstypes.h" #include "utils/unicode.h" -#include "external_dll/externalcode.h" +#include "speech.h" // Adapted from libspeechd.h / speechd_types.h // https://github.com/brailcom/speechd/tree/master/src/api/c diff --git a/code/sound/speech_mac.cpp b/code/sound/speech_mac.cpp index 17e7e2313d5..cc560b15ec0 100644 --- a/code/sound/speech_mac.cpp +++ b/code/sound/speech_mac.cpp @@ -5,6 +5,7 @@ #include "globalincs/pstypes.h" #include "utils/unicode.h" +#include "speech.h" static SCP_vector cached_voices; static bool voices_cached = false; From 4d71c38154c6c146cd0304fce213185e27602539 Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Mon, 6 Apr 2026 22:48:27 -0300 Subject: [PATCH 10/26] do not change mac file type --- code/sound/{speech_mac.cpp => speech_mac.mm} | 0 code/source_groups.cmake | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename code/sound/{speech_mac.cpp => speech_mac.mm} (100%) diff --git a/code/sound/speech_mac.cpp b/code/sound/speech_mac.mm similarity index 100% rename from code/sound/speech_mac.cpp rename to code/sound/speech_mac.mm diff --git a/code/source_groups.cmake b/code/source_groups.cmake index a5d2481c5be..b53bbb2749c 100644 --- a/code/source_groups.cmake +++ b/code/source_groups.cmake @@ -1632,7 +1632,7 @@ if (WIN32) elseif (APPLE) add_file_folder("Sound" ${file_root_sound} - sound/speech_mac.cpp + sound/speech_mac.mm ) elseif (UNIX) add_file_folder("Sound" From 0c3534c41a847cf4202bb0017449b92687898e0e Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Mon, 6 Apr 2026 23:14:40 -0300 Subject: [PATCH 11/26] fix clang tidy warnings 1 --- code/sound/fsspeech.cpp | 7 +++---- code/sound/speech_linux.cpp | 4 ++-- code/sound/speech_win.cpp | 32 ++++++++++++++++---------------- 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp index f25055f5b26..612eb16ead5 100644 --- a/code/sound/fsspeech.cpp +++ b/code/sound/fsspeech.cpp @@ -219,15 +219,14 @@ bool fsspeech_init() { // Get the settings from the registry for (int i = 0; i < FSSPEECH_FROM_MAX; i++) { - FSSpeech_play_from[i] = - os_config_read_uint(NULL, FSSpeech_play_id[i], 0) ? true : false; + FSSpeech_play_from[i] = static_cast(os_config_read_uint(nullptr, FSSpeech_play_id[i], 0)); nprintf(("Speech", "Play %s: %s\n", FSSpeech_play_id[i], FSSpeech_play_from[i] ? "true" : "false")); } - int volume = os_config_read_uint(NULL, "SpeechVolume", 100); + int volume = os_config_read_uint(nullptr, "SpeechVolume", 100); speech_set_volume((unsigned short)volume); - int voice = os_config_read_uint(NULL, "SpeechVoice", 0); + int voice = os_config_read_uint(nullptr, "SpeechVoice", 0); speech_set_voice(voice); } diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp index f1204989fe6..eb4a3b6c77c 100644 --- a/code/sound/speech_linux.cpp +++ b/code/sound/speech_linux.cpp @@ -229,7 +229,7 @@ SCP_vector speech_enumerate_voices() SPDConnection* connection = spd; if ( !Speech_init ) { - connection = p_spd_open("fso_voice_list", "client", NULL, SPD_MODE_SINGLE); + connection = p_spd_open("fso_voice_list", "client", nullptr, SPD_MODE_SINGLE); if (!connection) { mprintf(("Speech: Unable to connect to speech-dispatcher\n")); voices_cached = true; @@ -240,7 +240,7 @@ SCP_vector speech_enumerate_voices() SPDVoice** voices = p_spd_list_synthesis_voices(connection); - for (int i = 0; voices[i] != NULL; i++) { + for (int i = 0; voices[i] != nullptr; i++) { SCP_string lang = voices[i]->language; // There are too many we cant add them all // Only add English voices diff --git a/code/sound/speech_win.cpp b/code/sound/speech_win.cpp index 5d9605f9395..d83ad0906d1 100644 --- a/code/sound/speech_win.cpp +++ b/code/sound/speech_win.cpp @@ -11,7 +11,7 @@ #pragma message( "WARNING: You have not compiled speech into this build (use FS2_SPEECH)" ) #endif // NDEBUG #endif // _WIN32 -#else // FS2_SPEECH +#elif defined(_WIN32) // FS2_SPEECH #ifdef LAUNCHER #include "stdafx.h" @@ -60,7 +60,7 @@ bool speech_init() { HRESULT hr = CoCreateInstance( CLSID_SpVoice, - NULL, + nullptr, CLSCTX_ALL, IID_ISpVoice, (void **)&Voice_device); @@ -106,7 +106,7 @@ bool speech_play(const SCP_string& text) } speech_stop(); - return SUCCEEDED(Voice_device->Speak(wide_string.c_str(), SPF_ASYNC, NULL)); + return SUCCEEDED(Voice_device->Speak(wide_string.c_str(), SPF_ASYNC, nullptr)); } bool speech_pause() @@ -124,7 +124,7 @@ bool speech_resume() bool speech_stop() { if(Speech_init == false) return true; - return SUCCEEDED(Voice_device->Speak( NULL, SPF_PURGEBEFORESPEAK, NULL )); + return SUCCEEDED(Voice_device->Speak(nullptr, SPF_PURGEBEFORESPEAK, nullptr)); } bool speech_set_volume(unsigned short volume) @@ -144,7 +144,7 @@ bool speech_set_voice(int voice) ULONG num_voices = 0; //Enumerate the available voices - hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum); + hr = SpEnumTokens(SPCAT_VOICES, nullptr, nullptr, &cpEnum); if(FAILED(hr)) return false; @@ -158,7 +158,7 @@ bool speech_set_voice(int voice) { cpVoiceToken.Release(); - hr = cpEnum->Next( 1, &cpVoiceToken, NULL ); + hr = cpEnum->Next( 1, &cpVoiceToken, nullptr); if(FAILED(hr)) { return false; @@ -179,7 +179,7 @@ bool speech_is_speaking() HRESULT hr; SPVOICESTATUS pStatus; - hr = Voice_device->GetStatus(&pStatus, NULL); + hr = Voice_device->GetStatus(&pStatus, nullptr); if (FAILED(hr)) return false; return (pStatus.dwRunningState != SPRS_DONE); @@ -193,7 +193,7 @@ SCP_vector speech_enumerate_voices() HRESULT hr = CoCreateInstance( CLSID_SpVoice, - NULL, + nullptr, CLSCTX_ALL, IID_ISpVoice, (void **)&Voice_device); @@ -203,12 +203,12 @@ SCP_vector speech_enumerate_voices() } // This code is mostly copied from wxLauncher - ISpObjectTokenCategory * comTokenCategory = NULL; - IEnumSpObjectTokens * comVoices = NULL; + ISpObjectTokenCategory * comTokenCategory = nullptr; + IEnumSpObjectTokens * comVoices = nullptr; ULONG comVoicesCount = 0; // Generate enumeration of voices - hr = ::CoCreateInstance(CLSID_SpObjectTokenCategory, NULL, + hr = ::CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, CLSCTX_INPROC_SERVER, IID_ISpObjectTokenCategory, (LPVOID*)&comTokenCategory); if (FAILED(hr)) { return SCP_vector(); @@ -219,7 +219,7 @@ SCP_vector speech_enumerate_voices() return SCP_vector(); } - hr = comTokenCategory->EnumTokens(NULL, NULL, &comVoices); + hr = comTokenCategory->EnumTokens(nullptr, nullptr, &comVoices); if (FAILED(hr)) { return SCP_vector(); } @@ -231,12 +231,12 @@ SCP_vector speech_enumerate_voices() SCP_vector voices; while (comVoicesCount > 0) { - ISpObjectToken * comAVoice = NULL; + ISpObjectToken * comAVoice = nullptr; - comVoices->Next(1, &comAVoice, NULL); // retrieve just one + comVoices->Next(1, &comAVoice, nullptr); // retrieve just one - LPWSTR id = NULL; - comAVoice->GetStringValue(NULL, &id); + LPWSTR id = nullptr; + comAVoice->GetStringValue(nullptr, &id); auto idlength = wcslen(id); auto buffer_size = WideCharToMultiByte(CP_UTF8, 0, id, (int)idlength, nullptr, 0, nullptr, nullptr); From be08a77fe899b71143d395dd636adbc3323318d4 Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Wed, 8 Apr 2026 19:27:46 -0300 Subject: [PATCH 12/26] set tts rate --- code/sound/fsspeech.cpp | 25 ++++++++++++++++++++++++- code/sound/speech.h | 2 ++ code/sound/speech_linux.cpp | 26 +++++++++++++++++++++++--- code/sound/speech_mac.mm | 16 ++++++++++++++++ code/sound/speech_win.cpp | 16 ++++++++++++++++ 5 files changed, 81 insertions(+), 4 deletions(-) diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp index 612eb16ead5..834069a65dd 100644 --- a/code/sound/fsspeech.cpp +++ b/code/sound/fsspeech.cpp @@ -31,6 +31,15 @@ const char *FSSpeech_play_id[FSSPEECH_FROM_MAX] = char Speech_buffer[MAX_SPEECH_BUFFER_LEN] = ""; size_t Speech_buffer_len; +static bool ttsrate_change(float new_val, bool initial) +{ + if (initial) { + return false; + } + speech_set_rate(new_val); + return true; +} + static bool ttsingame_change(bool new_val, bool initial) { if (initial) { @@ -120,7 +129,7 @@ static auto SpeechVoiceOption = options::OptionBuilder("Speech.Voice", .flags({ options::OptionFlags::ForceMultiValueSelection }) .default_val(0) .change_listener(ttsvoice_change) - .importance(2) + .importance(3) .finish(); static auto SpeechVolumeOption = options::OptionBuilder("Speech.Volume", @@ -130,6 +139,16 @@ static auto SpeechVolumeOption = options::OptionBuilder("Speech.Volume", .range(0.0f, 100.0f) .default_val(100.0f) .change_listener(ttsvolume_change) + .importance(2) + .finish(); + +static auto SpeechRateOption = options::OptionBuilder("Speech.Rate", + std::pair{"TTS Rate", -1}, + std::pair{"Speed of the TTS voice (100 = normal)", -1}) + .category(std::make_pair("Audio", 1826)) + .range(50.0f, 150.0f) + .default_val(100.0f) + .change_listener(ttsrate_change) .importance(1) .finish(); @@ -214,6 +233,7 @@ bool fsspeech_init() speech_enumerate_voices(); speech_set_volume((unsigned short)SpeechVolumeOption->getValue()); speech_set_voice(SpeechVoiceOption->getValue()); + speech_set_rate(SpeechRateOption->getValue()); } else { @@ -228,6 +248,9 @@ bool fsspeech_init() int voice = os_config_read_uint(nullptr, "SpeechVoice", 0); speech_set_voice(voice); + + int rate = os_config_read_uint(nullptr, "SpeechRate", 100); + speech_set_rate(static_cast(rate)); } speech_inited = 1; diff --git a/code/sound/speech.h b/code/sound/speech.h index e16eeef2a43..6f73c2f5264 100644 --- a/code/sound/speech.h +++ b/code/sound/speech.h @@ -22,6 +22,7 @@ bool speech_stop(); bool speech_set_volume(unsigned short volume); bool speech_set_voice(int voice); +bool speech_set_rate(float rate); bool speech_is_speaking(); @@ -37,6 +38,7 @@ inline bool speech_resume() { return false; } inline bool speech_stop() { return false; } inline bool speech_set_volume(unsigned short /*volume*/) { return false; } inline bool speech_set_voice(int /*voice*/) { return false; } +inline bool speech_set_rate(float /*rate*/) { return false; } inline bool speech_is_speaking() { return false; } inline SCP_vector speech_enumerate_voices() { diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp index eb4a3b6c77c..4c3486fab6e 100644 --- a/code/sound/speech_linux.cpp +++ b/code/sound/speech_linux.cpp @@ -38,6 +38,7 @@ typedef int (*pfn_spd_resume)(SPDConnection*); typedef int (*pfn_spd_stop)(SPDConnection*); typedef int (*pfn_spd_set_volume)(SPDConnection*, signed int); typedef int (*pfn_spd_set_synthesis_voice)(SPDConnection*, const char*); +typedef int (*pfn_spd_set_rate)(SPDConnection*, signed int); typedef SPDVoice** (*pfn_spd_list_synthesis_voices)(SPDConnection*); typedef void (*pfn_free_spd_voices)(SPDVoice**); @@ -50,6 +51,7 @@ static pfn_spd_stop p_spd_stop = nullptr; static pfn_spd_set_volume p_spd_set_volume = nullptr; static pfn_spd_set_synthesis_voice p_spd_set_synthesis_voice = nullptr; static pfn_spd_list_synthesis_voices p_spd_list_synthesis_voices = nullptr; +static pfn_spd_set_rate p_spd_set_rate = nullptr; static pfn_free_spd_voices p_free_spd_voices = nullptr; // Load speech-dispatcher with dlopen and load symbols @@ -76,11 +78,12 @@ static bool ensure_speechd_lib() p_spd_set_volume = (pfn_spd_set_volume) dlsym(lib_handle, "spd_set_volume"); p_spd_set_synthesis_voice = (pfn_spd_set_synthesis_voice) dlsym(lib_handle, "spd_set_synthesis_voice"); p_spd_list_synthesis_voices = (pfn_spd_list_synthesis_voices) dlsym(lib_handle, "spd_list_synthesis_voices"); + p_spd_set_rate = (pfn_spd_set_rate) dlsym(lib_handle, "spd_set_rate"); p_free_spd_voices = (pfn_free_spd_voices) dlsym(lib_handle, "free_spd_voices"); - if (!p_spd_open || !p_spd_close || !p_spd_say || !p_spd_pause || - !p_spd_resume || !p_spd_stop || !p_spd_set_volume || - !p_spd_set_synthesis_voice || !p_spd_list_synthesis_voices || !p_free_spd_voices) { + if (!p_spd_open || !p_spd_close || !p_spd_say || !p_spd_pause || !p_spd_resume || !p_spd_stop || + !p_spd_set_volume || !p_spd_set_rate || !p_spd_set_synthesis_voice || + !p_spd_list_synthesis_voices || !p_free_spd_voices || !p_spd_set_rate) { mprintf(("Speech: Unable to load one or more symbols from libspeechd.so: %s\n", dlerror())); dlclose(lib_handle); lib_handle = nullptr; @@ -204,6 +207,23 @@ bool speech_set_voice(int voice) return true; } +bool speech_set_rate(float rate_percent) +{ + if (!Speech_init) { + return false; + } + + // 50 / +150 -> 100 = normal -> range -100 / +100 + signed int rate = static_cast((rate_percent - 100.0f) * 2.0f); + if (rate < -100) + rate = -100; + if (rate > 100) + rate = 100; + + p_spd_set_rate(spd, rate); + return true; +} + bool speech_is_speaking() { if ( !Speech_init ) { diff --git a/code/sound/speech_mac.mm b/code/sound/speech_mac.mm index cc560b15ec0..5c0f92ab3a9 100644 --- a/code/sound/speech_mac.mm +++ b/code/sound/speech_mac.mm @@ -125,6 +125,22 @@ bool speech_set_voice(int voice) return true; } +bool speech_set_rate(float rate_percent) +{ + if (!Speech_init) { + return false; + } + + // 180 wpm = normal + float rate = 180.0f * (rate_percent / 100.0f); + + [synth setObject:[NSNumber numberWithFloat:rate] + forProperty:NSSpeechRateProperty + error:nil]; + + return true; +} + bool speech_is_speaking() { if ( !Speech_init ) { diff --git a/code/sound/speech_win.cpp b/code/sound/speech_win.cpp index d83ad0906d1..13ca1fd4d2e 100644 --- a/code/sound/speech_win.cpp +++ b/code/sound/speech_win.cpp @@ -173,6 +173,22 @@ bool speech_set_voice(int voice) return false; } +bool speech_set_rate(float rate_percent) +{ + if (!Speech_init) { + return false; + } + + // 50 / +150 -> 100 = normal -> range -10 / +10 + long rate = static_cast((rate_percent - 100.0f) * 0.1f); + if (rate < -10) + rate = -10; + if (rate > 10) + rate = 10; + + return SUCCEEDED(Voice_device->SetRate(rate)); +} + // Goober5000 bool speech_is_speaking() { From 5e564ade6c137d0e3032ba48370a3838ae8b0f4b Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Wed, 8 Apr 2026 19:32:42 -0300 Subject: [PATCH 13/26] set localization ids --- code/localization/localize.cpp | 2 +- code/sound/fsspeech.cpp | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/code/localization/localize.cpp b/code/localization/localize.cpp index 5ad87fe4bfc..8c6132f4462 100644 --- a/code/localization/localize.cpp +++ b/code/localization/localize.cpp @@ -64,7 +64,7 @@ bool *Lcl_unexpected_tstring_check = nullptr; // NOTE: with map storage of XSTR strings, the indexes no longer need to be contiguous, // but internal strings should still increment XSTR_SIZE to avoid collisions. // retail XSTR_SIZE = 1570 -// #define XSTR_SIZE 1892 // This is the next available ID +// #define XSTR_SIZE 1929 // This is the next available ID // struct to allow for strings.tbl-determined x offset // offset is 0 for english, by default diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp index 834069a65dd..9be3c52c14c 100644 --- a/code/sound/fsspeech.cpp +++ b/code/sound/fsspeech.cpp @@ -120,8 +120,8 @@ static bool ttsvoice_change(int id, bool initial) } static auto SpeechVoiceOption = options::OptionBuilder("Speech.Voice", - std::pair{"TTS Voice", -1}, - std::pair{"The voice used to read text", -1}) + std::pair{"TTS Voice", 1915}, + std::pair{"The voice used to read text", 1916}) .category(std::make_pair("Audio", 1826)) .level(options::ExpertLevel::Beginner) .enumerator(ttsvoice_enumerator) @@ -133,8 +133,8 @@ static auto SpeechVoiceOption = options::OptionBuilder("Speech.Voice", .finish(); static auto SpeechVolumeOption = options::OptionBuilder("Speech.Volume", - std::pair{"TTS Volume", -1}, - std::pair{"Volume used for playing TTS speech", -1}) + std::pair{"TTS Volume", 1917}, + std::pair{"Volume used for playing TTS speech", 1918}) .category(std::make_pair("Audio", 1826)) .range(0.0f, 100.0f) .default_val(100.0f) @@ -143,8 +143,8 @@ static auto SpeechVolumeOption = options::OptionBuilder("Speech.Volume", .finish(); static auto SpeechRateOption = options::OptionBuilder("Speech.Rate", - std::pair{"TTS Rate", -1}, - std::pair{"Speed of the TTS voice (100 = normal)", -1}) + std::pair{"TTS Rate", 1919}, + std::pair{"Speed of the TTS voice (100 = normal)", 1920}) .category(std::make_pair("Audio", 1826)) .range(50.0f, 150.0f) .default_val(100.0f) @@ -153,8 +153,8 @@ static auto SpeechRateOption = options::OptionBuilder("Speech.Rate", .finish(); static auto SpeechBriefingOption = options::OptionBuilder("Speech.Briefing", - std::pair{"TTS in briefings", -1}, - std::pair{"Enable or disable TTS in briefings", -1}) + std::pair{"TTS in briefings", 1921}, + std::pair{"Enable or disable TTS in briefings", 1922}) .category(std::make_pair("Audio", 1826)) .level(options::ExpertLevel::Beginner) .change_listener(ttsbriefing_change) @@ -163,8 +163,8 @@ static auto SpeechBriefingOption = options::OptionBuilder("Speech.Briefing .finish(); static auto SpeechTechroomOption = options::OptionBuilder("Speech.Techroom", - std::pair{"TTS in techroom", -1}, - std::pair{"Enable or disable TTS in techroom", -1}) + std::pair{"TTS in techroom", 1923}, + std::pair{"Enable or disable TTS in techroom", 1924}) .category(std::make_pair("Audio", 1826)) .level(options::ExpertLevel::Beginner) .change_listener(ttstechroom_change) @@ -173,8 +173,8 @@ static auto SpeechTechroomOption = options::OptionBuilder("Speech.Techroom .finish(); static auto SpeechIngameOption = options::OptionBuilder("Speech.Ingame", - std::pair{"TTS in-game", -1}, - std::pair{"Enable or disable TTS in-game", -1}) + std::pair{"TTS in-game", 1925}, + std::pair{"Enable or disable TTS in-game", 1926}) .category(std::make_pair("Audio", 1826)) .level(options::ExpertLevel::Beginner) .change_listener(ttsingame_change) @@ -183,8 +183,8 @@ static auto SpeechIngameOption = options::OptionBuilder("Speech.Ingame", .finish(); static auto SpeechMultiOption = options::OptionBuilder("Speech.Multi", - std::pair{"TTS in multiplayer", -1}, - std::pair{"Enable or disable TTS in multiplayer", -1}) + std::pair{"TTS in multiplayer", 1927}, + std::pair{"Enable or disable TTS in multiplayer", 1928}) .category(std::make_pair("Audio", 1826)) .level(options::ExpertLevel::Beginner) .change_listener(ttsmulti_change) From 205eaef418ddae33b8b1d196435b1cb4fb1022ea Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Wed, 8 Apr 2026 19:56:14 -0300 Subject: [PATCH 14/26] fix clang tidy warnings 2 --- code/sound/speech_linux.cpp | 7 +++---- code/sound/speech_win.cpp | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp index 4c3486fab6e..a077fdb6267 100644 --- a/code/sound/speech_linux.cpp +++ b/code/sound/speech_linux.cpp @@ -81,9 +81,8 @@ static bool ensure_speechd_lib() p_spd_set_rate = (pfn_spd_set_rate) dlsym(lib_handle, "spd_set_rate"); p_free_spd_voices = (pfn_free_spd_voices) dlsym(lib_handle, "free_spd_voices"); - if (!p_spd_open || !p_spd_close || !p_spd_say || !p_spd_pause || !p_spd_resume || !p_spd_stop || - !p_spd_set_volume || !p_spd_set_rate || !p_spd_set_synthesis_voice || - !p_spd_list_synthesis_voices || !p_free_spd_voices || !p_spd_set_rate) { + if (!p_spd_open || !p_spd_close || !p_spd_say || !p_spd_pause || !p_spd_resume || !p_spd_stop || !p_spd_set_volume + || !p_spd_set_rate || !p_spd_set_synthesis_voice || !p_spd_list_synthesis_voices || !p_free_spd_voices) { mprintf(("Speech: Unable to load one or more symbols from libspeechd.so: %s\n", dlerror())); dlclose(lib_handle); lib_handle = nullptr; @@ -214,7 +213,7 @@ bool speech_set_rate(float rate_percent) } // 50 / +150 -> 100 = normal -> range -100 / +100 - signed int rate = static_cast((rate_percent - 100.0f) * 2.0f); + auto rate = static_cast((rate_percent - 100.0f) * 2.0f); if (rate < -100) rate = -100; if (rate > 100) diff --git a/code/sound/speech_win.cpp b/code/sound/speech_win.cpp index 13ca1fd4d2e..e8a831040b2 100644 --- a/code/sound/speech_win.cpp +++ b/code/sound/speech_win.cpp @@ -180,7 +180,7 @@ bool speech_set_rate(float rate_percent) } // 50 / +150 -> 100 = normal -> range -10 / +10 - long rate = static_cast((rate_percent - 100.0f) * 0.1f); + auto rate = static_cast((rate_percent - 100.0f) * 0.1f); if (rate < -10) rate = -10; if (rate > 10) From 5d479802bb42a770b21e96da7f739047b9251c5c Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Thu, 9 Apr 2026 19:00:55 -0300 Subject: [PATCH 15/26] correct symbol name --- code/sound/speech_linux.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp index a077fdb6267..a5ce573c5d7 100644 --- a/code/sound/speech_linux.cpp +++ b/code/sound/speech_linux.cpp @@ -38,7 +38,7 @@ typedef int (*pfn_spd_resume)(SPDConnection*); typedef int (*pfn_spd_stop)(SPDConnection*); typedef int (*pfn_spd_set_volume)(SPDConnection*, signed int); typedef int (*pfn_spd_set_synthesis_voice)(SPDConnection*, const char*); -typedef int (*pfn_spd_set_rate)(SPDConnection*, signed int); +typedef int (*pfn_spd_set_voice_rate)(SPDConnection*, signed int); typedef SPDVoice** (*pfn_spd_list_synthesis_voices)(SPDConnection*); typedef void (*pfn_free_spd_voices)(SPDVoice**); @@ -51,7 +51,7 @@ static pfn_spd_stop p_spd_stop = nullptr; static pfn_spd_set_volume p_spd_set_volume = nullptr; static pfn_spd_set_synthesis_voice p_spd_set_synthesis_voice = nullptr; static pfn_spd_list_synthesis_voices p_spd_list_synthesis_voices = nullptr; -static pfn_spd_set_rate p_spd_set_rate = nullptr; +static pfn_spd_set_voice_rate p_spd_set_voice_rate = nullptr; static pfn_free_spd_voices p_free_spd_voices = nullptr; // Load speech-dispatcher with dlopen and load symbols @@ -78,11 +78,11 @@ static bool ensure_speechd_lib() p_spd_set_volume = (pfn_spd_set_volume) dlsym(lib_handle, "spd_set_volume"); p_spd_set_synthesis_voice = (pfn_spd_set_synthesis_voice) dlsym(lib_handle, "spd_set_synthesis_voice"); p_spd_list_synthesis_voices = (pfn_spd_list_synthesis_voices) dlsym(lib_handle, "spd_list_synthesis_voices"); - p_spd_set_rate = (pfn_spd_set_rate) dlsym(lib_handle, "spd_set_rate"); + p_spd_set_voice_rate = (pfn_spd_set_voice_rate) dlsym(lib_handle, "spd_set_voice_rate"); p_free_spd_voices = (pfn_free_spd_voices) dlsym(lib_handle, "free_spd_voices"); if (!p_spd_open || !p_spd_close || !p_spd_say || !p_spd_pause || !p_spd_resume || !p_spd_stop || !p_spd_set_volume - || !p_spd_set_rate || !p_spd_set_synthesis_voice || !p_spd_list_synthesis_voices || !p_free_spd_voices) { + || !p_spd_set_voice_rate || !p_spd_set_synthesis_voice || !p_spd_list_synthesis_voices || !p_free_spd_voices) { mprintf(("Speech: Unable to load one or more symbols from libspeechd.so: %s\n", dlerror())); dlclose(lib_handle); lib_handle = nullptr; @@ -213,13 +213,13 @@ bool speech_set_rate(float rate_percent) } // 50 / +150 -> 100 = normal -> range -100 / +100 - auto rate = static_cast((rate_percent - 100.0f) * 2.0f); + auto rate = static_cast(rate_percent - 100.0f); if (rate < -100) rate = -100; if (rate > 100) rate = 100; - p_spd_set_rate(spd, rate); + p_spd_set_voice_rate(spd, rate); return true; } From 0c27de6b6a1ba1bdb54dd80cf16ec7fe5e6a5ec6 Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Sun, 19 Apr 2026 14:56:45 -0300 Subject: [PATCH 16/26] Remove voice cache and fix win enumerate_voices overriding voice selection --- code/sound/fsspeech.cpp | 2 - code/sound/speech_linux.cpp | 19 +++------- code/sound/speech_mac.mm | 8 ---- code/sound/speech_win.cpp | 74 +++++++++++++++---------------------- 4 files changed, 35 insertions(+), 68 deletions(-) diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp index 9be3c52c14c..c0d1b506753 100644 --- a/code/sound/fsspeech.cpp +++ b/code/sound/fsspeech.cpp @@ -229,8 +229,6 @@ bool fsspeech_init() FSSpeech_play_from[FSSPEECH_FROM_BRIEFING] = SpeechBriefingOption->getValue(); FSSpeech_play_from[FSSPEECH_FROM_INGAME] = SpeechIngameOption->getValue(); FSSpeech_play_from[FSSPEECH_FROM_MULTI] = SpeechMultiOption->getValue(); - // Early caching of voices names, needed for sapi not to override initial voice selection - speech_enumerate_voices(); speech_set_volume((unsigned short)SpeechVolumeOption->getValue()); speech_set_voice(SpeechVoiceOption->getValue()); speech_set_rate(SpeechRateOption->getValue()); diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp index a5ce573c5d7..4075cb16ece 100644 --- a/code/sound/speech_linux.cpp +++ b/code/sound/speech_linux.cpp @@ -94,8 +94,6 @@ static bool ensure_speechd_lib() // Speech handling starts here -static SCP_vector cached_voices; -static bool voices_cached = false; static bool Speech_init = false; static SPDConnection* spd = nullptr; @@ -197,11 +195,13 @@ bool speech_set_voice(int voice) return false; } - if (voice < 0 || static_cast(voice) >= cached_voices.size()) { + auto voices = speech_enumerate_voices(); + + if (voice < 0 || static_cast(voice) >= voices.size()) { return false; } - p_spd_set_synthesis_voice(spd, cached_voices[voice].c_str()); + p_spd_set_synthesis_voice(spd, voices[voice].c_str()); return true; } @@ -234,15 +234,9 @@ bool speech_is_speaking() SCP_vector speech_enumerate_voices() { - if (voices_cached) { - return cached_voices; - } - SCP_vector fsoVoices; if (!ensure_speechd_lib()) { - voices_cached = true; - cached_voices = fsoVoices; return fsoVoices; } @@ -251,8 +245,6 @@ SCP_vector speech_enumerate_voices() connection = p_spd_open("fso_voice_list", "client", nullptr, SPD_MODE_SINGLE); if (!connection) { mprintf(("Speech: Unable to connect to speech-dispatcher\n")); - voices_cached = true; - cached_voices = fsoVoices; return fsoVoices; } } @@ -274,8 +266,7 @@ SCP_vector speech_enumerate_voices() if ( !Speech_init ) { p_spd_close(connection); } - voices_cached = true; - cached_voices = fsoVoices; + return fsoVoices; } diff --git a/code/sound/speech_mac.mm b/code/sound/speech_mac.mm index 5c0f92ab3a9..d9baa5cb6b5 100644 --- a/code/sound/speech_mac.mm +++ b/code/sound/speech_mac.mm @@ -7,8 +7,6 @@ #include "utils/unicode.h" #include "speech.h" -static SCP_vector cached_voices; -static bool voices_cached = false; static NSSpeechSynthesizer *synth = nil; static bool Speech_init = false; @@ -152,10 +150,6 @@ bool speech_is_speaking() SCP_vector speech_enumerate_voices() { - if (voices_cached) { - return cached_voices; - } - NSArray *voices = [NSSpeechSynthesizer availableVoices]; SCP_vector fsoVoices; @@ -167,8 +161,6 @@ bool speech_is_speaking() fsoVoices.push_back([name UTF8String]); } - voices_cached = true; - cached_voices = fsoVoices; return fsoVoices; } diff --git a/code/sound/speech_win.cpp b/code/sound/speech_win.cpp index e8a831040b2..134b54b7f67 100644 --- a/code/sound/speech_win.cpp +++ b/code/sound/speech_win.cpp @@ -52,8 +52,6 @@ ISpVoice *Voice_device; #include "utils/unicode.h" #include "speech.h" -static SCP_vector cached_voices; -static bool voices_cached = false; bool Speech_init = false; bool speech_init() @@ -134,7 +132,8 @@ bool speech_set_volume(unsigned short volume) bool speech_set_voice(int voice) { - if (voice < 0 || static_cast(voice) >= cached_voices.size()) { + auto voices = speech_enumerate_voices(); + if (voice < 0 || static_cast(voice) >= voices.size()) { return false; } @@ -203,79 +202,66 @@ bool speech_is_speaking() SCP_vector speech_enumerate_voices() { - if (voices_cached) { - return cached_voices; - } - - HRESULT hr = CoCreateInstance( - CLSID_SpVoice, - nullptr, - CLSCTX_ALL, - IID_ISpVoice, - (void **)&Voice_device); - - if (FAILED(hr)) { - return SCP_vector(); - } + SCP_vector voices; - // This code is mostly copied from wxLauncher - ISpObjectTokenCategory * comTokenCategory = nullptr; - IEnumSpObjectTokens * comVoices = nullptr; + ISpObjectTokenCategory* comTokenCategory = nullptr; + IEnumSpObjectTokens* comVoices = nullptr; ULONG comVoicesCount = 0; - // Generate enumeration of voices - hr = ::CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, + HRESULT hr = ::CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, CLSCTX_INPROC_SERVER, IID_ISpObjectTokenCategory, (LPVOID*)&comTokenCategory); + if (FAILED(hr)) { - return SCP_vector(); + return voices; } hr = comTokenCategory->SetId(SPCAT_VOICES, false); if (FAILED(hr)) { - return SCP_vector(); + comTokenCategory->Release(); + return voices; } hr = comTokenCategory->EnumTokens(nullptr, nullptr, &comVoices); if (FAILED(hr)) { - return SCP_vector(); + comTokenCategory->Release(); + return voices; } hr = comVoices->GetCount(&comVoicesCount); if (FAILED(hr)) { - return SCP_vector(); + comVoices->Release(); + comTokenCategory->Release(); + return voices; } - SCP_vector voices; while (comVoicesCount > 0) { - ISpObjectToken * comAVoice = nullptr; + ISpObjectToken* comAVoice = nullptr; - comVoices->Next(1, &comAVoice, nullptr); // retrieve just one + comVoices->Next(1, &comAVoice, nullptr); LPWSTR id = nullptr; comAVoice->GetStringValue(nullptr, &id); - auto idlength = wcslen(id); - auto buffer_size = WideCharToMultiByte(CP_UTF8, 0, id, (int)idlength, nullptr, 0, nullptr, nullptr); - - if (buffer_size > 0) { - SCP_string voiceName; - voiceName.resize(buffer_size); - buffer_size = WideCharToMultiByte(CP_UTF8, 0, id, (int)idlength, &voiceName[0], buffer_size, nullptr, nullptr); - - voices.push_back(voiceName); + if (id) { + auto idlength = wcslen(id); + int buffer_size = WideCharToMultiByte(CP_UTF8, 0, id, (int)idlength, nullptr, 0, nullptr, nullptr); + + if (buffer_size > 0) { + SCP_string voiceName; + voiceName.resize(buffer_size); + WideCharToMultiByte(CP_UTF8, 0, id, (int)idlength, &voiceName[0], buffer_size, nullptr, nullptr); + voices.push_back(voiceName); + } + CoTaskMemFree(id); } - CoTaskMemFree(id); comAVoice->Release(); comVoicesCount--; } + comVoices->Release(); comTokenCategory->Release(); - //only release the voice_device when getting flags - if (!Speech_init) - Voice_device->Release(); - voices_cached = true; - cached_voices = voices; + return voices; } From 127e55a3fe48cce77d109eb7ab3b308014a55700 Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Sun, 19 Apr 2026 15:05:59 -0300 Subject: [PATCH 17/26] fix mac rate Done by notimaginative --- code/sound/speech_mac.mm | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/code/sound/speech_mac.mm b/code/sound/speech_mac.mm index d9baa5cb6b5..5aec8b4fe15 100644 --- a/code/sound/speech_mac.mm +++ b/code/sound/speech_mac.mm @@ -9,7 +9,7 @@ static NSSpeechSynthesizer *synth = nil; static bool Speech_init = false; - +static int voice_default_rate = 200; bool speech_init() { @@ -120,6 +120,13 @@ bool speech_set_voice(int voice) [synth setVoice: [voices objectAtIndex:voice]]; + // reset voice to defaults + [synth setObject:nil forProperty:NSSpeechResetProperty error:nil]; + + // get default rate for voice + NSNumber *voiceRate = [synth objectForProperty:NSSpeechRateProperty error:nil]; + voice_default_rate = voiceRate ? [voiceRate intValue] : 200; // median normal rate as default + return true; } @@ -129,12 +136,14 @@ bool speech_set_rate(float rate_percent) return false; } - // 180 wpm = normal - float rate = 180.0f * (rate_percent / 100.0f); + CAP(rate_percent, 25.0f, 300.f); - [synth setObject:[NSNumber numberWithFloat:rate] - forProperty:NSSpeechRateProperty - error:nil]; + int rate = fl2i(voice_default_rate * (rate_percent / 100.0f)); + + [synth + setObject:[NSNumber numberWithInt:rate] + forProperty:NSSpeechRateProperty error:nil + ]; return true; } From 6338623569dff07a45f8451f17b7ce0ca5272e83 Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Sun, 19 Apr 2026 15:43:05 -0300 Subject: [PATCH 18/26] requested changes --- CMakeLists.txt | 6 +++- cmake/finder/FindSpeech.cmake | 2 +- code/sound/speech_linux.cpp | 60 +++++++++++++++++------------------ code/sound/speech_win.cpp | 6 ++-- code/source_groups.cmake | 2 +- 5 files changed, 41 insertions(+), 35 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9bf1923e2f0..6acedb4b79c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -74,7 +74,11 @@ IF(RESET_INSTALL_PREFIX) ENDIF(NOT $ENV{FS2PATH} STREQUAL "") ENDIF(RESET_INSTALL_PREFIX) -OPTION(FSO_USE_SPEECH "Use text-to-speach libraries" ON) +IF(WIN32 OR APPLE OR CMAKE_SYSTEM_NAME STREQUAL "Linux") + OPTION(FSO_USE_SPEECH "Use text-to-speach libraries" ON) +ELSE() + OPTION(FSO_USE_SPEECH "Use text-to-speach libraries" OFF) +ENDIF() IF (WIN32) OPTION(FSO_USE_VOICEREC "Enable voice recognition support" ON) diff --git a/cmake/finder/FindSpeech.cmake b/cmake/finder/FindSpeech.cmake index f8c28300833..c7cc6b50b4c 100644 --- a/cmake/finder/FindSpeech.cmake +++ b/cmake/finder/FindSpeech.cmake @@ -11,7 +11,7 @@ if (WIN32) endif() elseif(APPLE) # it should just work -elseif(UNIX) +elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") # uses speech-dispatcher with dlopen else() message(SEND_ERROR "Text to Speech is not supported on this platform!") diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp index 4075cb16ece..6279fd39281 100644 --- a/code/sound/speech_linux.cpp +++ b/code/sound/speech_linux.cpp @@ -214,10 +214,7 @@ bool speech_set_rate(float rate_percent) // 50 / +150 -> 100 = normal -> range -100 / +100 auto rate = static_cast(rate_percent - 100.0f); - if (rate < -100) - rate = -100; - if (rate > 100) - rate = 100; + CAP(rate, -100, 100); p_spd_set_voice_rate(spd, rate); return true; @@ -235,36 +232,39 @@ bool speech_is_speaking() SCP_vector speech_enumerate_voices() { SCP_vector fsoVoices; - - if (!ensure_speechd_lib()) { - return fsoVoices; - } - SPDConnection* connection = spd; - if ( !Speech_init ) { - connection = p_spd_open("fso_voice_list", "client", nullptr, SPD_MODE_SINGLE); - if (!connection) { - mprintf(("Speech: Unable to connect to speech-dispatcher\n")); - return fsoVoices; - } + if (!ensure_speechd_lib()) { + return fsoVoices; } - SPDVoice** voices = p_spd_list_synthesis_voices(connection); - - for (int i = 0; voices[i] != nullptr; i++) { - SCP_string lang = voices[i]->language; - // There are too many we cant add them all - // Only add English voices - if(lang.find("en") == 0) { - SCP_string voiceName; - voiceName = voices[i]->name ? voices[i]->name : "unknown"; - fsoVoices.push_back(voiceName); - } - } + if (!Speech_init || !spd) { + mprintf(("Speech: Speech system is not initialized.\n")); + return fsoVoices; + } - p_free_spd_voices(voices); - if ( !Speech_init ) { - p_spd_close(connection); + SPDVoice** voices = p_spd_list_synthesis_voices(spd); + + if (voices) + { + int num_voices = 0; + //Count voices + while (voices[num_voices] != nullptr) { + num_voices++; + } + + for (int i = 0; voices[i] != nullptr; i++) { + // There are too many we cant add them all + // Only add English voices + if (num_voices < 600 || (voices[i]->language && strncmp(voices[i]->language, "en", 2) == 0)) { + SCP_string voiceName = voices[i]->name ? voices[i]->name : "unknown"; + fsoVoices.push_back(voiceName); + } + } + p_free_spd_voices(voices); + } + else + { + mprintf(("Speech: Unable to get voice list from speech-dispatcher.\n")); } return fsoVoices; diff --git a/code/sound/speech_win.cpp b/code/sound/speech_win.cpp index 134b54b7f67..3814ee5bab9 100644 --- a/code/sound/speech_win.cpp +++ b/code/sound/speech_win.cpp @@ -180,10 +180,12 @@ bool speech_set_rate(float rate_percent) // 50 / +150 -> 100 = normal -> range -10 / +10 auto rate = static_cast((rate_percent - 100.0f) * 0.1f); - if (rate < -10) + if (rate < -10) { rate = -10; - if (rate > 10) + } + else if (rate > 10) { rate = 10; + } return SUCCEEDED(Voice_device->SetRate(rate)); } diff --git a/code/source_groups.cmake b/code/source_groups.cmake index 4930bb1c2eb..dbba52510dc 100644 --- a/code/source_groups.cmake +++ b/code/source_groups.cmake @@ -1636,7 +1636,7 @@ elseif (APPLE) ${file_root_sound} sound/speech_mac.mm ) -elseif (UNIX) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux") add_file_folder("Sound" ${file_root_sound} sound/speech_linux.cpp From 191400f1061652b555fc0a3d0ad7db25131f0d35 Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Sun, 19 Apr 2026 16:45:31 -0300 Subject: [PATCH 19/26] re-add voice cache for linux --- code/sound/speech_linux.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp index 6279fd39281..3fc4f1324ae 100644 --- a/code/sound/speech_linux.cpp +++ b/code/sound/speech_linux.cpp @@ -93,7 +93,8 @@ static bool ensure_speechd_lib() } // Speech handling starts here - +static SCP_vector cached_voices; +static bool voices_cached = false; static bool Speech_init = false; static SPDConnection* spd = nullptr; @@ -129,6 +130,8 @@ void speech_deinit() dlclose(lib_handle); lib_handle = nullptr; } + voices_cached = false; + cached_voices.clear(); } bool speech_play(const SCP_string& text) @@ -231,6 +234,10 @@ bool speech_is_speaking() SCP_vector speech_enumerate_voices() { + if (voices_cached) { + return cached_voices; + } + SCP_vector fsoVoices; if (!ensure_speechd_lib()) { @@ -255,7 +262,7 @@ SCP_vector speech_enumerate_voices() for (int i = 0; voices[i] != nullptr; i++) { // There are too many we cant add them all // Only add English voices - if (num_voices < 600 || (voices[i]->language && strncmp(voices[i]->language, "en", 2) == 0)) { + if (num_voices < 600 || (voices[i]->language && strstr(voices[i]->language, "en") != nullptr)) { SCP_string voiceName = voices[i]->name ? voices[i]->name : "unknown"; fsoVoices.push_back(voiceName); } @@ -267,6 +274,8 @@ SCP_vector speech_enumerate_voices() mprintf(("Speech: Unable to get voice list from speech-dispatcher.\n")); } + voices_cached = true; + cached_voices = fsoVoices; return fsoVoices; } From 8470aa989e00dab7f26080d2f48fc27c903de669 Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Sun, 19 Apr 2026 16:54:21 -0300 Subject: [PATCH 20/26] Open connection for linux get flags --- code/sound/speech_linux.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp index 3fc4f1324ae..9f50b093ace 100644 --- a/code/sound/speech_linux.cpp +++ b/code/sound/speech_linux.cpp @@ -244,12 +244,16 @@ SCP_vector speech_enumerate_voices() return fsoVoices; } - if (!Speech_init || !spd) { - mprintf(("Speech: Speech system is not initialized.\n")); - return fsoVoices; + SPDConnection* connection = spd; + if (!Speech_init) { + connection = p_spd_open("freespace_open", "main", nullptr, SPD_MODE_SINGLE); + if (!connection) { + mprintf(("Speech: Unable to connect to speech-dispatcher\n")); + return fsoVoices; + } } - SPDVoice** voices = p_spd_list_synthesis_voices(spd); + SPDVoice** voices = p_spd_list_synthesis_voices(connection); if (voices) { @@ -274,6 +278,9 @@ SCP_vector speech_enumerate_voices() mprintf(("Speech: Unable to get voice list from speech-dispatcher.\n")); } + if (!Speech_init) { + p_spd_close(connection); + voices_cached = true; cached_voices = fsoVoices; return fsoVoices; From 4c41528fbc294b979d122d75ff1b5da40c1c145c Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Sun, 19 Apr 2026 16:56:15 -0300 Subject: [PATCH 21/26] fix missing } --- code/sound/speech_linux.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp index 9f50b093ace..8965be97901 100644 --- a/code/sound/speech_linux.cpp +++ b/code/sound/speech_linux.cpp @@ -280,6 +280,7 @@ SCP_vector speech_enumerate_voices() if (!Speech_init) { p_spd_close(connection); + } voices_cached = true; cached_voices = fsoVoices; From 02da89d3eba19c4d6731a31d49ea93754a36016e Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Mon, 27 Apr 2026 23:10:35 -0300 Subject: [PATCH 22/26] change voice option combobox to std::pair --- code/options/Option.h | 2 +- code/sound/fsspeech.cpp | 59 ++++++++++++++++++++++++++--------------- 2 files changed, 38 insertions(+), 23 deletions(-) diff --git a/code/options/Option.h b/code/options/Option.h index 44032a80f1a..791f1057107 100644 --- a/code/options/Option.h +++ b/code/options/Option.h @@ -608,7 +608,7 @@ class OptionBuilder { _instance.setPreset(val.first, json_dump_string_new(_instance.getSerializer()(val.second), JSON_COMPACT | JSON_ENSURE_ASCII | JSON_ENCODE_ANY)); } - auto opt_ptr = make_shared>(_instance); + auto opt_ptr = std::make_shared>(_instance); if (std::holds_alternative>(_title)) { const auto& xstr_info = std::get>(_title); diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp index c0d1b506753..bf3a74f46c2 100644 --- a/code/sound/fsspeech.cpp +++ b/code/sound/fsspeech.cpp @@ -12,7 +12,6 @@ #include "sound/speech.h" #include "options/Option.h" - extern int Cmdline_freespace_no_sound; const size_t MAX_SPEECH_BUFFER_LEN = 4096; @@ -85,49 +84,65 @@ static bool ttsvolume_change(float new_val, bool initial) return true; } -static SCP_vector ttsvoice_enumerator() +static std::pair ttsvoice_deserializer(const json_t* el) +{ + int id; + char* name = nullptr; + + json_error_t err; + if (json_unpack_ex((json_t*)el, &err, 0, "{s:i, s:s}", "id", &id, "name", &name) != 0) { + throw json_exception(err); + } + + return std::make_pair(id, name); +} + +static json_t* ttsvoice_serializer(const std::pair& value) { - SCP_vector vals; + return json_pack("{s:i, s:s}", "id", value.first, "name", value.second.c_str()); +} + +static SCP_vector> ttsvoice_enumerator() +{ + SCP_vector< std::pair> vals; auto voices = speech_enumerate_voices(); - for (int i = 0; i < static_cast(voices.size()); ++i) { - vals.push_back(i); + + if (voices.empty()) { + vals.emplace_back(std::make_pair(0, "No voices loaded")); + } + else { + for (int i = 0; i < static_cast(voices.size()); ++i) { + vals.emplace_back(std::make_pair(i, voices[i])); + } } return vals; } -static SCP_string ttsvoice_display(int id) +static SCP_string ttsvoice_display(std::pair vi) { - auto voices = speech_enumerate_voices(); - if (voices.empty() || id < 0 || static_cast(id) >= voices.size()) { - return "No voices loaded"; - } - SCP_string out; - sprintf(out, "(%d) %s", id + 1, voices[id].c_str()); - return out; + return vi.second; } -static bool ttsvoice_change(int id, bool initial) +static bool ttsvoice_change(std::pair new_voice, bool initial) { if (initial) { return false; } - auto voices = speech_enumerate_voices(); - if (voices.empty() || id < 0 || static_cast(id) >= voices.size()) { - return false; - } - speech_set_voice(id); + speech_set_voice(new_voice.first); return true; } -static auto SpeechVoiceOption = options::OptionBuilder("Speech.Voice", +static auto SpeechVoiceOption = options::OptionBuilder>("Speech.Voice", std::pair{"TTS Voice", 1915}, std::pair{"The voice used to read text", 1916}) .category(std::make_pair("Audio", 1826)) .level(options::ExpertLevel::Beginner) + .default_func([]() { return ttsvoice_enumerator().front(); }) // always guarantees at least 1 value .enumerator(ttsvoice_enumerator) .display(ttsvoice_display) + .serializer(ttsvoice_serializer) + .deserializer(ttsvoice_deserializer) .flags({ options::OptionFlags::ForceMultiValueSelection }) - .default_val(0) .change_listener(ttsvoice_change) .importance(3) .finish(); @@ -230,7 +245,7 @@ bool fsspeech_init() FSSpeech_play_from[FSSPEECH_FROM_INGAME] = SpeechIngameOption->getValue(); FSSpeech_play_from[FSSPEECH_FROM_MULTI] = SpeechMultiOption->getValue(); speech_set_volume((unsigned short)SpeechVolumeOption->getValue()); - speech_set_voice(SpeechVoiceOption->getValue()); + speech_set_voice(SpeechVoiceOption->getValue().first); speech_set_rate(SpeechRateOption->getValue()); } else From e90860fd78e7f78723e0379a71c0d6a50256830d Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Mon, 27 Apr 2026 23:12:39 -0300 Subject: [PATCH 23/26] delete duplicated voice id sanitizer on windows set voice --- code/sound/speech_win.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/code/sound/speech_win.cpp b/code/sound/speech_win.cpp index 3814ee5bab9..c9621a86f24 100644 --- a/code/sound/speech_win.cpp +++ b/code/sound/speech_win.cpp @@ -131,12 +131,7 @@ bool speech_set_volume(unsigned short volume) } bool speech_set_voice(int voice) -{ - auto voices = speech_enumerate_voices(); - if (voice < 0 || static_cast(voice) >= voices.size()) { - return false; - } - +{ HRESULT hr; CComPtr cpVoiceToken; CComPtr cpEnum; From afb7846954d0b4f6dd6c99995b731a57764d4203 Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Tue, 28 Apr 2026 19:35:49 -0300 Subject: [PATCH 24/26] Use pairs for speech_enumerate_voices() and adapt linux speech --- code/cmdline/cmdline.cpp | 2 +- code/sound/fsspeech.cpp | 25 ++++++++++-------- code/sound/speech.h | 6 ++--- code/sound/speech_linux.cpp | 52 ++++++++++--------------------------- code/sound/speech_mac.mm | 8 +++--- code/sound/speech_win.cpp | 7 ++--- 6 files changed, 40 insertions(+), 60 deletions(-) diff --git a/code/cmdline/cmdline.cpp b/code/cmdline/cmdline.cpp index 7dae2532cab..e200327a118 100644 --- a/code/cmdline/cmdline.cpp +++ b/code/cmdline/cmdline.cpp @@ -1414,7 +1414,7 @@ static json_t* json_get_v1() { auto voices = speech_enumerate_voices(); for (auto& voice : voices) { - json_array_append_new(voices_array, json_string(voice.c_str())); + json_array_append_new(voices_array, json_string(voice.second.c_str())); } json_object_set_new(root, "voices", voices_array); diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp index bf3a74f46c2..6b78a4ed58a 100644 --- a/code/sound/fsspeech.cpp +++ b/code/sound/fsspeech.cpp @@ -102,33 +102,36 @@ static json_t* ttsvoice_serializer(const std::pair& value) return json_pack("{s:i, s:s}", "id", value.first, "name", value.second.c_str()); } +static SCP_vector> voice_list_cache; + static SCP_vector> ttsvoice_enumerator() { - SCP_vector< std::pair> vals; - auto voices = speech_enumerate_voices(); - - if (voices.empty()) { - vals.emplace_back(std::make_pair(0, "No voices loaded")); + if(voice_list_cache.empty()) { + auto voices = speech_enumerate_voices(); + + if (voices.empty()) { + voices.emplace_back(std::make_pair(0, "No voices loaded")); + } + voice_list_cache = voices; + return voices; } else { - for (int i = 0; i < static_cast(voices.size()); ++i) { - vals.emplace_back(std::make_pair(i, voices[i])); - } + return voice_list_cache; } - return vals; } -static SCP_string ttsvoice_display(std::pair vi) +static SCP_string ttsvoice_display(const std::pair vi) { return vi.second; } -static bool ttsvoice_change(std::pair new_voice, bool initial) +static bool ttsvoice_change(const std::pair new_voice, bool initial) { if (initial) { return false; } speech_set_voice(new_voice.first); + voice_list_cache.clear(); return true; } diff --git a/code/sound/speech.h b/code/sound/speech.h index 6f73c2f5264..07d7d9debf6 100644 --- a/code/sound/speech.h +++ b/code/sound/speech.h @@ -26,7 +26,7 @@ bool speech_set_rate(float rate); bool speech_is_speaking(); -SCP_vector speech_enumerate_voices(); +SCP_vector> speech_enumerate_voices(); #else @@ -41,8 +41,8 @@ inline bool speech_set_voice(int /*voice*/) { return false; } inline bool speech_set_rate(float /*rate*/) { return false; } inline bool speech_is_speaking() { return false; } -inline SCP_vector speech_enumerate_voices() { - return SCP_vector(); +inline SCP_vector> speech_enumerate_voices() { + return SCP_vector>(); } #endif diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp index 8965be97901..e996ecf22bb 100644 --- a/code/sound/speech_linux.cpp +++ b/code/sound/speech_linux.cpp @@ -93,8 +93,6 @@ static bool ensure_speechd_lib() } // Speech handling starts here -static SCP_vector cached_voices; -static bool voices_cached = false; static bool Speech_init = false; static SPDConnection* spd = nullptr; @@ -130,8 +128,6 @@ void speech_deinit() dlclose(lib_handle); lib_handle = nullptr; } - voices_cached = false; - cached_voices.clear(); } bool speech_play(const SCP_string& text) @@ -204,7 +200,7 @@ bool speech_set_voice(int voice) return false; } - p_spd_set_synthesis_voice(spd, voices[voice].c_str()); + p_spd_set_synthesis_voice(spd, voices[voice].second.c_str()); return true; } @@ -232,58 +228,38 @@ bool speech_is_speaking() return false; } -SCP_vector speech_enumerate_voices() +SCP_vector> speech_enumerate_voices() { - if (voices_cached) { - return cached_voices; - } - - SCP_vector fsoVoices; - - if (!ensure_speechd_lib()) { - return fsoVoices; - } + SCP_vector> fsoVoices; - SPDConnection* connection = spd; if (!Speech_init) { - connection = p_spd_open("freespace_open", "main", nullptr, SPD_MODE_SINGLE); - if (!connection) { + if (!ensure_speechd_lib()) { + return fsoVoices; + } + spd = p_spd_open("freespace_open", "main", nullptr, SPD_MODE_SINGLE); + if (!spd) { mprintf(("Speech: Unable to connect to speech-dispatcher\n")); return fsoVoices; } } - SPDVoice** voices = p_spd_list_synthesis_voices(connection); - - if (voices) - { - int num_voices = 0; - //Count voices - while (voices[num_voices] != nullptr) { - num_voices++; - } + SPDVoice** voices = p_spd_list_synthesis_voices(spd); + if (voices) { for (int i = 0; voices[i] != nullptr; i++) { - // There are too many we cant add them all - // Only add English voices - if (num_voices < 600 || (voices[i]->language && strstr(voices[i]->language, "en") != nullptr)) { - SCP_string voiceName = voices[i]->name ? voices[i]->name : "unknown"; - fsoVoices.push_back(voiceName); - } + fsoVoices.emplace_back(std::make_pair(i, voices[i]->name)); } p_free_spd_voices(voices); } - else - { + else { mprintf(("Speech: Unable to get voice list from speech-dispatcher.\n")); } if (!Speech_init) { - p_spd_close(connection); + p_spd_close(spd); + spd = nullptr; } - voices_cached = true; - cached_voices = fsoVoices; return fsoVoices; } diff --git a/code/sound/speech_mac.mm b/code/sound/speech_mac.mm index 5aec8b4fe15..cb18966ca37 100644 --- a/code/sound/speech_mac.mm +++ b/code/sound/speech_mac.mm @@ -157,17 +157,17 @@ bool speech_is_speaking() return [synth isSpeaking]; } -SCP_vector speech_enumerate_voices() +SCP_vector> speech_enumerate_voices() { NSArray *voices = [NSSpeechSynthesizer availableVoices]; - SCP_vector fsoVoices; + SCP_vector> fsoVoices; + int voiceID = 0; for (NSString *voiceIdentifier in voices) { NSDictionary *attributes = [NSSpeechSynthesizer attributesForVoice:voiceIdentifier]; NSString *name = [attributes objectForKey:NSVoiceName]; - - fsoVoices.push_back([name UTF8String]); + fsoVoices.emplace_back(std::make_pair(voiceID++, [name UTF8String])); } return fsoVoices; diff --git a/code/sound/speech_win.cpp b/code/sound/speech_win.cpp index c9621a86f24..a3d723c093a 100644 --- a/code/sound/speech_win.cpp +++ b/code/sound/speech_win.cpp @@ -197,9 +197,9 @@ bool speech_is_speaking() return (pStatus.dwRunningState != SPRS_DONE); } -SCP_vector speech_enumerate_voices() +SCP_vector> speech_enumerate_voices() { - SCP_vector voices; + SCP_vector> voices; ISpObjectTokenCategory* comTokenCategory = nullptr; IEnumSpObjectTokens* comVoices = nullptr; @@ -231,6 +231,7 @@ SCP_vector speech_enumerate_voices() return voices; } + int voiceID = 0; while (comVoicesCount > 0) { ISpObjectToken* comAVoice = nullptr; @@ -247,7 +248,7 @@ SCP_vector speech_enumerate_voices() SCP_string voiceName; voiceName.resize(buffer_size); WideCharToMultiByte(CP_UTF8, 0, id, (int)idlength, &voiceName[0], buffer_size, nullptr, nullptr); - voices.push_back(voiceName); + voices.emplace_back(std::make_pair(voiceID++, voiceName)); } CoTaskMemFree(id); } From 9fa88407066eac4f7256cb24b1e2c2b897482ae0 Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Tue, 28 Apr 2026 20:04:14 -0300 Subject: [PATCH 25/26] use reference --- code/sound/fsspeech.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp index 6b78a4ed58a..3b301a7a1f4 100644 --- a/code/sound/fsspeech.cpp +++ b/code/sound/fsspeech.cpp @@ -120,12 +120,12 @@ static SCP_vector> ttsvoice_enumerator() } } -static SCP_string ttsvoice_display(const std::pair vi) +static SCP_string ttsvoice_display(const std::pair& vi) { return vi.second; } -static bool ttsvoice_change(const std::pair new_voice, bool initial) +static bool ttsvoice_change(const std::pair& new_voice, bool initial) { if (initial) { return false; From bf1512fd399671c17cb7ff143784a0d75a592def Mon Sep 17 00:00:00 2001 From: Salvador Cipolla Date: Wed, 29 Apr 2026 20:21:58 -0300 Subject: [PATCH 26/26] actually free vector memory --- code/sound/fsspeech.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp index 3b301a7a1f4..02f23f221a5 100644 --- a/code/sound/fsspeech.cpp +++ b/code/sound/fsspeech.cpp @@ -132,6 +132,7 @@ static bool ttsvoice_change(const std::pair& new_voice, bool in } speech_set_voice(new_voice.first); voice_list_cache.clear(); + voice_list_cache.shrink_to_fit(); return true; }