From 9ad34dcb97ff57063e606784e6443953bc1a569d Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Sun, 5 Apr 2026 16:40:04 -0300
Subject: [PATCH 01/26] Add ImGui speech options

---
 code/sound/fsspeech.cpp | 172 +++++++++++++++++++++++++++++++++++++---
 code/sound/speech.cpp   |  15 ++--
 2 files changed, 171 insertions(+), 16 deletions(-)
diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp
index 65ef525bb3a..da153620c1f 100644
--- a/code/sound/fsspeech.cpp
+++ b/code/sound/fsspeech.cpp
@@ -10,6 +10,7 @@
 #include "osapi/osregistry.h"
 #include "sound/fsspeech.h"
 #include "sound/speech.h"
+#include "options/Option.h"
 
 
 extern int Cmdline_freespace_no_sound;
@@ -30,6 +31,141 @@ const char *FSSpeech_play_id[FSSPEECH_FROM_MAX] =
 char Speech_buffer[MAX_SPEECH_BUFFER_LEN] = "";
 size_t  Speech_buffer_len;
 
+static bool ttsingame_change(bool new_val, bool initial)
+{
+	if (initial) {
+		return false;
+	}
+	FSSpeech_play_from[FSSPEECH_FROM_INGAME] = new_val;
+	return true;
+}
+
+static bool ttsmulti_change(bool new_val, bool initial)
+{
+	if (initial) {
+		return false;
+	}
+	FSSpeech_play_from[FSSPEECH_FROM_MULTI] = new_val;
+	return true;
+}
+
+static bool ttsbriefing_change(bool new_val, bool initial)
+{
+	if (initial) {
+		return false;
+	}
+	FSSpeech_play_from[FSSPEECH_FROM_BRIEFING] = new_val;
+	return true;
+}
+
+static bool ttstechroom_change(bool new_val, bool initial)
+{
+	if (initial) {
+		return false;
+	}
+	FSSpeech_play_from[FSSPEECH_FROM_TECHROOM] = new_val;
+	return true;
+}
+
+static bool ttsvolume_change(float new_val, bool initial)
+{
+	if (initial) {
+		return false;
+	}
+	speech_set_volume((unsigned short) new_val);
+	return true;
+}
+
+static SCP_vector<int> ttsvoice_enumerator()
+{
+	SCP_vector<int> vals;
+	auto voices = speech_enumerate_voices();
+	for (int i = 0; i < voices.size(); ++i) {
+		vals.push_back(i);
+	}
+	return vals;
+}
+
+static SCP_string ttsvoice_display(int id)
+{
+	SCP_string out;
+	auto voices = speech_enumerate_voices();
+	sprintf(out, "(%d) %s", id + 1, voices[id].c_str());
+	return out;
+}
+
+static bool ttsvoice_change(int id, bool initial)
+{
+	if (initial) {
+		return false;
+	}
+	speech_set_voice(id);
+	return true;
+}
+
+static auto SpeechVoiceOption = options::OptionBuilder<int>("Speech.Voice",
+	std::pair<const char*, int>{"TTS Voice", -1},
+	std::pair<const char*, int>{"The voice used to read text", -1})
+	.category(std::make_pair("Audio", 1826))
+	.level(options::ExpertLevel::Beginner)
+	.enumerator(ttsvoice_enumerator)
+	.display(ttsvoice_display)
+	.flags({ options::OptionFlags::ForceMultiValueSelection })
+	.default_val(0)
+	.change_listener(ttsvoice_change)
+	.importance(2)
+	.finish();
+
+static auto SpeechVolumeOption = options::OptionBuilder<float>("Speech.Volume",
+	std::pair<const char*, int>{"TTS Volume", -1},
+	std::pair<const char*, int>{"Volume used for playing TTS speech", -1})
+	.category(std::make_pair("Audio", 1826))
+	.range(0.0f, 100.0f)
+	.default_val(100.0f)
+	.change_listener(ttsvolume_change)
+	.importance(1)
+	.finish();
+
+static auto SpeechBriefingOption = options::OptionBuilder<bool>("Speech.Briefing",
+	std::pair<const char*, int>{"TTS in briefings", -1},
+	std::pair<const char*, int>{"Enable or disable TTS in briefings", -1})
+	.category(std::make_pair("Audio", 1826))
+	.level(options::ExpertLevel::Beginner)
+	.change_listener(ttsbriefing_change)
+	.default_val(true)
+	.importance(0)
+	.finish();
+
+static auto SpeechTechroomOption = options::OptionBuilder<bool>("Speech.Techroom",
+	std::pair<const char*, int>{"TTS in techroom", -1},
+	std::pair<const char*, int>{"Enable or disable TTS in techroom", -1})
+	.category(std::make_pair("Audio", 1826))
+	.level(options::ExpertLevel::Beginner)
+	.change_listener(ttstechroom_change)
+	.default_val(true)
+	.importance(0)
+	.finish();
+
+static auto SpeechIngameOption = options::OptionBuilder<bool>("Speech.Ingame",
+	std::pair<const char*, int>{"TTS in-game", -1},
+	std::pair<const char*, int>{"Enable or disable TTS in-game", -1})
+	.category(std::make_pair("Audio", 1826))
+	.level(options::ExpertLevel::Beginner)
+	.change_listener(ttsingame_change)
+	.default_val(true)
+	.importance(0)
+	.finish();
+
+static auto SpeechMultiOption = options::OptionBuilder<bool>("Speech.Multi",
+	std::pair<const char*, int>{"TTS in multiplayer", -1},
+	std::pair<const char*, int>{"Enable or disable TTS in multiplayer", -1})
+	.category(std::make_pair("Audio", 1826))
+	.level(options::ExpertLevel::Beginner)
+	.change_listener(ttsmulti_change)
+	.default_val(true)
+	.importance(0)
+	.finish();
+
 bool fsspeech_init()
 {
 	if (speech_inited) {
@@ -45,18 +181,32 @@ bool fsspeech_init()
 		return false;
 	}
 
-	// Get the settings from the registry
-	for(int i = 0; i < FSSPEECH_FROM_MAX; i++) {
-		FSSpeech_play_from[i] =
-			os_config_read_uint(NULL, FSSpeech_play_id[i], 0) ? true : false;
-		nprintf(("Speech", "Play %s: %s\n", FSSpeech_play_id[i], FSSpeech_play_from[i] ? "true" : "false"));
+	if (Using_in_game_options) 
+	{
+		FSSpeech_play_from[FSSPEECH_FROM_TECHROOM] = SpeechTechroomOption->getValue();
+		FSSpeech_play_from[FSSPEECH_FROM_BRIEFING] = SpeechBriefingOption->getValue();
+		FSSpeech_play_from[FSSPEECH_FROM_INGAME] = SpeechIngameOption->getValue();
+		FSSpeech_play_from[FSSPEECH_FROM_MULTI] = SpeechMultiOption->getValue();
+		// Early caching of voices names, needed for sapi not to override initial voice selection
+		speech_enumerate_voices();
+		speech_set_volume((unsigned short)SpeechVolumeOption->getValue());
+		speech_set_voice(SpeechVoiceOption->getValue());
+	}
+	else 
+	{
+		// Get the settings from the registry
+		for (int i = 0; i < FSSPEECH_FROM_MAX; i++) {
+			FSSpeech_play_from[i] =
+				os_config_read_uint(NULL, FSSpeech_play_id[i], 0) ? true : false;
+			nprintf(("Speech", "Play %s: %s\n", FSSpeech_play_id[i], FSSpeech_play_from[i] ? "true" : "false"));
+		}
+
+		int volume = os_config_read_uint(NULL, "SpeechVolume", 100);
+		speech_set_volume((unsigned short)volume);
+
+		int voice = os_config_read_uint(NULL, "SpeechVoice", 0);
+		speech_set_voice(voice);
 	}
-
-	int volume = os_config_read_uint(NULL, "SpeechVolume", 100);
-	speech_set_volume((unsigned short) volume);
-
-	int voice = os_config_read_uint(NULL, "SpeechVoice", 0);
-	speech_set_voice(voice);
 
 	speech_inited = 1;
 
diff --git a/code/sound/speech.cpp b/code/sound/speech.cpp
index 7967950ac10..f958f32d1fb 100644
--- a/code/sound/speech.cpp
+++ b/code/sound/speech.cpp
@@ -66,12 +66,12 @@
 #include <cwchar>
 #include <cstdio>
 #pragma warning(pop)
-
 #include "globalincs/pstypes.h"
 #include "utils/unicode.h"
 #include "speech.h"
 
-
+static SCP_vector<SCP_string> cached_voices;
+static bool voices_cached = false;
 bool Speech_init = false;
 
 bool speech_init()
@@ -303,6 +303,9 @@ bool speech_is_speaking()
 
 SCP_vector<SCP_string> speech_enumerate_voices()
 {
+	if (voices_cached) {
+		return cached_voices;
+	}
 #ifdef _WIN32
 	HRESULT hr = CoCreateInstance(
 		CLSID_SpVoice,
@@ -368,9 +371,11 @@ SCP_vector<SCP_string> speech_enumerate_voices()
 	}
 
 	comTokenCategory->Release();
-
-	Voice_device->Release();
-
+	//only release the voice_device when getting flags
+	if (!Speech_init)
+		Voice_device->Release();
+	voices_cached = true;
+	cached_voices = voices;
 	return voices;
 #else
 	STUB_FUNCTION;

From 839d6b662da27d054b0d481133f7139c893eebe3 Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Sun, 5 Apr 2026 18:47:13 -0300
Subject: [PATCH 02/26] Adapt existing Windows SAPI speech implementation

---
 code/sound/fsspeech.cpp                   |  26 +++-
 code/sound/speech.h                       |   4 +-
 code/sound/{speech.cpp => speech_win.cpp} | 157 +++-------------------
 code/source_groups.cmake                  |   8 +-
 4 files changed, 50 insertions(+), 145 deletions(-)
 rename code/sound/{speech.cpp => speech_win.cpp} (68%)

diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp
index da153620c1f..ecc7eb64b1a 100644
--- a/code/sound/fsspeech.cpp
+++ b/code/sound/fsspeech.cpp
@@ -166,6 +166,22 @@ static auto SpeechMultiOption = options::OptionBuilder<bool>("Speech.Multi",
 	.importance(0)
 	.finish();
 
+void sanitize_text(const char* input, SCP_string& output) {
+	output.clear();
+	bool saw_dollar = false;
+	for (auto ch : unicode::codepoint_range(input)) {
+		if (ch == UNICODE_CHAR('$')) {
+			saw_dollar = true;
+			continue;
+		}
+		else if (saw_dollar) {
+			saw_dollar = false;
+			continue;
+		}
+		unicode::encode(ch, std::back_inserter(output));
+	}
+}
+
 bool fsspeech_init()
 {
 	if (speech_inited) {
@@ -225,6 +241,11 @@ void fsspeech_deinit()
 
 void fsspeech_play(int type, const char *text)
 {
+	if (text == nullptr) {
+		nprintf(("Speech", "Not playing speech because passed text is null.\n"));
+		return;
+	}
+
 	if (!speech_inited) {
 		nprintf(("Speech", "Aborting fsspech_play because speech_inited is false.\n"));
 		return;
@@ -240,7 +261,10 @@ void fsspeech_play(int type, const char *text)
 		return;
 	}
 
-	speech_play(text);
+	SCP_string sanitized_string;
+	sanitize_text(text, sanitized_string);
+
+	speech_play(sanitized_string);
 }
 
 void fsspeech_stop()
diff --git a/code/sound/speech.h b/code/sound/speech.h
index 3f731dd5a7f..e16eeef2a43 100644
--- a/code/sound/speech.h
+++ b/code/sound/speech.h
@@ -15,7 +15,7 @@
 
 bool speech_init();
 void speech_deinit();
-bool speech_play(const char *text);
+bool speech_play(const SCP_string& text);
 bool speech_pause();
 bool speech_resume();
 bool speech_stop();
@@ -31,7 +31,7 @@ SCP_vector<SCP_string> speech_enumerate_voices();
 
 inline bool speech_init() { return false; }
 inline void speech_deinit() {}
-inline bool speech_play(const char* /*text*/) { return false; }
+inline bool speech_play(const SCP_string& /*text*/) { return false; }
 inline bool speech_pause() { return false; }
 inline bool speech_resume() { return false; }
 inline bool speech_stop() { return false; }
diff --git a/code/sound/speech.cpp b/code/sound/speech_win.cpp
similarity index 68%
rename from code/sound/speech.cpp
rename to code/sound/speech_win.cpp
index f958f32d1fb..90698a52417 100644
--- a/code/sound/speech.cpp
+++ b/code/sound/speech_win.cpp
@@ -5,26 +5,18 @@
  * created based on the source.
  *
 */ 
-
-
-
-
-
 #ifndef FS2_SPEECH
-#if defined(_WIN32) || defined(__APPLE__)
+#if defined(_WIN32)
 #if NDEBUG
 	#pragma message( "WARNING: You have not compiled speech into this build (use FS2_SPEECH)" )
 #endif // NDEBUG
-#endif // _WIN32 or __APPLE__
-#elif !defined(__APPLE__) // to end-of-file ...
-
+#endif // _WIN32
+#else // FS2_SPEECH
 
 #ifdef LAUNCHER
 #include "stdafx.h"
 #endif	//LAUNCHER
 
-#ifdef _WIN32
-
 // Since we define these ourself we need to undefine them for the sapi header
 #pragma push_macro("strcpy_s")
 #pragma push_macro("strncpy_s")
@@ -37,10 +29,9 @@
 #undef memset
 #undef memcpy
 
-	#include <windows.h>
-	#include <sapi.h>
-
-	#include <sphelper.h>
+#include <windows.h>
+#include <sapi.h>
+#include <sphelper.h>
 
 #pragma pushpop_macro("strcpy_s")
 #pragma pushpop_macro("strncpy_s")
@@ -48,16 +39,7 @@
 #pragma pushpop_macro("memset")
 #pragma pushpop_macro("memcpy")
 
-	ISpVoice *Voice_device;
-#elif defined(SCP_UNIX)
-	#include <fcntl.h>
-//	#include <stdio.h>
-
-	int speech_dev = -1;
-//	FILE *speech_dev = NULL;
-#else 
-	#pragma error( "ERROR: Unknown platform, speech (FS2_SPEECH) is not supported" )
-#endif	//_WIN32
+ISpVoice *Voice_device;
 
 #pragma warning(push)
 #pragma warning(disable: 4995)
@@ -76,7 +58,6 @@ bool Speech_init = false;
 
 bool speech_init()
 {
-#ifdef _WIN32
     HRESULT hr = CoCreateInstance(
 		CLSID_SpVoice, 
 		NULL, 
@@ -85,19 +66,6 @@ bool speech_init()
 		(void **)&Voice_device);
 
 	Speech_init = SUCCEEDED(hr);
-#else
-
-	speech_dev = open("/dev/speech", O_WRONLY | O_DIRECT);
-//	speech_dev = fopen("/dev/speech", "w");
-
-	if (speech_dev == -1) {
-//	if (speech_dev == NULL) {
-		mprintf(("Couldn't open '/dev/speech', turning text-to-speech off...\n"));
-		return false;
-	}
-
-	Speech_init = true;
-#endif
 
 	nprintf(("Speech", "Speech init %s\n", Speech_init ? "succeeded!" : "failed!"));
 	return Speech_init;
@@ -106,44 +74,22 @@ bool speech_init()
 void speech_deinit()
 {
 	if(Speech_init == false) return;
-
-#ifdef _WIN32
 	Voice_device->Release();
-#else
-	close(speech_dev);
-//	fclose(speech_dev);
-#endif
 }
 
-bool speech_play(const char *text)
+bool speech_play(const SCP_string& text)
 {
-	nprintf(("Speech", "Attempting to play speech string %s...\n", text));
+	nprintf(("Speech", "Attempting to play speech string %s...\n", text.c_str()));
 
 	if(Speech_init == false) return true;
-	if (text == NULL) {
-		nprintf(("Speech", "Not playing speech because passed text is null.\n"));
-		return false;
-	}
-
-#ifdef _WIN32
-	SCP_string work_buffer;
-
-	bool saw_dollar = false;
-	for (auto ch : unicode::codepoint_range(text)) {
-		if (ch == UNICODE_CHAR('$')) {
-			// Skip $ escape sequences which appear in briefing text
-			saw_dollar = true;
-			continue;
-		} else if (saw_dollar) {
-			saw_dollar = false;
-			continue;
-		}
 
-		unicode::encode(ch, std::back_inserter(work_buffer));
+	if (text.empty()) {
+		nprintf(("Speech", "Not playing speech because passed text is empty.\n"));
+		return false;
 	}
 
 	// Determine the needed amount of data
-	auto num_chars = MultiByteToWideChar(CP_UTF8, 0, work_buffer.c_str(), (int) work_buffer.size(), nullptr, 0);
+	auto num_chars = MultiByteToWideChar(CP_UTF8, 0, text.c_str(), (int)text.size(), nullptr, 0);
 
 	if (num_chars <= 0) {
 		// Error
@@ -153,7 +99,7 @@ bool speech_play(const char *text)
 	std::wstring wide_string;
 	wide_string.resize(num_chars);
 
-	auto err = MultiByteToWideChar(CP_UTF8, 0, work_buffer.c_str(), (int)work_buffer.size(), &wide_string[0], num_chars);
+	auto err = MultiByteToWideChar(CP_UTF8, 0, text.c_str(), (int)text.size(), &wide_string[0], num_chars);
 
 	if (err <= 0) {
 		return false;
@@ -161,88 +107,33 @@ bool speech_play(const char *text)
 
 	speech_stop();
 	return SUCCEEDED(Voice_device->Speak(wide_string.c_str(), SPF_ASYNC, NULL));
-#else
-	int len = strlen(text);
-	char Conversion_buffer[MAX_SPEECH_CHAR_LEN];
-
-	if(len > (MAX_SPEECH_CHAR_LEN - 1)) {
-		len = MAX_SPEECH_CHAR_LEN - 1;
-	}
-
-	int count = 0;
-	for(int i = 0; i < len; i++) {
-		if(text[i] == '$') {
-			i++;
-			continue;
-		}
-
-		Conversion_buffer[count] = text[i];
-		count++;
-	}
-
-	Conversion_buffer[count] = '\0';
-
-	if ( write(speech_dev, Conversion_buffer, count) == -1 )
-		return false;
-//	if (fwrite(Conversion_buffer, count, 1, speech_dev))
-//		fflush(speech_dev);
-//	else
-//		return false;
-
-	return true;
-#endif	//_WIN32
 }
 
 bool speech_pause()
 {
 	if(Speech_init == false) return true;
-#ifdef _WIN32
 	return SUCCEEDED(Voice_device->Pause());
-#else
-	STUB_FUNCTION;
-
-	return true;
-#endif
 }
 
 bool speech_resume()
 {
 	if(Speech_init == false) return true;
-#ifdef _WIN32
 	return SUCCEEDED(Voice_device->Resume());
-#else
-	STUB_FUNCTION;
-
-	return true;
-#endif
 }
 
 bool speech_stop()
 {
 	if(Speech_init == false) return true;
-#ifdef _WIN32
     return SUCCEEDED(Voice_device->Speak( NULL, SPF_PURGEBEFORESPEAK, NULL ));
-#else
-	STUB_FUNCTION;
-
-	return true;
-#endif
 }
 
 bool speech_set_volume(unsigned short volume)
 {
-#ifdef _WIN32
     return SUCCEEDED(Voice_device->SetVolume(volume));
-#else
-	STUB_FUNCTION;
-
-	return true;
-#endif
 }
 
 bool speech_set_voice(int voice)
 {
-#ifdef _WIN32
 	HRESULT                             hr;
 	CComPtr<ISpObjectToken>             cpVoiceToken;
 	CComPtr<IEnumSpObjectTokens>        cpEnum;
@@ -276,17 +167,11 @@ bool speech_set_voice(int voice)
 		count++;
 	}
 	return false;
-#else
-	STUB_FUNCTION;
-
-	return true;
-#endif
 }
 
 // Goober5000
 bool speech_is_speaking()
 {
-#ifdef _WIN32
 	HRESULT			hr;
 	SPVOICESTATUS	pStatus;
 
@@ -294,11 +179,6 @@ bool speech_is_speaking()
 	if (FAILED(hr)) return false;
 
 	return (pStatus.dwRunningState != SPRS_DONE);
-#else
-	STUB_FUNCTION;
-
-	return false;
-#endif
 }
 
 SCP_vector<SCP_string> speech_enumerate_voices()
@@ -306,7 +186,7 @@ SCP_vector<SCP_string> speech_enumerate_voices()
 	if (voices_cached) {
 		return cached_voices;
 	}
-#ifdef _WIN32
+
 	HRESULT hr = CoCreateInstance(
 		CLSID_SpVoice,
 		NULL,
@@ -377,11 +257,6 @@ SCP_vector<SCP_string> speech_enumerate_voices()
 	voices_cached = true;
 	cached_voices = voices;
 	return voices;
-#else
-	STUB_FUNCTION;
-
-	return SCP_vector<SCP_string>();
-#endif
 }
 
-#endif // FS2_SPEECH
+#endif // FS2_SPEECH
\ No newline at end of file
diff --git a/code/source_groups.cmake b/code/source_groups.cmake
index 54e6bf58501..e28d696cd82 100644
--- a/code/source_groups.cmake
+++ b/code/source_groups.cmake
@@ -1619,12 +1619,18 @@ add_file_folder("Sound"
 	sound/rtvoice.h
 	sound/sound.cpp
 	sound/sound.h
-	sound/speech.cpp
 	sound/speech.h
 	sound/voicerec.cpp
 	sound/voicerec.h
 )
 
+if (WIN32)
+	add_file_folder("Sound"
+		${file_root_sound}
+		sound/speech_win.cpp
+	)
+endif()
+
 if (APPLE)
 	add_file_folder("Sound"
 		${file_root_sound}

From 618cf58e96ced0d9c07ec003f6192cd5484de095 Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Sun, 5 Apr 2026 18:56:52 -0300
Subject: [PATCH 03/26] Adapt existing macOS speech integration

---
 code/sound/{speech.mm => speech_mac.cpp} | 37 ++++++++----------------
 code/source_groups.cmake                 |  2 +-
 2 files changed, 13 insertions(+), 26 deletions(-)
 rename code/sound/{speech.mm => speech_mac.cpp} (81%)

diff --git a/code/sound/speech.mm b/code/sound/speech_mac.cpp
similarity index 81%
rename from code/sound/speech.mm
rename to code/sound/speech_mac.cpp
index 0cb45534028..17e7e2313d5 100644
--- a/code/sound/speech.mm
+++ b/code/sound/speech_mac.cpp
@@ -6,7 +6,8 @@
 #include "globalincs/pstypes.h"
 #include "utils/unicode.h"
 
-
+static SCP_vector<SCP_string> cached_voices;
+static bool voices_cached = false;
 static NSSpeechSynthesizer *synth = nil;
 static bool Speech_init = false;
 
@@ -36,40 +37,20 @@ void speech_deinit()
 	Speech_init = false;
 }
 
-bool speech_play(const char *text)
+bool speech_play(const SCP_string& text)
 {
 	if ( !Speech_init ) {
 		return false;
 	}
 
-	if ( !text || !strlen(text) ) {
-		nprintf(("Speech", "Not playing speech because passed text is null.\n"));
-		return false;
-	}
-
-	SCP_string work_buffer;
-
-	bool saw_dollar = false;
-	for (auto ch : unicode::codepoint_range(text)) {
-		if (ch == UNICODE_CHAR('$')) {
-			// Skip $ escape sequences which appear in briefing text
-			saw_dollar = true;
-			continue;
-		} else if (saw_dollar) {
-			saw_dollar = false;
-			continue;
-		}
-
-		unicode::encode(ch, std::back_inserter(work_buffer));
-	}
-
-	if (work_buffer.empty()) {
+	if (text.empty()) {
+		nprintf(("Speech", "Not playing speech because passed text is empty.\n"));
 		return false;
 	}
 
 	[synth startSpeakingString:
 		[NSString stringWithUTF8String:
-			work_buffer.c_str()
+			text.c_str()
 		]
 	];
 
@@ -154,6 +135,10 @@ bool speech_is_speaking()
 
 SCP_vector<SCP_string> speech_enumerate_voices()
 {
+	if (voices_cached) {
+		return cached_voices;
+	}
+
 	NSArray *voices = [NSSpeechSynthesizer availableVoices];
 
 	SCP_vector<SCP_string> fsoVoices;
@@ -165,6 +150,8 @@ bool speech_is_speaking()
 		fsoVoices.push_back([name UTF8String]);
 	}
 
+	voices_cached = true;
+	cached_voices = fsoVoices;
 	return fsoVoices;
 }
 
diff --git a/code/source_groups.cmake b/code/source_groups.cmake
index e28d696cd82..e0fcce0fd58 100644
--- a/code/source_groups.cmake
+++ b/code/source_groups.cmake
@@ -1634,7 +1634,7 @@ endif()
 if (APPLE)
 	add_file_folder("Sound"
 		${file_root_sound}
-		sound/speech.mm
+		sound/speech_mac.cpp
 	)
 endif()
 

From 5a94f8762dd17e25812098d66d37a7305b40c451 Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Sun, 5 Apr 2026 19:18:32 -0300
Subject: [PATCH 04/26] Add Linux speech stubs

---
 cmake/finder/FindSpeech.cmake |   4 ++
 code/sound/speech_linux.cpp   | 116 ++++++++++++++++++++++++++++++++++
 code/source_groups.cmake      |   9 ++-
 3 files changed, 126 insertions(+), 3 deletions(-)
 create mode 100644 code/sound/speech_linux.cpp

diff --git a/cmake/finder/FindSpeech.cmake b/cmake/finder/FindSpeech.cmake
index b85b5b7fe9a..172f7910137 100644
--- a/cmake/finder/FindSpeech.cmake
+++ b/cmake/finder/FindSpeech.cmake
@@ -11,6 +11,10 @@ if (WIN32)
 	endif()
 elseif(APPLE)
 	# it should just work
+elseif(UNIX)
+	# speech-dispatcher
+	find_package(Speechd REQUIRED)
+	target_link_libraries(speech INTERFACE Speechd::Speechd)
 else()
 	message(SEND_ERROR "Text to Speech is not supported on this platform!")
 endif()
diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp
new file mode 100644
index 00000000000..406276bf77f
--- /dev/null
+++ b/code/sound/speech_linux.cpp
@@ -0,0 +1,116 @@
+#ifdef FS2_SPEECH
+#include <speechd/libspeechd.h>
+#include "globalincs/pstypes.h"
+#include "utils/unicode.h"
+
+static SCP_vector<SCP_string> cached_voices;
+static bool voices_cached = false;
+
+bool speech_init()
+{
+	if (Speech_init) {
+		return true;
+	}
+
+
+	Speech_init = true;
+	return true;
+}
+
+void speech_deinit()
+{
+	if ( !Speech_init ) {
+		return;
+	}
+
+	Speech_init = false;
+}
+
+bool speech_play(const SCP_string& text)
+{
+	if ( !Speech_init ) {
+		return false;
+	}
+
+	if (text.empty()) {
+		nprintf(("Speech", "Not playing speech because passed text is empty.\n"));
+		return false;
+	}
+
+
+	return true;
+}
+
+bool speech_pause()
+{
+	if ( !Speech_init ) {
+		return false;
+	}
+
+
+	return true;
+}
+
+bool speech_resume()
+{
+	if ( !Speech_init ) {
+		return false;
+	}
+
+
+	return true;
+}
+
+bool speech_stop()
+{
+	if ( !Speech_init ) {
+		return false;
+	}
+
+
+	return true;
+}
+
+bool speech_set_volume(unsigned short volume)
+{
+	if ( !Speech_init ) {
+		return false;
+	}
+
+
+	return true;
+}
+
+bool speech_set_voice(int voice)
+{
+	if ( !Speech_init ) {
+		return false;
+	}
+
+	return true;
+}
+
+bool speech_is_speaking()
+{
+	if ( !Speech_init ) {
+		return false;
+	}
+
+	return false;
+}
+
+SCP_vector<SCP_string> speech_enumerate_voices()
+{
+	if (voices_cached) {
+		return cached_voices;
+	}
+
+	SCP_vector<SCP_string> fsoVoices;
+
+
+	voices_cached = true;
+	cached_voices = fsoVoices;
+	return fsoVoices;
+}
+
+#endif
\ No newline at end of file
diff --git a/code/source_groups.cmake b/code/source_groups.cmake
index e0fcce0fd58..a5d2481c5be 100644
--- a/code/source_groups.cmake
+++ b/code/source_groups.cmake
@@ -1629,13 +1629,16 @@ if (WIN32)
 		${file_root_sound}
 		sound/speech_win.cpp
 	)
-endif()
-
-if (APPLE)
+elseif (APPLE)
 	add_file_folder("Sound"
 		${file_root_sound}
 		sound/speech_mac.cpp
 	)
+elseif (UNIX)
+	add_file_folder("Sound"
+		${file_root_sound}
+		sound/speech_linux.cpp
+	)
 endif()
 
 if (FSO_BUILD_WITH_FFMPEG)

From 1efe01bae6029d97b61918b4b07e3b832bc62ff7 Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Sun, 5 Apr 2026 22:20:08 -0300
Subject: [PATCH 05/26] Add speech support on Linux

---
 CMakeLists.txt                |  8 ++---
 cmake/finder/FindSpeech.cmake |  8 +++--
 code/sound/fsspeech.cpp       |  2 +-
 code/sound/speech_linux.cpp   | 61 ++++++++++++++++++++++++++++-------
 4 files changed, 58 insertions(+), 21 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 82075aa4d1b..9bf1923e2f0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -74,9 +74,7 @@ IF(RESET_INSTALL_PREFIX)
 	ENDIF(NOT $ENV{FS2PATH} STREQUAL "")
 ENDIF(RESET_INSTALL_PREFIX)
 
-IF(WIN32 OR APPLE)
-	OPTION(FSO_USE_SPEECH "Use text-to-speach libraries" ON)
-ENDIF(WIN32 OR APPLE)
+OPTION(FSO_USE_SPEECH "Use text-to-speach libraries" ON)
 
 IF (WIN32)
 	OPTION(FSO_USE_VOICEREC "Enable voice recognition support" ON)
@@ -227,9 +225,7 @@ include(package)
 include(doxygen)
 
 # Print used options to log
-IF(WIN32 OR APPLE)
-	message(STATUS "Using text to speech: ${FSO_USE_SPEECH}")
-ENDIF()
+message(STATUS "Using text to speech: ${FSO_USE_SPEECH}")
 IF (WIN32)
 	message(STATUS "Using voice recogition: ${FSO_USE_VOICEREC}")
 	message(STATUS "Building FRED2: ${FSO_BUILD_FRED2}")
diff --git a/cmake/finder/FindSpeech.cmake b/cmake/finder/FindSpeech.cmake
index 172f7910137..0724e0f13af 100644
--- a/cmake/finder/FindSpeech.cmake
+++ b/cmake/finder/FindSpeech.cmake
@@ -12,9 +12,11 @@ if (WIN32)
 elseif(APPLE)
 	# it should just work
 elseif(UNIX)
-	# speech-dispatcher
-	find_package(Speechd REQUIRED)
-	target_link_libraries(speech INTERFACE Speechd::Speechd)
+	# speech-dispatcher-> libspeechd-dev
+	find_package(PkgConfig REQUIRED)
+	pkg_check_modules(SPEECHD REQUIRED speech-dispatcher)
+	target_include_directories(speech INTERFACE ${SPEECHD_INCLUDE_DIRS})
+	target_link_libraries(speech INTERFACE ${SPEECHD_LIBRARIES})
 else()
 	message(SEND_ERROR "Text to Speech is not supported on this platform!")
 endif()
diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp
index ecc7eb64b1a..0333c9e8564 100644
--- a/code/sound/fsspeech.cpp
+++ b/code/sound/fsspeech.cpp
@@ -80,7 +80,7 @@ static SCP_vector<int> ttsvoice_enumerator()
 {
 	SCP_vector<int> vals;
 	auto voices = speech_enumerate_voices();
-	for (int i = 0; i < voices.size(); ++i) {
+	for (size_t i = 0; i < voices.size(); ++i) {
 		vals.push_back(i);
 	}
 	return vals;
diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp
index 406276bf77f..7f7ccf001c6 100644
--- a/code/sound/speech_linux.cpp
+++ b/code/sound/speech_linux.cpp
@@ -1,17 +1,24 @@
 #ifdef FS2_SPEECH
-#include <speechd/libspeechd.h>
+#include <libspeechd.h>
 #include "globalincs/pstypes.h"
 #include "utils/unicode.h"
 
 static SCP_vector<SCP_string> cached_voices;
 static bool voices_cached = false;
+static bool Speech_init = false;
+static SPDConnection* spd = nullptr;
 
 bool speech_init()
 {
 	if (Speech_init) {
 		return true;
 	}
-
+	
+    spd = spd_open("freespace_open", "main", nullptr, SPD_MODE_SINGLE);
+    if (!spd) {
+        mprintf(("Speech: Unable to connect to speech-dispatcher\n"));
+        return false;
+    }
 
 	Speech_init = true;
 	return true;
@@ -22,7 +29,7 @@ void speech_deinit()
 	if ( !Speech_init ) {
 		return;
 	}
-
+	spd_close(spd);
 	Speech_init = false;
 }
 
@@ -37,8 +44,7 @@ bool speech_play(const SCP_string& text)
 		return false;
 	}
 
-
-	return true;
+	return (spd_say(spd, SPD_TEXT, text.c_str()) >= 0);
 }
 
 bool speech_pause()
@@ -47,7 +53,8 @@ bool speech_pause()
 		return false;
 	}
 
-
+	spd_pause(spd);
+	
 	return true;
 }
 
@@ -57,7 +64,8 @@ bool speech_resume()
 		return false;
 	}
 
-
+	spd_resume(spd);
+	
 	return true;
 }
 
@@ -67,7 +75,8 @@ bool speech_stop()
 		return false;
 	}
 
-
+	spd_stop(spd);
+	
 	return true;
 }
 
@@ -77,7 +86,8 @@ bool speech_set_volume(unsigned short volume)
 		return false;
 	}
 
-
+	spd_set_volume(spd, volume); 
+	
 	return true;
 }
 
@@ -86,7 +96,9 @@ bool speech_set_voice(int voice)
 	if ( !Speech_init ) {
 		return false;
 	}
-
+	
+	spd_set_synthesis_voice(spd, cached_voices[voice].c_str());
+	
 	return true;
 }
 
@@ -107,10 +119,37 @@ SCP_vector<SCP_string> speech_enumerate_voices()
 
 	SCP_vector<SCP_string> fsoVoices;
 
+    SPDConnection* connection = spd;
+    if ( !Speech_init ) {
+    	connection = spd_open("fso_voice_list", "client", NULL, SPD_MODE_SINGLE);
+    	if (!connection) {
+        	mprintf(("Speech: Unable to connect to speech-dispatcher\n"));
+        	voices_cached = true;
+        	cached_voices = fsoVoices;
+        	return fsoVoices;
+    	}
+	}
 
+    SPDVoice** voices = spd_list_synthesis_voices(connection);
+    
+    for (int i = 0; voices[i] != NULL; i++) {
+    	SCP_string lang = voices[i]->language;
+    	// There are too many we cant add them all
+    	// Only add English voices
+    	if(lang.find("en") == 0) {
+    		SCP_string voiceName;
+    		voiceName = voices[i]->name ? voices[i]->name : "unknown";
+        	fsoVoices.push_back(voiceName);
+        }
+    }
+
+    //spd_free_voices(voices);
+    if ( !Speech_init ) {
+    	spd_close(connection);
+	}
 	voices_cached = true;
 	cached_voices = fsoVoices;
 	return fsoVoices;
 }
 
-#endif
\ No newline at end of file
+#endif

From ecacd4f3d3bb901813d362c022fd4a32f52394fa Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Mon, 6 Apr 2026 20:24:48 -0300
Subject: [PATCH 06/26] Add array checks

---
 code/sound/fsspeech.cpp     | 9 ++++++++-
 code/sound/speech_linux.cpp | 4 ++++
 code/sound/speech_win.cpp   | 4 ++++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp
index 0333c9e8564..d649e2603d6 100644
--- a/code/sound/fsspeech.cpp
+++ b/code/sound/fsspeech.cpp
@@ -88,8 +88,11 @@ static SCP_vector<int> ttsvoice_enumerator()
 
 static SCP_string ttsvoice_display(int id)
 {
-	SCP_string out;
 	auto voices = speech_enumerate_voices();
+	if (voices.empty() || id < 0 || static_cast<size_t>(id) >= voices.size()) {
+        return "No voices loaded";
+    }
+    SCP_string out;
 	sprintf(out, "(%d) %s", id + 1, voices[id].c_str());
 	return out;
 }
@@ -99,6 +102,10 @@ static bool ttsvoice_change(int id, bool initial)
 	if (initial) {
 		return false;
 	}
+	auto voices = speech_enumerate_voices();
+	if (voices.empty() || id < 0 || static_cast<size_t>(id) >= voices.size()) {
+        return false;
+    }
 	speech_set_voice(id);
 	return true;
 }
diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp
index 7f7ccf001c6..b44a59e87c2 100644
--- a/code/sound/speech_linux.cpp
+++ b/code/sound/speech_linux.cpp
@@ -97,6 +97,10 @@ bool speech_set_voice(int voice)
 		return false;
 	}
 	
+	if (voice < 0 || static_cast<size_t>(voice) >= cached_voices.size()) {
+        return false;
+    }
+	
 	spd_set_synthesis_voice(spd, cached_voices[voice].c_str());
 	
 	return true;
diff --git a/code/sound/speech_win.cpp b/code/sound/speech_win.cpp
index 90698a52417..5d9605f9395 100644
--- a/code/sound/speech_win.cpp
+++ b/code/sound/speech_win.cpp
@@ -134,6 +134,10 @@ bool speech_set_volume(unsigned short volume)
 
 bool speech_set_voice(int voice)
 {
+	if (voice < 0 || static_cast<size_t>(voice) >= cached_voices.size()) {
+        return false;
+    }
+	
 	HRESULT                             hr;
 	CComPtr<ISpObjectToken>             cpVoiceToken;
 	CComPtr<IEnumSpObjectTokens>        cpEnum;

From ae8e56bd682e7416b3250fc760b34dce21cd2c64 Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Mon, 6 Apr 2026 20:31:53 -0300
Subject: [PATCH 07/26] Use dlopen for speech-dispatcher

---
 cmake/finder/FindSpeech.cmake |   6 +-
 code/sound/speech_linux.cpp   | 134 ++++++++++++++++++++++++++++++----
 2 files changed, 120 insertions(+), 20 deletions(-)

diff --git a/cmake/finder/FindSpeech.cmake b/cmake/finder/FindSpeech.cmake
index 0724e0f13af..f8c28300833 100644
--- a/cmake/finder/FindSpeech.cmake
+++ b/cmake/finder/FindSpeech.cmake
@@ -12,11 +12,7 @@ if (WIN32)
 elseif(APPLE)
 	# it should just work
 elseif(UNIX)
-	# speech-dispatcher-> libspeechd-dev
-	find_package(PkgConfig REQUIRED)
-	pkg_check_modules(SPEECHD REQUIRED speech-dispatcher)
-	target_include_directories(speech INTERFACE ${SPEECHD_INCLUDE_DIRS})
-	target_link_libraries(speech INTERFACE ${SPEECHD_LIBRARIES})
+	# uses speech-dispatcher with dlopen
 else()
 	message(SEND_ERROR "Text to Speech is not supported on this platform!")
 endif()
diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp
index b44a59e87c2..f6c4b1ce84b 100644
--- a/code/sound/speech_linux.cpp
+++ b/code/sound/speech_linux.cpp
@@ -1,7 +1,96 @@
 #ifdef FS2_SPEECH
-#include <libspeechd.h>
+#include <dlfcn.h> 
 #include "globalincs/pstypes.h"
 #include "utils/unicode.h"
+#include "external_dll/externalcode.h"
+
+// Adapted from libspeechd.h / speechd_types.h
+// https://github.com/brailcom/speechd/tree/master/src/api/c
+
+typedef struct SPDConnection SPDConnection;
+
+typedef struct {
+    char *name;
+    char *language;
+    char *variant;
+} SPDVoice;
+
+typedef enum {
+    SPD_MODE_SINGLE = 0,
+    SPD_MODE_THREADED = 1
+} SPDConnectionMode;
+
+typedef enum {
+    SPD_IMPORTANT   = 1,
+    SPD_MESSAGE     = 2,
+    SPD_TEXT        = 3,
+    SPD_NOTIFICATION = 4,
+    SPD_PROGRESS    = 5
+} SPDPriority;
+
+static void* lib_handle = nullptr;
+
+typedef SPDConnection* (*pfn_spd_open)(const char*, const char*, const char*, SPDConnectionMode);
+typedef void (*pfn_spd_close)(SPDConnection*);
+typedef int (*pfn_spd_say)(SPDConnection*, SPDPriority, const char*);
+typedef int (*pfn_spd_pause)(SPDConnection*);
+typedef int (*pfn_spd_resume)(SPDConnection*);
+typedef int (*pfn_spd_stop)(SPDConnection*);
+typedef int (*pfn_spd_set_volume)(SPDConnection*, signed int);
+typedef int (*pfn_spd_set_synthesis_voice)(SPDConnection*, const char*);
+typedef SPDVoice** (*pfn_spd_list_synthesis_voices)(SPDConnection*);
+typedef void (*pfn_free_spd_voices)(SPDVoice**);
+
+static pfn_spd_open                		p_spd_open = nullptr;
+static pfn_spd_close                	p_spd_close = nullptr;
+static pfn_spd_say                  	p_spd_say = nullptr;
+static pfn_spd_pause                	p_spd_pause = nullptr;
+static pfn_spd_resume               	p_spd_resume = nullptr;
+static pfn_spd_stop                 	p_spd_stop = nullptr;
+static pfn_spd_set_volume           	p_spd_set_volume = nullptr;
+static pfn_spd_set_synthesis_voice  	p_spd_set_synthesis_voice = nullptr;
+static pfn_spd_list_synthesis_voices	p_spd_list_synthesis_voices = nullptr;
+static pfn_free_spd_voices 				p_free_spd_voices = nullptr;
+
+// Load speech-dispatcher with dlopen and load symbols
+static bool ensure_speechd_lib()
+{
+    if (lib_handle) return true;
+    lib_handle = dlopen("libspeechd.so.3", RTLD_LAZY | RTLD_LOCAL);
+    if (!lib_handle) {
+		lib_handle = dlopen("libspeechd.so", RTLD_LAZY | RTLD_LOCAL);
+    }
+
+    if (!lib_handle) {
+        mprintf(("Speech: Unable to load libspeechd.so: %s\n", dlerror()));
+        return false;
+    }
+    
+    // used symbols
+    p_spd_open                	= (pfn_spd_open)               		dlsym(lib_handle, "spd_open");
+    p_spd_close              	= (pfn_spd_close)              		dlsym(lib_handle, "spd_close");
+    p_spd_say                 	= (pfn_spd_say)                		dlsym(lib_handle, "spd_say");
+    p_spd_pause               	= (pfn_spd_pause)              		dlsym(lib_handle, "spd_pause");
+    p_spd_resume              	= (pfn_spd_resume)             		dlsym(lib_handle, "spd_resume");
+    p_spd_stop                	= (pfn_spd_stop)               		dlsym(lib_handle, "spd_stop");
+    p_spd_set_volume          	= (pfn_spd_set_volume)         		dlsym(lib_handle, "spd_set_volume");
+    p_spd_set_synthesis_voice 	= (pfn_spd_set_synthesis_voice)		dlsym(lib_handle, "spd_set_synthesis_voice");
+    p_spd_list_synthesis_voices = (pfn_spd_list_synthesis_voices)	dlsym(lib_handle, "spd_list_synthesis_voices");
+    p_free_spd_voices 			= (pfn_free_spd_voices)				dlsym(lib_handle, "free_spd_voices");
+
+    if (!p_spd_open || !p_spd_close || !p_spd_say || !p_spd_pause ||
+        !p_spd_resume || !p_spd_stop || !p_spd_set_volume ||
+        !p_spd_set_synthesis_voice || !p_spd_list_synthesis_voices || !p_free_spd_voices) {
+        mprintf(("Speech: Unable to load one or more symbols from libspeechd.so: %s\n", dlerror()));
+        dlclose(lib_handle);
+        lib_handle = nullptr;
+        return false;
+    }
+
+    return true;
+}
+
+// Speech handling starts here
 
 static SCP_vector<SCP_string> cached_voices;
 static bool voices_cached = false;
@@ -13,8 +102,12 @@ bool speech_init()
 	if (Speech_init) {
 		return true;
 	}
-	
-    spd = spd_open("freespace_open", "main", nullptr, SPD_MODE_SINGLE);
+	    
+	if (!ensure_speechd_lib()) {
+        return false;
+    }
+    
+    spd = p_spd_open("freespace_open", "main", nullptr, SPD_MODE_SINGLE);
     if (!spd) {
         mprintf(("Speech: Unable to connect to speech-dispatcher\n"));
         return false;
@@ -29,8 +122,13 @@ void speech_deinit()
 	if ( !Speech_init ) {
 		return;
 	}
-	spd_close(spd);
+	p_spd_close(spd);
 	Speech_init = false;
+	spd = nullptr;
+    if (lib_handle) { 
+		dlclose(lib_handle); 
+		lib_handle = nullptr; 
+	}
 }
 
 bool speech_play(const SCP_string& text)
@@ -44,7 +142,7 @@ bool speech_play(const SCP_string& text)
 		return false;
 	}
 
-	return (spd_say(spd, SPD_TEXT, text.c_str()) >= 0);
+	return (p_spd_say(spd, SPD_TEXT, text.c_str()) >= 0);
 }
 
 bool speech_pause()
@@ -53,7 +151,7 @@ bool speech_pause()
 		return false;
 	}
 
-	spd_pause(spd);
+	p_spd_pause(spd);
 	
 	return true;
 }
@@ -64,7 +162,7 @@ bool speech_resume()
 		return false;
 	}
 
-	spd_resume(spd);
+	p_spd_resume(spd);
 	
 	return true;
 }
@@ -75,7 +173,7 @@ bool speech_stop()
 		return false;
 	}
 
-	spd_stop(spd);
+	p_spd_stop(spd);
 	
 	return true;
 }
@@ -86,7 +184,7 @@ bool speech_set_volume(unsigned short volume)
 		return false;
 	}
 
-	spd_set_volume(spd, volume); 
+	p_spd_set_volume(spd, volume); 
 	
 	return true;
 }
@@ -100,8 +198,8 @@ bool speech_set_voice(int voice)
 	if (voice < 0 || static_cast<size_t>(voice) >= cached_voices.size()) {
         return false;
     }
-	
-	spd_set_synthesis_voice(spd, cached_voices[voice].c_str());
+    
+	p_spd_set_synthesis_voice(spd, cached_voices[voice].c_str());
 	
 	return true;
 }
@@ -122,10 +220,16 @@ SCP_vector<SCP_string> speech_enumerate_voices()
 	}
 
 	SCP_vector<SCP_string> fsoVoices;
+	
+	if (!ensure_speechd_lib()) {
+        voices_cached = true;
+        cached_voices = fsoVoices;
+        return fsoVoices;
+    }
 
     SPDConnection* connection = spd;
     if ( !Speech_init ) {
-    	connection = spd_open("fso_voice_list", "client", NULL, SPD_MODE_SINGLE);
+    	connection = p_spd_open("fso_voice_list", "client", NULL, SPD_MODE_SINGLE);
     	if (!connection) {
         	mprintf(("Speech: Unable to connect to speech-dispatcher\n"));
         	voices_cached = true;
@@ -134,7 +238,7 @@ SCP_vector<SCP_string> speech_enumerate_voices()
     	}
 	}
 
-    SPDVoice** voices = spd_list_synthesis_voices(connection);
+    SPDVoice** voices = p_spd_list_synthesis_voices(connection);
     
     for (int i = 0; voices[i] != NULL; i++) {
     	SCP_string lang = voices[i]->language;
@@ -147,9 +251,9 @@ SCP_vector<SCP_string> speech_enumerate_voices()
         }
     }
 
-    //spd_free_voices(voices);
+    p_free_spd_voices(voices);
     if ( !Speech_init ) {
-    	spd_close(connection);
+    	p_spd_close(connection);
 	}
 	voices_cached = true;
 	cached_voices = fsoVoices;

From 191061d7e483b8fa763bda3cd484e00e276a2c13 Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Mon, 6 Apr 2026 21:42:19 -0300
Subject: [PATCH 08/26] correct lib name

---
 code/sound/speech_linux.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp
index f6c4b1ce84b..0e276646159 100644
--- a/code/sound/speech_linux.cpp
+++ b/code/sound/speech_linux.cpp
@@ -56,7 +56,7 @@ static pfn_free_spd_voices 				p_free_spd_voices = nullptr;
 static bool ensure_speechd_lib()
 {
     if (lib_handle) return true;
-    lib_handle = dlopen("libspeechd.so.3", RTLD_LAZY | RTLD_LOCAL);
+    lib_handle = dlopen("libspeechd.so.2", RTLD_LAZY | RTLD_LOCAL);
     if (!lib_handle) {
 		lib_handle = dlopen("libspeechd.so", RTLD_LAZY | RTLD_LOCAL);
     }

From fc5a017706f1f7a713dd4abf4f7ae72605954089 Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Mon, 6 Apr 2026 22:38:03 -0300
Subject: [PATCH 09/26] missing includes and static cast

---
 code/sound/fsspeech.cpp     | 2 +-
 code/sound/speech_linux.cpp | 2 +-
 code/sound/speech_mac.cpp   | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp
index d649e2603d6..f25055f5b26 100644
--- a/code/sound/fsspeech.cpp
+++ b/code/sound/fsspeech.cpp
@@ -80,7 +80,7 @@ static SCP_vector<int> ttsvoice_enumerator()
 {
 	SCP_vector<int> vals;
 	auto voices = speech_enumerate_voices();
-	for (size_t i = 0; i < voices.size(); ++i) {
+	for (int i = 0; i < static_cast<int>(voices.size()); ++i) {
 		vals.push_back(i);
 	}
 	return vals;
diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp
index 0e276646159..f1204989fe6 100644
--- a/code/sound/speech_linux.cpp
+++ b/code/sound/speech_linux.cpp
@@ -2,7 +2,7 @@
 #include <dlfcn.h> 
 #include "globalincs/pstypes.h"
 #include "utils/unicode.h"
-#include "external_dll/externalcode.h"
+#include "speech.h"
 
 // Adapted from libspeechd.h / speechd_types.h
 // https://github.com/brailcom/speechd/tree/master/src/api/c
diff --git a/code/sound/speech_mac.cpp b/code/sound/speech_mac.cpp
index 17e7e2313d5..cc560b15ec0 100644
--- a/code/sound/speech_mac.cpp
+++ b/code/sound/speech_mac.cpp
@@ -5,6 +5,7 @@
 
 #include "globalincs/pstypes.h"
 #include "utils/unicode.h"
+#include "speech.h"
 
 static SCP_vector<SCP_string> cached_voices;
 static bool voices_cached = false;

From 4d71c38154c6c146cd0304fce213185e27602539 Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Mon, 6 Apr 2026 22:48:27 -0300
Subject: [PATCH 10/26] do not change mac file type

---
 code/sound/{speech_mac.cpp => speech_mac.mm} | 0
 code/source_groups.cmake                     | 2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename code/sound/{speech_mac.cpp => speech_mac.mm} (100%)

diff --git a/code/sound/speech_mac.cpp b/code/sound/speech_mac.mm
similarity index 100%
rename from code/sound/speech_mac.cpp
rename to code/sound/speech_mac.mm
diff --git a/code/source_groups.cmake b/code/source_groups.cmake
index a5d2481c5be..b53bbb2749c 100644
--- a/code/source_groups.cmake
+++ b/code/source_groups.cmake
@@ -1632,7 +1632,7 @@ if (WIN32)
 elseif (APPLE)
 	add_file_folder("Sound"
 		${file_root_sound}
-		sound/speech_mac.cpp
+		sound/speech_mac.mm
 	)
 elseif (UNIX)
 	add_file_folder("Sound"

From 0c3534c41a847cf4202bb0017449b92687898e0e Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Mon, 6 Apr 2026 23:14:40 -0300
Subject: [PATCH 11/26] fix clang tidy warnings 1

---
 code/sound/fsspeech.cpp     |  7 +++----
 code/sound/speech_linux.cpp |  4 ++--
 code/sound/speech_win.cpp   | 32 ++++++++++++++++----------------
 3 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp
index f25055f5b26..612eb16ead5 100644
--- a/code/sound/fsspeech.cpp
+++ b/code/sound/fsspeech.cpp
@@ -219,15 +219,14 @@ bool fsspeech_init()
 	{
 		// Get the settings from the registry
 		for (int i = 0; i < FSSPEECH_FROM_MAX; i++) {
-			FSSpeech_play_from[i] =
-				os_config_read_uint(NULL, FSSpeech_play_id[i], 0) ? true : false;
+			FSSpeech_play_from[i] = static_cast<bool>(os_config_read_uint(nullptr, FSSpeech_play_id[i], 0));
 			nprintf(("Speech", "Play %s: %s\n", FSSpeech_play_id[i], FSSpeech_play_from[i] ? "true" : "false"));
 		}
 
-		int volume = os_config_read_uint(NULL, "SpeechVolume", 100);
+		int volume = os_config_read_uint(nullptr, "SpeechVolume", 100);
 		speech_set_volume((unsigned short)volume);
 
-		int voice = os_config_read_uint(NULL, "SpeechVoice", 0);
+		int voice = os_config_read_uint(nullptr, "SpeechVoice", 0);
 		speech_set_voice(voice);
 	}
 
diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp
index f1204989fe6..eb4a3b6c77c 100644
--- a/code/sound/speech_linux.cpp
+++ b/code/sound/speech_linux.cpp
@@ -229,7 +229,7 @@ SCP_vector<SCP_string> speech_enumerate_voices()
 
     SPDConnection* connection = spd;
     if ( !Speech_init ) {
-    	connection = p_spd_open("fso_voice_list", "client", NULL, SPD_MODE_SINGLE);
+    	connection = p_spd_open("fso_voice_list", "client", nullptr, SPD_MODE_SINGLE);
     	if (!connection) {
         	mprintf(("Speech: Unable to connect to speech-dispatcher\n"));
         	voices_cached = true;
@@ -240,7 +240,7 @@ SCP_vector<SCP_string> speech_enumerate_voices()
 
     SPDVoice** voices = p_spd_list_synthesis_voices(connection);
     
-    for (int i = 0; voices[i] != NULL; i++) {
+    for (int i = 0; voices[i] != nullptr; i++) {
     	SCP_string lang = voices[i]->language;
     	// There are too many we cant add them all
     	// Only add English voices
diff --git a/code/sound/speech_win.cpp b/code/sound/speech_win.cpp
index 5d9605f9395..d83ad0906d1 100644
--- a/code/sound/speech_win.cpp
+++ b/code/sound/speech_win.cpp
@@ -11,7 +11,7 @@
 	#pragma message( "WARNING: You have not compiled speech into this build (use FS2_SPEECH)" )
 #endif // NDEBUG
 #endif // _WIN32
-#else // FS2_SPEECH
+#elif defined(_WIN32) // FS2_SPEECH
 
 #ifdef LAUNCHER
 #include "stdafx.h"
@@ -60,7 +60,7 @@ bool speech_init()
 {
     HRESULT hr = CoCreateInstance(
 		CLSID_SpVoice, 
-		NULL, 
+		nullptr, 
 		CLSCTX_ALL, 
 		IID_ISpVoice, 
 		(void **)&Voice_device);
@@ -106,7 +106,7 @@ bool speech_play(const SCP_string& text)
 	}
 
 	speech_stop();
-	return SUCCEEDED(Voice_device->Speak(wide_string.c_str(), SPF_ASYNC, NULL));
+	return SUCCEEDED(Voice_device->Speak(wide_string.c_str(), SPF_ASYNC, nullptr));
 }
 
 bool speech_pause()
@@ -124,7 +124,7 @@ bool speech_resume()
 bool speech_stop()
 {
 	if(Speech_init == false) return true;
-    return SUCCEEDED(Voice_device->Speak( NULL, SPF_PURGEBEFORESPEAK, NULL ));
+    return SUCCEEDED(Voice_device->Speak(nullptr, SPF_PURGEBEFORESPEAK, nullptr));
 }
 
 bool speech_set_volume(unsigned short volume)
@@ -144,7 +144,7 @@ bool speech_set_voice(int voice)
 	ULONG                               num_voices = 0;
 
 	//Enumerate the available voices 
-	hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
+	hr = SpEnumTokens(SPCAT_VOICES, nullptr, nullptr, &cpEnum);
 
 	if(FAILED(hr)) return false;
 
@@ -158,7 +158,7 @@ bool speech_set_voice(int voice)
 	{
 		cpVoiceToken.Release();
 		
-		hr = cpEnum->Next( 1, &cpVoiceToken, NULL );
+		hr = cpEnum->Next( 1, &cpVoiceToken, nullptr);
 
 		if(FAILED(hr)) {
 			return false;
@@ -179,7 +179,7 @@ bool speech_is_speaking()
 	HRESULT			hr;
 	SPVOICESTATUS	pStatus;
 
-	hr = Voice_device->GetStatus(&pStatus, NULL);
+	hr = Voice_device->GetStatus(&pStatus, nullptr);
 	if (FAILED(hr)) return false;
 
 	return (pStatus.dwRunningState != SPRS_DONE);
@@ -193,7 +193,7 @@ SCP_vector<SCP_string> speech_enumerate_voices()
 
 	HRESULT hr = CoCreateInstance(
 		CLSID_SpVoice,
-		NULL,
+		nullptr,
 		CLSCTX_ALL,
 		IID_ISpVoice,
 		(void **)&Voice_device);
@@ -203,12 +203,12 @@ SCP_vector<SCP_string> speech_enumerate_voices()
 	}
 
 	// This code is mostly copied from wxLauncher
-	ISpObjectTokenCategory * comTokenCategory = NULL;
-	IEnumSpObjectTokens * comVoices = NULL;
+	ISpObjectTokenCategory * comTokenCategory = nullptr;
+	IEnumSpObjectTokens * comVoices = nullptr;
 	ULONG comVoicesCount = 0;
 
 	// Generate enumeration of voices
-	hr = ::CoCreateInstance(CLSID_SpObjectTokenCategory, NULL,
+	hr = ::CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr,
 		CLSCTX_INPROC_SERVER, IID_ISpObjectTokenCategory, (LPVOID*)&comTokenCategory);
 	if (FAILED(hr)) {
 		return SCP_vector<SCP_string>();
@@ -219,7 +219,7 @@ SCP_vector<SCP_string> speech_enumerate_voices()
 		return SCP_vector<SCP_string>();
 	}
 
-	hr = comTokenCategory->EnumTokens(NULL, NULL, &comVoices);
+	hr = comTokenCategory->EnumTokens(nullptr, nullptr, &comVoices);
 	if (FAILED(hr)) {
 		return SCP_vector<SCP_string>();
 	}
@@ -231,12 +231,12 @@ SCP_vector<SCP_string> speech_enumerate_voices()
 
 	SCP_vector<SCP_string> voices;
 	while (comVoicesCount > 0) {
-		ISpObjectToken * comAVoice = NULL;
+		ISpObjectToken * comAVoice = nullptr;
 
-		comVoices->Next(1, &comAVoice, NULL); // retrieve just one
+		comVoices->Next(1, &comAVoice, nullptr); // retrieve just one
 
-		LPWSTR id = NULL;
-		comAVoice->GetStringValue(NULL, &id);
+		LPWSTR id = nullptr;
+		comAVoice->GetStringValue(nullptr, &id);
 
 		auto idlength = wcslen(id);
 		auto buffer_size = WideCharToMultiByte(CP_UTF8, 0, id, (int)idlength, nullptr, 0, nullptr, nullptr);

From be08a77fe899b71143d395dd636adbc3323318d4 Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Wed, 8 Apr 2026 19:27:46 -0300
Subject: [PATCH 12/26] set tts rate

---
 code/sound/fsspeech.cpp     | 25 ++++++++++++++++++++++++-
 code/sound/speech.h         |  2 ++
 code/sound/speech_linux.cpp | 26 +++++++++++++++++++++++---
 code/sound/speech_mac.mm    | 16 ++++++++++++++++
 code/sound/speech_win.cpp   | 16 ++++++++++++++++
 5 files changed, 81 insertions(+), 4 deletions(-)

diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp
index 612eb16ead5..834069a65dd 100644
--- a/code/sound/fsspeech.cpp
+++ b/code/sound/fsspeech.cpp
@@ -31,6 +31,15 @@ const char *FSSpeech_play_id[FSSPEECH_FROM_MAX] =
 char Speech_buffer[MAX_SPEECH_BUFFER_LEN] = "";
 size_t  Speech_buffer_len;
 
+static bool ttsrate_change(float new_val, bool initial)
+{
+	if (initial) {
+		return false;
+	}
+	speech_set_rate(new_val);
+	return true;
+}
+
 static bool ttsingame_change(bool new_val, bool initial)
 {
 	if (initial) {
@@ -120,7 +129,7 @@ static auto SpeechVoiceOption = options::OptionBuilder<int>("Speech.Voice",
 	.flags({ options::OptionFlags::ForceMultiValueSelection })
 	.default_val(0)
 	.change_listener(ttsvoice_change)
-	.importance(2)
+	.importance(3)
 	.finish();
 
 static auto SpeechVolumeOption = options::OptionBuilder<float>("Speech.Volume",
@@ -130,6 +139,16 @@ static auto SpeechVolumeOption = options::OptionBuilder<float>("Speech.Volume",
 	.range(0.0f, 100.0f)
 	.default_val(100.0f)
 	.change_listener(ttsvolume_change)
+	.importance(2)
+	.finish();
+
+static auto SpeechRateOption = options::OptionBuilder<float>("Speech.Rate",
+	std::pair<const char*, int>{"TTS Rate", -1},
+	std::pair<const char*, int>{"Speed of the TTS voice (100 = normal)", -1})
+	.category(std::make_pair("Audio", 1826))
+	.range(50.0f, 150.0f)
+	.default_val(100.0f)
+	.change_listener(ttsrate_change)
 	.importance(1)
 	.finish();
 
@@ -214,6 +233,7 @@ bool fsspeech_init()
 		speech_enumerate_voices();
 		speech_set_volume((unsigned short)SpeechVolumeOption->getValue());
 		speech_set_voice(SpeechVoiceOption->getValue());
+		speech_set_rate(SpeechRateOption->getValue());
 	}
 	else 
 	{
@@ -228,6 +248,9 @@ bool fsspeech_init()
 
 		int voice = os_config_read_uint(nullptr, "SpeechVoice", 0);
 		speech_set_voice(voice);
+
+		int rate = os_config_read_uint(nullptr, "SpeechRate", 100);
+		speech_set_rate(static_cast<float>(rate));
 	}
 
 	speech_inited = 1;
diff --git a/code/sound/speech.h b/code/sound/speech.h
index e16eeef2a43..6f73c2f5264 100644
--- a/code/sound/speech.h
+++ b/code/sound/speech.h
@@ -22,6 +22,7 @@ bool speech_stop();
 
 bool speech_set_volume(unsigned short volume);
 bool speech_set_voice(int voice);
+bool speech_set_rate(float rate);
 
 bool speech_is_speaking();
 
@@ -37,6 +38,7 @@ inline bool speech_resume() { return false; }
 inline bool speech_stop() { return false; }
 inline bool speech_set_volume(unsigned short /*volume*/) { return false; }
 inline bool speech_set_voice(int /*voice*/) { return false; }
+inline bool speech_set_rate(float /*rate*/) { return false; } 
 inline bool speech_is_speaking() { return false; }
 
 inline SCP_vector<SCP_string> speech_enumerate_voices() {
diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp
index eb4a3b6c77c..4c3486fab6e 100644
--- a/code/sound/speech_linux.cpp
+++ b/code/sound/speech_linux.cpp
@@ -38,6 +38,7 @@ typedef int (*pfn_spd_resume)(SPDConnection*);
 typedef int (*pfn_spd_stop)(SPDConnection*);
 typedef int (*pfn_spd_set_volume)(SPDConnection*, signed int);
 typedef int (*pfn_spd_set_synthesis_voice)(SPDConnection*, const char*);
+typedef int (*pfn_spd_set_rate)(SPDConnection*, signed int);
 typedef SPDVoice** (*pfn_spd_list_synthesis_voices)(SPDConnection*);
 typedef void (*pfn_free_spd_voices)(SPDVoice**);
 
@@ -50,6 +51,7 @@ static pfn_spd_stop                 	p_spd_stop = nullptr;
 static pfn_spd_set_volume           	p_spd_set_volume = nullptr;
 static pfn_spd_set_synthesis_voice  	p_spd_set_synthesis_voice = nullptr;
 static pfn_spd_list_synthesis_voices	p_spd_list_synthesis_voices = nullptr;
+static pfn_spd_set_rate					p_spd_set_rate = nullptr;
 static pfn_free_spd_voices 				p_free_spd_voices = nullptr;
 
 // Load speech-dispatcher with dlopen and load symbols
@@ -76,11 +78,12 @@ static bool ensure_speechd_lib()
     p_spd_set_volume          	= (pfn_spd_set_volume)         		dlsym(lib_handle, "spd_set_volume");
     p_spd_set_synthesis_voice 	= (pfn_spd_set_synthesis_voice)		dlsym(lib_handle, "spd_set_synthesis_voice");
     p_spd_list_synthesis_voices = (pfn_spd_list_synthesis_voices)	dlsym(lib_handle, "spd_list_synthesis_voices");
+	p_spd_set_rate				= (pfn_spd_set_rate)				dlsym(lib_handle, "spd_set_rate");
     p_free_spd_voices 			= (pfn_free_spd_voices)				dlsym(lib_handle, "free_spd_voices");
 
-    if (!p_spd_open || !p_spd_close || !p_spd_say || !p_spd_pause ||
-        !p_spd_resume || !p_spd_stop || !p_spd_set_volume ||
-        !p_spd_set_synthesis_voice || !p_spd_list_synthesis_voices || !p_free_spd_voices) {
+    if (!p_spd_open || !p_spd_close || !p_spd_say || !p_spd_pause || !p_spd_resume || !p_spd_stop ||
+		!p_spd_set_volume || !p_spd_set_rate || !p_spd_set_synthesis_voice || 
+		!p_spd_list_synthesis_voices || !p_free_spd_voices || !p_spd_set_rate) {
         mprintf(("Speech: Unable to load one or more symbols from libspeechd.so: %s\n", dlerror()));
         dlclose(lib_handle);
         lib_handle = nullptr;
@@ -204,6 +207,23 @@ bool speech_set_voice(int voice)
 	return true;
 }
 
+bool speech_set_rate(float rate_percent)
+{
+	if (!Speech_init) {
+		return false;
+	}
+
+	// 50 / +150 -> 100 = normal -> range -100 / +100
+	signed int rate = static_cast<signed int>((rate_percent - 100.0f) * 2.0f);
+	if (rate < -100)
+		rate = -100;
+	if (rate > 100)
+		rate = 100;
+
+	p_spd_set_rate(spd, rate);
+	return true;
+}
+
 bool speech_is_speaking()
 {
 	if ( !Speech_init ) {
diff --git a/code/sound/speech_mac.mm b/code/sound/speech_mac.mm
index cc560b15ec0..5c0f92ab3a9 100644
--- a/code/sound/speech_mac.mm
+++ b/code/sound/speech_mac.mm
@@ -125,6 +125,22 @@ bool speech_set_voice(int voice)
 	return true;
 }
 
+bool speech_set_rate(float rate_percent)
+{
+    if (!Speech_init) {
+        return false;
+    }
+
+    // 180 wpm = normal
+    float rate = 180.0f * (rate_percent / 100.0f);
+
+    [synth setObject:[NSNumber numberWithFloat:rate]
+            forProperty:NSSpeechRateProperty
+                   error:nil];
+
+    return true;
+}
+
 bool speech_is_speaking()
 {
 	if ( !Speech_init ) {
diff --git a/code/sound/speech_win.cpp b/code/sound/speech_win.cpp
index d83ad0906d1..13ca1fd4d2e 100644
--- a/code/sound/speech_win.cpp
+++ b/code/sound/speech_win.cpp
@@ -173,6 +173,22 @@ bool speech_set_voice(int voice)
 	return false;
 }
 
+bool speech_set_rate(float rate_percent)
+{
+	if (!Speech_init) {
+		return false;
+	}
+
+	// 50 / +150 -> 100 = normal -> range -10 / +10 
+    long rate = static_cast<long>((rate_percent - 100.0f) * 0.1f);
+	if (rate < -10)
+		rate = -10;
+	if (rate > 10)
+		rate = 10;
+
+	return SUCCEEDED(Voice_device->SetRate(rate));
+}
+
 // Goober5000
 bool speech_is_speaking()
 {

From 5e564ade6c137d0e3032ba48370a3838ae8b0f4b Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Wed, 8 Apr 2026 19:32:42 -0300
Subject: [PATCH 13/26] set localization ids

---
 code/localization/localize.cpp |  2 +-
 code/sound/fsspeech.cpp        | 28 ++++++++++++++--------------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/code/localization/localize.cpp b/code/localization/localize.cpp
index 5ad87fe4bfc..8c6132f4462 100644
--- a/code/localization/localize.cpp
+++ b/code/localization/localize.cpp
@@ -64,7 +64,7 @@ bool *Lcl_unexpected_tstring_check = nullptr;
 // NOTE: with map storage of XSTR strings, the indexes no longer need to be contiguous,
 // but internal strings should still increment XSTR_SIZE to avoid collisions.
 // retail XSTR_SIZE = 1570
-// #define XSTR_SIZE	1892 // This is the next available ID
+// #define XSTR_SIZE	1929 // This is the next available ID
 
 // struct to allow for strings.tbl-determined x offset
 // offset is 0 for english, by default
diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp
index 834069a65dd..9be3c52c14c 100644
--- a/code/sound/fsspeech.cpp
+++ b/code/sound/fsspeech.cpp
@@ -120,8 +120,8 @@ static bool ttsvoice_change(int id, bool initial)
 }
 
 static auto SpeechVoiceOption = options::OptionBuilder<int>("Speech.Voice",
-	std::pair<const char*, int>{"TTS Voice", -1},
-	std::pair<const char*, int>{"The voice used to read text", -1})
+	std::pair<const char*, int>{"TTS Voice", 1915},
+	std::pair<const char*, int>{"The voice used to read text", 1916})
 	.category(std::make_pair("Audio", 1826))
 	.level(options::ExpertLevel::Beginner)
 	.enumerator(ttsvoice_enumerator)
@@ -133,8 +133,8 @@ static auto SpeechVoiceOption = options::OptionBuilder<int>("Speech.Voice",
 	.finish();
 
 static auto SpeechVolumeOption = options::OptionBuilder<float>("Speech.Volume",
-	std::pair<const char*, int>{"TTS Volume", -1},
-	std::pair<const char*, int>{"Volume used for playing TTS speech", -1})
+	std::pair<const char*, int>{"TTS Volume", 1917},
+	std::pair<const char*, int>{"Volume used for playing TTS speech", 1918})
 	.category(std::make_pair("Audio", 1826))
 	.range(0.0f, 100.0f)
 	.default_val(100.0f)
@@ -143,8 +143,8 @@ static auto SpeechVolumeOption = options::OptionBuilder<float>("Speech.Volume",
 	.finish();
 
 static auto SpeechRateOption = options::OptionBuilder<float>("Speech.Rate",
-	std::pair<const char*, int>{"TTS Rate", -1},
-	std::pair<const char*, int>{"Speed of the TTS voice (100 = normal)", -1})
+	std::pair<const char*, int>{"TTS Rate", 1919},
+	std::pair<const char*, int>{"Speed of the TTS voice (100 = normal)", 1920})
 	.category(std::make_pair("Audio", 1826))
 	.range(50.0f, 150.0f)
 	.default_val(100.0f)
@@ -153,8 +153,8 @@ static auto SpeechRateOption = options::OptionBuilder<float>("Speech.Rate",
 	.finish();
 
 static auto SpeechBriefingOption = options::OptionBuilder<bool>("Speech.Briefing",
-	std::pair<const char*, int>{"TTS in briefings", -1},
-	std::pair<const char*, int>{"Enable or disable TTS in briefings", -1})
+	std::pair<const char*, int>{"TTS in briefings", 1921},
+	std::pair<const char*, int>{"Enable or disable TTS in briefings", 1922})
 	.category(std::make_pair("Audio", 1826))
 	.level(options::ExpertLevel::Beginner)
 	.change_listener(ttsbriefing_change)
@@ -163,8 +163,8 @@ static auto SpeechBriefingOption = options::OptionBuilder<bool>("Speech.Briefing
 	.finish();
 
 static auto SpeechTechroomOption = options::OptionBuilder<bool>("Speech.Techroom",
-	std::pair<const char*, int>{"TTS in techroom", -1},
-	std::pair<const char*, int>{"Enable or disable TTS in techroom", -1})
+	std::pair<const char*, int>{"TTS in techroom", 1923},
+	std::pair<const char*, int>{"Enable or disable TTS in techroom", 1924})
 	.category(std::make_pair("Audio", 1826))
 	.level(options::ExpertLevel::Beginner)
 	.change_listener(ttstechroom_change)
@@ -173,8 +173,8 @@ static auto SpeechTechroomOption = options::OptionBuilder<bool>("Speech.Techroom
 	.finish();
 
 static auto SpeechIngameOption = options::OptionBuilder<bool>("Speech.Ingame",
-	std::pair<const char*, int>{"TTS in-game", -1},
-	std::pair<const char*, int>{"Enable or disable TTS in-game", -1})
+	std::pair<const char*, int>{"TTS in-game", 1925},
+	std::pair<const char*, int>{"Enable or disable TTS in-game", 1926})
 	.category(std::make_pair("Audio", 1826))
 	.level(options::ExpertLevel::Beginner)
 	.change_listener(ttsingame_change)
@@ -183,8 +183,8 @@ static auto SpeechIngameOption = options::OptionBuilder<bool>("Speech.Ingame",
 	.finish();
 
 static auto SpeechMultiOption = options::OptionBuilder<bool>("Speech.Multi",
-	std::pair<const char*, int>{"TTS in multiplayer", -1},
-	std::pair<const char*, int>{"Enable or disable TTS in multiplayer", -1})
+	std::pair<const char*, int>{"TTS in multiplayer", 1927},
+	std::pair<const char*, int>{"Enable or disable TTS in multiplayer", 1928})
 	.category(std::make_pair("Audio", 1826))
 	.level(options::ExpertLevel::Beginner)
 	.change_listener(ttsmulti_change)

From 205eaef418ddae33b8b1d196435b1cb4fb1022ea Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Wed, 8 Apr 2026 19:56:14 -0300
Subject: [PATCH 14/26] fix clang tidy warnings 2

---
 code/sound/speech_linux.cpp | 7 +++----
 code/sound/speech_win.cpp   | 2 +-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp
index 4c3486fab6e..a077fdb6267 100644
--- a/code/sound/speech_linux.cpp
+++ b/code/sound/speech_linux.cpp
@@ -81,9 +81,8 @@ static bool ensure_speechd_lib()
 	p_spd_set_rate				= (pfn_spd_set_rate)				dlsym(lib_handle, "spd_set_rate");
     p_free_spd_voices 			= (pfn_free_spd_voices)				dlsym(lib_handle, "free_spd_voices");
 
-    if (!p_spd_open || !p_spd_close || !p_spd_say || !p_spd_pause || !p_spd_resume || !p_spd_stop ||
-		!p_spd_set_volume || !p_spd_set_rate || !p_spd_set_synthesis_voice || 
-		!p_spd_list_synthesis_voices || !p_free_spd_voices || !p_spd_set_rate) {
+    if (!p_spd_open || !p_spd_close || !p_spd_say || !p_spd_pause || !p_spd_resume || !p_spd_stop || !p_spd_set_volume 
+		|| !p_spd_set_rate || !p_spd_set_synthesis_voice || !p_spd_list_synthesis_voices || !p_free_spd_voices) {
         mprintf(("Speech: Unable to load one or more symbols from libspeechd.so: %s\n", dlerror()));
         dlclose(lib_handle);
         lib_handle = nullptr;
@@ -214,7 +213,7 @@ bool speech_set_rate(float rate_percent)
 	}
 
 	// 50 / +150 -> 100 = normal -> range -100 / +100
-	signed int rate = static_cast<signed int>((rate_percent - 100.0f) * 2.0f);
+	auto rate = static_cast<signed int>((rate_percent - 100.0f) * 2.0f);
 	if (rate < -100)
 		rate = -100;
 	if (rate > 100)
diff --git a/code/sound/speech_win.cpp b/code/sound/speech_win.cpp
index 13ca1fd4d2e..e8a831040b2 100644
--- a/code/sound/speech_win.cpp
+++ b/code/sound/speech_win.cpp
@@ -180,7 +180,7 @@ bool speech_set_rate(float rate_percent)
 	}
 
 	// 50 / +150 -> 100 = normal -> range -10 / +10 
-    long rate = static_cast<long>((rate_percent - 100.0f) * 0.1f);
+    auto rate = static_cast<long>((rate_percent - 100.0f) * 0.1f);
 	if (rate < -10)
 		rate = -10;
 	if (rate > 10)

From 5d479802bb42a770b21e96da7f739047b9251c5c Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Thu, 9 Apr 2026 19:00:55 -0300
Subject: [PATCH 15/26] correct symbol name

---
 code/sound/speech_linux.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp
index a077fdb6267..a5ce573c5d7 100644
--- a/code/sound/speech_linux.cpp
+++ b/code/sound/speech_linux.cpp
@@ -38,7 +38,7 @@ typedef int (*pfn_spd_resume)(SPDConnection*);
 typedef int (*pfn_spd_stop)(SPDConnection*);
 typedef int (*pfn_spd_set_volume)(SPDConnection*, signed int);
 typedef int (*pfn_spd_set_synthesis_voice)(SPDConnection*, const char*);
-typedef int (*pfn_spd_set_rate)(SPDConnection*, signed int);
+typedef int (*pfn_spd_set_voice_rate)(SPDConnection*, signed int);
 typedef SPDVoice** (*pfn_spd_list_synthesis_voices)(SPDConnection*);
 typedef void (*pfn_free_spd_voices)(SPDVoice**);
 
@@ -51,7 +51,7 @@ static pfn_spd_stop                 	p_spd_stop = nullptr;
 static pfn_spd_set_volume           	p_spd_set_volume = nullptr;
 static pfn_spd_set_synthesis_voice  	p_spd_set_synthesis_voice = nullptr;
 static pfn_spd_list_synthesis_voices	p_spd_list_synthesis_voices = nullptr;
-static pfn_spd_set_rate					p_spd_set_rate = nullptr;
+static pfn_spd_set_voice_rate			p_spd_set_voice_rate = nullptr;
 static pfn_free_spd_voices 				p_free_spd_voices = nullptr;
 
 // Load speech-dispatcher with dlopen and load symbols
@@ -78,11 +78,11 @@ static bool ensure_speechd_lib()
     p_spd_set_volume          	= (pfn_spd_set_volume)         		dlsym(lib_handle, "spd_set_volume");
     p_spd_set_synthesis_voice 	= (pfn_spd_set_synthesis_voice)		dlsym(lib_handle, "spd_set_synthesis_voice");
     p_spd_list_synthesis_voices = (pfn_spd_list_synthesis_voices)	dlsym(lib_handle, "spd_list_synthesis_voices");
-	p_spd_set_rate				= (pfn_spd_set_rate)				dlsym(lib_handle, "spd_set_rate");
+	p_spd_set_voice_rate		= (pfn_spd_set_voice_rate)			dlsym(lib_handle, "spd_set_voice_rate");
     p_free_spd_voices 			= (pfn_free_spd_voices)				dlsym(lib_handle, "free_spd_voices");
 
     if (!p_spd_open || !p_spd_close || !p_spd_say || !p_spd_pause || !p_spd_resume || !p_spd_stop || !p_spd_set_volume 
-		|| !p_spd_set_rate || !p_spd_set_synthesis_voice || !p_spd_list_synthesis_voices || !p_free_spd_voices) {
+		|| !p_spd_set_voice_rate || !p_spd_set_synthesis_voice || !p_spd_list_synthesis_voices || !p_free_spd_voices) {
         mprintf(("Speech: Unable to load one or more symbols from libspeechd.so: %s\n", dlerror()));
         dlclose(lib_handle);
         lib_handle = nullptr;
@@ -213,13 +213,13 @@ bool speech_set_rate(float rate_percent)
 	}
 
 	// 50 / +150 -> 100 = normal -> range -100 / +100
-	auto rate = static_cast<signed int>((rate_percent - 100.0f) * 2.0f);
+	auto rate = static_cast<signed int>(rate_percent - 100.0f);
 	if (rate < -100)
 		rate = -100;
 	if (rate > 100)
 		rate = 100;
 
-	p_spd_set_rate(spd, rate);
+	p_spd_set_voice_rate(spd, rate);
 	return true;
 }
 

From 0c27de6b6a1ba1bdb54dd80cf16ec7fe5e6a5ec6 Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Sun, 19 Apr 2026 14:56:45 -0300
Subject: [PATCH 16/26] Remove voice cache and fix win enumerate_voices
 overriding voice selection

---
 code/sound/fsspeech.cpp     |  2 -
 code/sound/speech_linux.cpp | 19 +++-------
 code/sound/speech_mac.mm    |  8 ----
 code/sound/speech_win.cpp   | 74 +++++++++++++++----------------------
 4 files changed, 35 insertions(+), 68 deletions(-)

diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp
index 9be3c52c14c..c0d1b506753 100644
--- a/code/sound/fsspeech.cpp
+++ b/code/sound/fsspeech.cpp
@@ -229,8 +229,6 @@ bool fsspeech_init()
 		FSSpeech_play_from[FSSPEECH_FROM_BRIEFING] = SpeechBriefingOption->getValue();
 		FSSpeech_play_from[FSSPEECH_FROM_INGAME] = SpeechIngameOption->getValue();
 		FSSpeech_play_from[FSSPEECH_FROM_MULTI] = SpeechMultiOption->getValue();
-		// Early caching of voices names, needed for sapi not to override initial voice selection
-		speech_enumerate_voices();
 		speech_set_volume((unsigned short)SpeechVolumeOption->getValue());
 		speech_set_voice(SpeechVoiceOption->getValue());
 		speech_set_rate(SpeechRateOption->getValue());
diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp
index a5ce573c5d7..4075cb16ece 100644
--- a/code/sound/speech_linux.cpp
+++ b/code/sound/speech_linux.cpp
@@ -94,8 +94,6 @@ static bool ensure_speechd_lib()
 
 // Speech handling starts here
 
-static SCP_vector<SCP_string> cached_voices;
-static bool voices_cached = false;
 static bool Speech_init = false;
 static SPDConnection* spd = nullptr;
 
@@ -197,11 +195,13 @@ bool speech_set_voice(int voice)
 		return false;
 	}
 	
-	if (voice < 0 || static_cast<size_t>(voice) >= cached_voices.size()) {
+	auto voices = speech_enumerate_voices();
+
+	if (voice < 0 || static_cast<size_t>(voice) >= voices.size()) {
         return false;
     }
     
-	p_spd_set_synthesis_voice(spd, cached_voices[voice].c_str());
+	p_spd_set_synthesis_voice(spd, voices[voice].c_str());
 	
 	return true;
 }
@@ -234,15 +234,9 @@ bool speech_is_speaking()
 
 SCP_vector<SCP_string> speech_enumerate_voices()
 {
-	if (voices_cached) {
-		return cached_voices;
-	}
-
 	SCP_vector<SCP_string> fsoVoices;
 	
 	if (!ensure_speechd_lib()) {
-        voices_cached = true;
-        cached_voices = fsoVoices;
         return fsoVoices;
     }
 
@@ -251,8 +245,6 @@ SCP_vector<SCP_string> speech_enumerate_voices()
     	connection = p_spd_open("fso_voice_list", "client", nullptr, SPD_MODE_SINGLE);
     	if (!connection) {
         	mprintf(("Speech: Unable to connect to speech-dispatcher\n"));
-        	voices_cached = true;
-        	cached_voices = fsoVoices;
         	return fsoVoices;
     	}
 	}
@@ -274,8 +266,7 @@ SCP_vector<SCP_string> speech_enumerate_voices()
     if ( !Speech_init ) {
     	p_spd_close(connection);
 	}
-	voices_cached = true;
-	cached_voices = fsoVoices;
+
 	return fsoVoices;
 }
 
diff --git a/code/sound/speech_mac.mm b/code/sound/speech_mac.mm
index 5c0f92ab3a9..d9baa5cb6b5 100644
--- a/code/sound/speech_mac.mm
+++ b/code/sound/speech_mac.mm
@@ -7,8 +7,6 @@
 #include "utils/unicode.h"
 #include "speech.h"
 
-static SCP_vector<SCP_string> cached_voices;
-static bool voices_cached = false;
 static NSSpeechSynthesizer *synth = nil;
 static bool Speech_init = false;
 
@@ -152,10 +150,6 @@ bool speech_is_speaking()
 
 SCP_vector<SCP_string> speech_enumerate_voices()
 {
-	if (voices_cached) {
-		return cached_voices;
-	}
-
 	NSArray *voices = [NSSpeechSynthesizer availableVoices];
 
 	SCP_vector<SCP_string> fsoVoices;
@@ -167,8 +161,6 @@ bool speech_is_speaking()
 		fsoVoices.push_back([name UTF8String]);
 	}
 
-	voices_cached = true;
-	cached_voices = fsoVoices;
 	return fsoVoices;
 }
 
diff --git a/code/sound/speech_win.cpp b/code/sound/speech_win.cpp
index e8a831040b2..134b54b7f67 100644
--- a/code/sound/speech_win.cpp
+++ b/code/sound/speech_win.cpp
@@ -52,8 +52,6 @@ ISpVoice *Voice_device;
 #include "utils/unicode.h"
 #include "speech.h"
 
-static SCP_vector<SCP_string> cached_voices;
-static bool voices_cached = false;
 bool Speech_init = false;
 
 bool speech_init()
@@ -134,7 +132,8 @@ bool speech_set_volume(unsigned short volume)
 
 bool speech_set_voice(int voice)
 {
-	if (voice < 0 || static_cast<size_t>(voice) >= cached_voices.size()) {
+	auto voices = speech_enumerate_voices();
+	if (voice < 0 || static_cast<size_t>(voice) >= voices.size()) {
         return false;
     }
 	
@@ -203,79 +202,66 @@ bool speech_is_speaking()
 
 SCP_vector<SCP_string> speech_enumerate_voices()
 {
-	if (voices_cached) {
-		return cached_voices;
-	}
-
-	HRESULT hr = CoCreateInstance(
-		CLSID_SpVoice,
-		nullptr,
-		CLSCTX_ALL,
-		IID_ISpVoice,
-		(void **)&Voice_device);
-
-	if (FAILED(hr)) {
-		return SCP_vector<SCP_string>();
-	}
+	SCP_vector<SCP_string> voices;
 
-	// This code is mostly copied from wxLauncher
-	ISpObjectTokenCategory * comTokenCategory = nullptr;
-	IEnumSpObjectTokens * comVoices = nullptr;
+	ISpObjectTokenCategory* comTokenCategory = nullptr;
+	IEnumSpObjectTokens* comVoices = nullptr;
 	ULONG comVoicesCount = 0;
 
-	// Generate enumeration of voices
-	hr = ::CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr,
+	HRESULT hr = ::CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr,
 		CLSCTX_INPROC_SERVER, IID_ISpObjectTokenCategory, (LPVOID*)&comTokenCategory);
+
 	if (FAILED(hr)) {
-		return SCP_vector<SCP_string>();
+		return voices;
 	}
 
 	hr = comTokenCategory->SetId(SPCAT_VOICES, false);
 	if (FAILED(hr)) {
-		return SCP_vector<SCP_string>();
+		comTokenCategory->Release();
+		return voices;
 	}
 
 	hr = comTokenCategory->EnumTokens(nullptr, nullptr, &comVoices);
 	if (FAILED(hr)) {
-		return SCP_vector<SCP_string>();
+		comTokenCategory->Release();
+		return voices;
 	}
 
 	hr = comVoices->GetCount(&comVoicesCount);
 	if (FAILED(hr)) {
-		return SCP_vector<SCP_string>();
+		comVoices->Release();
+		comTokenCategory->Release();
+		return voices;
 	}
 
-	SCP_vector<SCP_string> voices;
 	while (comVoicesCount > 0) {
-		ISpObjectToken * comAVoice = nullptr;
+		ISpObjectToken* comAVoice = nullptr;
 
-		comVoices->Next(1, &comAVoice, nullptr); // retrieve just one
+		comVoices->Next(1, &comAVoice, nullptr);
 
 		LPWSTR id = nullptr;
 		comAVoice->GetStringValue(nullptr, &id);
 
-		auto idlength = wcslen(id);
-		auto buffer_size = WideCharToMultiByte(CP_UTF8, 0, id, (int)idlength, nullptr, 0, nullptr, nullptr);
-
-		if (buffer_size > 0) {
-			SCP_string voiceName;
-			voiceName.resize(buffer_size);
-			buffer_size = WideCharToMultiByte(CP_UTF8, 0, id, (int)idlength, &voiceName[0], buffer_size, nullptr, nullptr);
-
-			voices.push_back(voiceName);
+		if (id) {
+			auto idlength = wcslen(id);
+			int buffer_size = WideCharToMultiByte(CP_UTF8, 0, id, (int)idlength, nullptr, 0, nullptr, nullptr);
+
+			if (buffer_size > 0) {
+				SCP_string voiceName;
+				voiceName.resize(buffer_size);
+				WideCharToMultiByte(CP_UTF8, 0, id, (int)idlength, &voiceName[0], buffer_size, nullptr, nullptr);
+				voices.push_back(voiceName);
+			}
+			CoTaskMemFree(id);
 		}
 
-		CoTaskMemFree(id);
 		comAVoice->Release();
 		comVoicesCount--;
 	}
 
+	comVoices->Release();
 	comTokenCategory->Release();
-	//only release the voice_device when getting flags
-	if (!Speech_init)
-		Voice_device->Release();
-	voices_cached = true;
-	cached_voices = voices;
+
 	return voices;
 }
 

From 127e55a3fe48cce77d109eb7ab3b308014a55700 Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Sun, 19 Apr 2026 15:05:59 -0300
Subject: [PATCH 17/26] fix mac rate

Done by notimaginative
---
 code/sound/speech_mac.mm | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/code/sound/speech_mac.mm b/code/sound/speech_mac.mm
index d9baa5cb6b5..5aec8b4fe15 100644
--- a/code/sound/speech_mac.mm
+++ b/code/sound/speech_mac.mm
@@ -9,7 +9,7 @@
 
 static NSSpeechSynthesizer *synth = nil;
 static bool Speech_init = false;
-
+static int voice_default_rate = 200;
 
 bool speech_init()
 {
@@ -120,6 +120,13 @@ bool speech_set_voice(int voice)
 
 	[synth setVoice: [voices objectAtIndex:voice]];
 
+	// reset voice to defaults
+	[synth setObject:nil forProperty:NSSpeechResetProperty error:nil];
+
+	// get default rate for voice
+	NSNumber *voiceRate = [synth objectForProperty:NSSpeechRateProperty error:nil];
+	voice_default_rate = voiceRate ? [voiceRate intValue] : 200; // median normal rate as default
+
 	return true;
 }
 
@@ -129,12 +136,14 @@ bool speech_set_rate(float rate_percent)
         return false;
     }
 
-    // 180 wpm = normal
-    float rate = 180.0f * (rate_percent / 100.0f);
+	CAP(rate_percent, 25.0f, 300.f);
 
-    [synth setObject:[NSNumber numberWithFloat:rate]
-            forProperty:NSSpeechRateProperty
-                   error:nil];
+	int rate = fl2i(voice_default_rate * (rate_percent / 100.0f));
+
+	[synth
+		setObject:[NSNumber numberWithInt:rate]
+		forProperty:NSSpeechRateProperty error:nil
+	];
 
     return true;
 }

From 6338623569dff07a45f8451f17b7ce0ca5272e83 Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Sun, 19 Apr 2026 15:43:05 -0300
Subject: [PATCH 18/26] requested changes

---
 CMakeLists.txt                |  6 +++-
 cmake/finder/FindSpeech.cmake |  2 +-
 code/sound/speech_linux.cpp   | 60 +++++++++++++++++------------------
 code/sound/speech_win.cpp     |  6 ++--
 code/source_groups.cmake      |  2 +-
 5 files changed, 41 insertions(+), 35 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9bf1923e2f0..6acedb4b79c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -74,7 +74,11 @@ IF(RESET_INSTALL_PREFIX)
 	ENDIF(NOT $ENV{FS2PATH} STREQUAL "")
 ENDIF(RESET_INSTALL_PREFIX)
 
-OPTION(FSO_USE_SPEECH "Use text-to-speach libraries" ON)
+IF(WIN32 OR APPLE OR CMAKE_SYSTEM_NAME STREQUAL "Linux")
+	OPTION(FSO_USE_SPEECH "Use text-to-speach libraries" ON)
+ELSE()
+	OPTION(FSO_USE_SPEECH "Use text-to-speach libraries" OFF)
+ENDIF()
 
 IF (WIN32)
 	OPTION(FSO_USE_VOICEREC "Enable voice recognition support" ON)
diff --git a/cmake/finder/FindSpeech.cmake b/cmake/finder/FindSpeech.cmake
index f8c28300833..c7cc6b50b4c 100644
--- a/cmake/finder/FindSpeech.cmake
+++ b/cmake/finder/FindSpeech.cmake
@@ -11,7 +11,7 @@ if (WIN32)
 	endif()
 elseif(APPLE)
 	# it should just work
-elseif(UNIX)
+elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
 	# uses speech-dispatcher with dlopen
 else()
 	message(SEND_ERROR "Text to Speech is not supported on this platform!")
diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp
index 4075cb16ece..6279fd39281 100644
--- a/code/sound/speech_linux.cpp
+++ b/code/sound/speech_linux.cpp
@@ -214,10 +214,7 @@ bool speech_set_rate(float rate_percent)
 
 	// 50 / +150 -> 100 = normal -> range -100 / +100
 	auto rate = static_cast<signed int>(rate_percent - 100.0f);
-	if (rate < -100)
-		rate = -100;
-	if (rate > 100)
-		rate = 100;
+	CAP(rate, -100, 100);
 
 	p_spd_set_voice_rate(spd, rate);
 	return true;
@@ -235,36 +232,39 @@ bool speech_is_speaking()
 SCP_vector<SCP_string> speech_enumerate_voices()
 {
 	SCP_vector<SCP_string> fsoVoices;
-	
-	if (!ensure_speechd_lib()) {
-        return fsoVoices;
-    }
 
-    SPDConnection* connection = spd;
-    if ( !Speech_init ) {
-    	connection = p_spd_open("fso_voice_list", "client", nullptr, SPD_MODE_SINGLE);
-    	if (!connection) {
-        	mprintf(("Speech: Unable to connect to speech-dispatcher\n"));
-        	return fsoVoices;
-    	}
+	if (!ensure_speechd_lib()) {
+		return fsoVoices;
 	}
 
-    SPDVoice** voices = p_spd_list_synthesis_voices(connection);
-    
-    for (int i = 0; voices[i] != nullptr; i++) {
-    	SCP_string lang = voices[i]->language;
-    	// There are too many we cant add them all
-    	// Only add English voices
-    	if(lang.find("en") == 0) {
-    		SCP_string voiceName;
-    		voiceName = voices[i]->name ? voices[i]->name : "unknown";
-        	fsoVoices.push_back(voiceName);
-        }
-    }
+	if (!Speech_init || !spd) {
+		mprintf(("Speech: Speech system is not initialized.\n"));
+		return fsoVoices;
+	}
 
-    p_free_spd_voices(voices);
-    if ( !Speech_init ) {
-    	p_spd_close(connection);
+	SPDVoice** voices = p_spd_list_synthesis_voices(spd);
+
+	if (voices)
+	{
+		int num_voices = 0;
+		//Count voices
+		while (voices[num_voices] != nullptr) {
+			num_voices++;
+		}
+
+		for (int i = 0; voices[i] != nullptr; i++) {
+			// There are too many we cant add them all
+			// Only add English voices
+			if (num_voices < 600 || (voices[i]->language && strncmp(voices[i]->language, "en", 2) == 0)) {
+				SCP_string voiceName = voices[i]->name ? voices[i]->name : "unknown";
+				fsoVoices.push_back(voiceName);
+			}
+		}
+		p_free_spd_voices(voices);
+	}
+	else
+	{
+		mprintf(("Speech: Unable to get voice list from speech-dispatcher.\n"));
 	}
 
 	return fsoVoices;
diff --git a/code/sound/speech_win.cpp b/code/sound/speech_win.cpp
index 134b54b7f67..3814ee5bab9 100644
--- a/code/sound/speech_win.cpp
+++ b/code/sound/speech_win.cpp
@@ -180,10 +180,12 @@ bool speech_set_rate(float rate_percent)
 
 	// 50 / +150 -> 100 = normal -> range -10 / +10 
     auto rate = static_cast<long>((rate_percent - 100.0f) * 0.1f);
-	if (rate < -10)
+	if (rate < -10) {
 		rate = -10;
-	if (rate > 10)
+	}
+	else if (rate > 10) {
 		rate = 10;
+	}
 
 	return SUCCEEDED(Voice_device->SetRate(rate));
 }
diff --git a/code/source_groups.cmake b/code/source_groups.cmake
index 4930bb1c2eb..dbba52510dc 100644
--- a/code/source_groups.cmake
+++ b/code/source_groups.cmake
@@ -1636,7 +1636,7 @@ elseif (APPLE)
 		${file_root_sound}
 		sound/speech_mac.mm
 	)
-elseif (UNIX)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux")
 	add_file_folder("Sound"
 		${file_root_sound}
 		sound/speech_linux.cpp

From 191400f1061652b555fc0a3d0ad7db25131f0d35 Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Sun, 19 Apr 2026 16:45:31 -0300
Subject: [PATCH 19/26] re-add voice cache for linux

---
 code/sound/speech_linux.cpp | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp
index 6279fd39281..3fc4f1324ae 100644
--- a/code/sound/speech_linux.cpp
+++ b/code/sound/speech_linux.cpp
@@ -93,7 +93,8 @@ static bool ensure_speechd_lib()
 }
 
 // Speech handling starts here
-
+static SCP_vector<SCP_string> cached_voices;
+static bool voices_cached = false;
 static bool Speech_init = false;
 static SPDConnection* spd = nullptr;
 
@@ -129,6 +130,8 @@ void speech_deinit()
 		dlclose(lib_handle); 
 		lib_handle = nullptr; 
 	}
+	voices_cached = false;
+	cached_voices.clear();
 }
 
 bool speech_play(const SCP_string& text)
@@ -231,6 +234,10 @@ bool speech_is_speaking()
 
 SCP_vector<SCP_string> speech_enumerate_voices()
 {
+	if (voices_cached) {
+		return cached_voices;
+	}
+
 	SCP_vector<SCP_string> fsoVoices;
 
 	if (!ensure_speechd_lib()) {
@@ -255,7 +262,7 @@ SCP_vector<SCP_string> speech_enumerate_voices()
 		for (int i = 0; voices[i] != nullptr; i++) {
 			// There are too many we cant add them all
 			// Only add English voices
-			if (num_voices < 600 || (voices[i]->language && strncmp(voices[i]->language, "en", 2) == 0)) {
+			if (num_voices < 600 || (voices[i]->language && strstr(voices[i]->language, "en") != nullptr)) {
 				SCP_string voiceName = voices[i]->name ? voices[i]->name : "unknown";
 				fsoVoices.push_back(voiceName);
 			}
@@ -267,6 +274,8 @@ SCP_vector<SCP_string> speech_enumerate_voices()
 		mprintf(("Speech: Unable to get voice list from speech-dispatcher.\n"));
 	}
 
+	voices_cached = true;
+	cached_voices = fsoVoices;
 	return fsoVoices;
 }
 

From 8470aa989e00dab7f26080d2f48fc27c903de669 Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Sun, 19 Apr 2026 16:54:21 -0300
Subject: [PATCH 20/26] Open connection for linux get flags

---
 code/sound/speech_linux.cpp | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp
index 3fc4f1324ae..9f50b093ace 100644
--- a/code/sound/speech_linux.cpp
+++ b/code/sound/speech_linux.cpp
@@ -244,12 +244,16 @@ SCP_vector<SCP_string> speech_enumerate_voices()
 		return fsoVoices;
 	}
 
-	if (!Speech_init || !spd) {
-		mprintf(("Speech: Speech system is not initialized.\n"));
-		return fsoVoices;
+	SPDConnection* connection = spd;
+	if (!Speech_init) {
+		connection = p_spd_open("freespace_open", "main", nullptr, SPD_MODE_SINGLE);
+		if (!connection) {
+			mprintf(("Speech: Unable to connect to speech-dispatcher\n"));
+			return fsoVoices;
+		}
 	}
 
-	SPDVoice** voices = p_spd_list_synthesis_voices(spd);
+	SPDVoice** voices = p_spd_list_synthesis_voices(connection);
 
 	if (voices)
 	{
@@ -274,6 +278,9 @@ SCP_vector<SCP_string> speech_enumerate_voices()
 		mprintf(("Speech: Unable to get voice list from speech-dispatcher.\n"));
 	}
 
+	if (!Speech_init) {
+		p_spd_close(connection);
+
 	voices_cached = true;
 	cached_voices = fsoVoices;
 	return fsoVoices;

From 4c41528fbc294b979d122d75ff1b5da40c1c145c Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Sun, 19 Apr 2026 16:56:15 -0300
Subject: [PATCH 21/26] fix missing }

---
 code/sound/speech_linux.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp
index 9f50b093ace..8965be97901 100644
--- a/code/sound/speech_linux.cpp
+++ b/code/sound/speech_linux.cpp
@@ -280,6 +280,7 @@ SCP_vector<SCP_string> speech_enumerate_voices()
 
 	if (!Speech_init) {
 		p_spd_close(connection);
+	}
 
 	voices_cached = true;
 	cached_voices = fsoVoices;

From 02da89d3eba19c4d6731a31d49ea93754a36016e Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Mon, 27 Apr 2026 23:10:35 -0300
Subject: [PATCH 22/26] change voice option combobox to std::pair

---
 code/options/Option.h   |  2 +-
 code/sound/fsspeech.cpp | 59 ++++++++++++++++++++++++++---------------
 2 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/code/options/Option.h b/code/options/Option.h
index 44032a80f1a..791f1057107 100644
--- a/code/options/Option.h
+++ b/code/options/Option.h
@@ -608,7 +608,7 @@ class OptionBuilder {
 			_instance.setPreset(val.first, json_dump_string_new(_instance.getSerializer()(val.second),
 			                                                    JSON_COMPACT | JSON_ENSURE_ASCII | JSON_ENCODE_ANY));
 		}
-		auto opt_ptr = make_shared<Option<T>>(_instance);
+		auto opt_ptr = std::make_shared<Option<T>>(_instance);
 
 		if (std::holds_alternative<std::pair<const char*, int>>(_title)) {
 			const auto& xstr_info = std::get<std::pair<const char*, int>>(_title);
diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp
index c0d1b506753..bf3a74f46c2 100644
--- a/code/sound/fsspeech.cpp
+++ b/code/sound/fsspeech.cpp
@@ -12,7 +12,6 @@
 #include "sound/speech.h"
 #include "options/Option.h"
 
-
 extern int Cmdline_freespace_no_sound;
 
 const size_t MAX_SPEECH_BUFFER_LEN = 4096;
@@ -85,49 +84,65 @@ static bool ttsvolume_change(float new_val, bool initial)
 	return true;
 }
 
-static SCP_vector<int> ttsvoice_enumerator()
+static std::pair<int, SCP_string> ttsvoice_deserializer(const json_t* el)
+{
+	int id;
+	char* name = nullptr;
+
+	json_error_t err;
+	if (json_unpack_ex((json_t*)el, &err, 0, "{s:i, s:s}", "id", &id, "name", &name) != 0) {
+		throw json_exception(err);
+	}
+
+	return std::make_pair(id, name);
+}
+
+static json_t* ttsvoice_serializer(const std::pair<int, SCP_string>& value)
 {
-	SCP_vector<int> vals;
+	return json_pack("{s:i, s:s}", "id", value.first, "name", value.second.c_str());
+}
+
+static SCP_vector<std::pair<int, SCP_string>> ttsvoice_enumerator()
+{
+	SCP_vector< std::pair<int, SCP_string>> vals;
 	auto voices = speech_enumerate_voices();
-	for (int i = 0; i < static_cast<int>(voices.size()); ++i) {
-		vals.push_back(i);
+
+	if (voices.empty()) {
+		vals.emplace_back(std::make_pair(0, "No voices loaded"));
+	}
+	else {
+		for (int i = 0; i < static_cast<int>(voices.size()); ++i) {
+			vals.emplace_back(std::make_pair(i, voices[i]));
+		}
 	}
 	return vals;
 }
 
-static SCP_string ttsvoice_display(int id)
+static SCP_string ttsvoice_display(std::pair<int, SCP_string> vi)
 {
-	auto voices = speech_enumerate_voices();
-	if (voices.empty() || id < 0 || static_cast<size_t>(id) >= voices.size()) {
-        return "No voices loaded";
-    }
-    SCP_string out;
-	sprintf(out, "(%d) %s", id + 1, voices[id].c_str());
-	return out;
+	return vi.second;
 }
 
-static bool ttsvoice_change(int id, bool initial)
+static bool ttsvoice_change(std::pair<int, SCP_string> new_voice, bool initial)
 {
 	if (initial) {
 		return false;
 	}
-	auto voices = speech_enumerate_voices();
-	if (voices.empty() || id < 0 || static_cast<size_t>(id) >= voices.size()) {
-        return false;
-    }
-	speech_set_voice(id);
+	speech_set_voice(new_voice.first);
 	return true;
 }
 
-static auto SpeechVoiceOption = options::OptionBuilder<int>("Speech.Voice",
+static auto SpeechVoiceOption = options::OptionBuilder<std::pair<int, SCP_string>>("Speech.Voice",
 	std::pair<const char*, int>{"TTS Voice", 1915},
 	std::pair<const char*, int>{"The voice used to read text", 1916})
 	.category(std::make_pair("Audio", 1826))
 	.level(options::ExpertLevel::Beginner)
+	.default_func([]() { return ttsvoice_enumerator().front(); }) // always guarantees at least 1 value
 	.enumerator(ttsvoice_enumerator)
 	.display(ttsvoice_display)
+	.serializer(ttsvoice_serializer)
+	.deserializer(ttsvoice_deserializer)
 	.flags({ options::OptionFlags::ForceMultiValueSelection })
-	.default_val(0)
 	.change_listener(ttsvoice_change)
 	.importance(3)
 	.finish();
@@ -230,7 +245,7 @@ bool fsspeech_init()
 		FSSpeech_play_from[FSSPEECH_FROM_INGAME] = SpeechIngameOption->getValue();
 		FSSpeech_play_from[FSSPEECH_FROM_MULTI] = SpeechMultiOption->getValue();
 		speech_set_volume((unsigned short)SpeechVolumeOption->getValue());
-		speech_set_voice(SpeechVoiceOption->getValue());
+		speech_set_voice(SpeechVoiceOption->getValue().first);
 		speech_set_rate(SpeechRateOption->getValue());
 	}
 	else 

From e90860fd78e7f78723e0379a71c0d6a50256830d Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Mon, 27 Apr 2026 23:12:39 -0300
Subject: [PATCH 23/26] delete duplicated voice id sanitizer on windows set
 voice

---
 code/sound/speech_win.cpp | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/code/sound/speech_win.cpp b/code/sound/speech_win.cpp
index 3814ee5bab9..c9621a86f24 100644
--- a/code/sound/speech_win.cpp
+++ b/code/sound/speech_win.cpp
@@ -131,12 +131,7 @@ bool speech_set_volume(unsigned short volume)
 }
 
 bool speech_set_voice(int voice)
-{
-	auto voices = speech_enumerate_voices();
-	if (voice < 0 || static_cast<size_t>(voice) >= voices.size()) {
-        return false;
-    }
-	
+{	
 	HRESULT                             hr;
 	CComPtr<ISpObjectToken>             cpVoiceToken;
 	CComPtr<IEnumSpObjectTokens>        cpEnum;

From afb7846954d0b4f6dd6c99995b731a57764d4203 Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Tue, 28 Apr 2026 19:35:49 -0300
Subject: [PATCH 24/26] Use pairs for speech_enumerate_voices() and adapt linux
 speech

---
 code/cmdline/cmdline.cpp    |  2 +-
 code/sound/fsspeech.cpp     | 25 ++++++++++--------
 code/sound/speech.h         |  6 ++---
 code/sound/speech_linux.cpp | 52 ++++++++++---------------------------
 code/sound/speech_mac.mm    |  8 +++---
 code/sound/speech_win.cpp   |  7 ++---
 6 files changed, 40 insertions(+), 60 deletions(-)

diff --git a/code/cmdline/cmdline.cpp b/code/cmdline/cmdline.cpp
index 7dae2532cab..e200327a118 100644
--- a/code/cmdline/cmdline.cpp
+++ b/code/cmdline/cmdline.cpp
@@ -1414,7 +1414,7 @@ static json_t* json_get_v1() {
 		auto voices = speech_enumerate_voices();
 
 		for (auto& voice : voices) {
-			json_array_append_new(voices_array, json_string(voice.c_str()));
+			json_array_append_new(voices_array, json_string(voice.second.c_str()));
 		}
 
 		json_object_set_new(root, "voices", voices_array);
diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp
index bf3a74f46c2..6b78a4ed58a 100644
--- a/code/sound/fsspeech.cpp
+++ b/code/sound/fsspeech.cpp
@@ -102,33 +102,36 @@ static json_t* ttsvoice_serializer(const std::pair<int, SCP_string>& value)
 	return json_pack("{s:i, s:s}", "id", value.first, "name", value.second.c_str());
 }
 
+static SCP_vector<std::pair<int, SCP_string>> voice_list_cache;
+
 static SCP_vector<std::pair<int, SCP_string>> ttsvoice_enumerator()
 {
-	SCP_vector< std::pair<int, SCP_string>> vals;
-	auto voices = speech_enumerate_voices();
-
-	if (voices.empty()) {
-		vals.emplace_back(std::make_pair(0, "No voices loaded"));
+	if(voice_list_cache.empty()) {
+		auto voices = speech_enumerate_voices();
+	
+		if (voices.empty()) {
+			voices.emplace_back(std::make_pair(0, "No voices loaded"));
+		}
+		voice_list_cache = voices;
+		return voices;
 	}
 	else {
-		for (int i = 0; i < static_cast<int>(voices.size()); ++i) {
-			vals.emplace_back(std::make_pair(i, voices[i]));
-		}
+		return voice_list_cache;
 	}
-	return vals;
 }
 
-static SCP_string ttsvoice_display(std::pair<int, SCP_string> vi)
+static SCP_string ttsvoice_display(const std::pair<int, SCP_string> vi)
 {
 	return vi.second;
 }
 
-static bool ttsvoice_change(std::pair<int, SCP_string> new_voice, bool initial)
+static bool ttsvoice_change(const std::pair<int, SCP_string> new_voice, bool initial)
 {
 	if (initial) {
 		return false;
 	}
 	speech_set_voice(new_voice.first);
+	voice_list_cache.clear();
 	return true;
 }
 
diff --git a/code/sound/speech.h b/code/sound/speech.h
index 6f73c2f5264..07d7d9debf6 100644
--- a/code/sound/speech.h
+++ b/code/sound/speech.h
@@ -26,7 +26,7 @@ bool speech_set_rate(float rate);
 
 bool speech_is_speaking();
 
-SCP_vector<SCP_string> speech_enumerate_voices();
+SCP_vector<std::pair<int, SCP_string>> speech_enumerate_voices();
 
 #else
 
@@ -41,8 +41,8 @@ inline bool speech_set_voice(int /*voice*/) { return false; }
 inline bool speech_set_rate(float /*rate*/) { return false; } 
 inline bool speech_is_speaking() { return false; }
 
-inline SCP_vector<SCP_string> speech_enumerate_voices() {
-	return SCP_vector<SCP_string>();
+inline SCP_vector<std::pair<int, SCP_string>> speech_enumerate_voices() {
+	return SCP_vector<std::pair<int, SCP_string>>();
 }
 
 #endif
diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp
index 8965be97901..e996ecf22bb 100644
--- a/code/sound/speech_linux.cpp
+++ b/code/sound/speech_linux.cpp
@@ -93,8 +93,6 @@ static bool ensure_speechd_lib()
 }
 
 // Speech handling starts here
-static SCP_vector<SCP_string> cached_voices;
-static bool voices_cached = false;
 static bool Speech_init = false;
 static SPDConnection* spd = nullptr;
 
@@ -130,8 +128,6 @@ void speech_deinit()
 		dlclose(lib_handle); 
 		lib_handle = nullptr; 
 	}
-	voices_cached = false;
-	cached_voices.clear();
 }
 
 bool speech_play(const SCP_string& text)
@@ -204,7 +200,7 @@ bool speech_set_voice(int voice)
         return false;
     }
     
-	p_spd_set_synthesis_voice(spd, voices[voice].c_str());
+	p_spd_set_synthesis_voice(spd, voices[voice].second.c_str());
 	
 	return true;
 }
@@ -232,58 +228,38 @@ bool speech_is_speaking()
 	return false;
 }
 
-SCP_vector<SCP_string> speech_enumerate_voices()
+SCP_vector<std::pair<int, SCP_string>> speech_enumerate_voices()
 {
-	if (voices_cached) {
-		return cached_voices;
-	}
-
-	SCP_vector<SCP_string> fsoVoices;
-
-	if (!ensure_speechd_lib()) {
-		return fsoVoices;
-	}
+	SCP_vector<std::pair<int, SCP_string>> fsoVoices;
 
-	SPDConnection* connection = spd;
 	if (!Speech_init) {
-		connection = p_spd_open("freespace_open", "main", nullptr, SPD_MODE_SINGLE);
-		if (!connection) {
+		if (!ensure_speechd_lib()) {
+			return fsoVoices;
+		}
+		spd = p_spd_open("freespace_open", "main", nullptr, SPD_MODE_SINGLE);
+		if (!spd) {
 			mprintf(("Speech: Unable to connect to speech-dispatcher\n"));
 			return fsoVoices;
 		}
 	}
 
-	SPDVoice** voices = p_spd_list_synthesis_voices(connection);
-
-	if (voices)
-	{
-		int num_voices = 0;
-		//Count voices
-		while (voices[num_voices] != nullptr) {
-			num_voices++;
-		}
+	SPDVoice** voices = p_spd_list_synthesis_voices(spd);
 
+	if (voices) {
 		for (int i = 0; voices[i] != nullptr; i++) {
-			// There are too many we cant add them all
-			// Only add English voices
-			if (num_voices < 600 || (voices[i]->language && strstr(voices[i]->language, "en") != nullptr)) {
-				SCP_string voiceName = voices[i]->name ? voices[i]->name : "unknown";
-				fsoVoices.push_back(voiceName);
-			}
+			fsoVoices.emplace_back(std::make_pair(i, voices[i]->name));
 		}
 		p_free_spd_voices(voices);
 	}
-	else
-	{
+	else {
 		mprintf(("Speech: Unable to get voice list from speech-dispatcher.\n"));
 	}
 
 	if (!Speech_init) {
-		p_spd_close(connection);
+		p_spd_close(spd);
+		spd = nullptr;
 	}
 
-	voices_cached = true;
-	cached_voices = fsoVoices;
 	return fsoVoices;
 }
 
diff --git a/code/sound/speech_mac.mm b/code/sound/speech_mac.mm
index 5aec8b4fe15..cb18966ca37 100644
--- a/code/sound/speech_mac.mm
+++ b/code/sound/speech_mac.mm
@@ -157,17 +157,17 @@ bool speech_is_speaking()
 	return [synth isSpeaking];
 }
 
-SCP_vector<SCP_string> speech_enumerate_voices()
+SCP_vector<std::pair<int, SCP_string>> speech_enumerate_voices()
 {
 	NSArray *voices = [NSSpeechSynthesizer availableVoices];
 
-	SCP_vector<SCP_string> fsoVoices;
+	SCP_vector<std::pair<int, SCP_string>> fsoVoices;
 
+	int voiceID = 0;
 	for (NSString *voiceIdentifier in voices) {
 		NSDictionary *attributes = [NSSpeechSynthesizer attributesForVoice:voiceIdentifier];
 		NSString *name = [attributes objectForKey:NSVoiceName];
-
-		fsoVoices.push_back([name UTF8String]);
+		fsoVoices.emplace_back(std::make_pair(voiceID++, [name UTF8String]));
 	}
 
 	return fsoVoices;
diff --git a/code/sound/speech_win.cpp b/code/sound/speech_win.cpp
index c9621a86f24..a3d723c093a 100644
--- a/code/sound/speech_win.cpp
+++ b/code/sound/speech_win.cpp
@@ -197,9 +197,9 @@ bool speech_is_speaking()
 	return (pStatus.dwRunningState != SPRS_DONE);
 }
 
-SCP_vector<SCP_string> speech_enumerate_voices()
+SCP_vector<std::pair<int, SCP_string>> speech_enumerate_voices()
 {
-	SCP_vector<SCP_string> voices;
+	SCP_vector<std::pair<int, SCP_string>> voices;
 
 	ISpObjectTokenCategory* comTokenCategory = nullptr;
 	IEnumSpObjectTokens* comVoices = nullptr;
@@ -231,6 +231,7 @@ SCP_vector<SCP_string> speech_enumerate_voices()
 		return voices;
 	}
 
+	int voiceID = 0;
 	while (comVoicesCount > 0) {
 		ISpObjectToken* comAVoice = nullptr;
 
@@ -247,7 +248,7 @@ SCP_vector<SCP_string> speech_enumerate_voices()
 				SCP_string voiceName;
 				voiceName.resize(buffer_size);
 				WideCharToMultiByte(CP_UTF8, 0, id, (int)idlength, &voiceName[0], buffer_size, nullptr, nullptr);
-				voices.push_back(voiceName);
+				voices.emplace_back(std::make_pair(voiceID++, voiceName));
 			}
 			CoTaskMemFree(id);
 		}

From 9fa88407066eac4f7256cb24b1e2c2b897482ae0 Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Tue, 28 Apr 2026 20:04:14 -0300
Subject: [PATCH 25/26] use reference

---
 code/sound/fsspeech.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp
index 6b78a4ed58a..3b301a7a1f4 100644
--- a/code/sound/fsspeech.cpp
+++ b/code/sound/fsspeech.cpp
@@ -120,12 +120,12 @@ static SCP_vector<std::pair<int, SCP_string>> ttsvoice_enumerator()
 	}
 }
 
-static SCP_string ttsvoice_display(const std::pair<int, SCP_string> vi)
+static SCP_string ttsvoice_display(const std::pair<int, SCP_string>& vi)
 {
 	return vi.second;
 }
 
-static bool ttsvoice_change(const std::pair<int, SCP_string> new_voice, bool initial)
+static bool ttsvoice_change(const std::pair<int, SCP_string>& new_voice, bool initial)
 {
 	if (initial) {
 		return false;

From bf1512fd399671c17cb7ff143784a0d75a592def Mon Sep 17 00:00:00 2001
From: Salvador Cipolla <shivanuo@hotmail.com>
Date: Wed, 29 Apr 2026 20:21:58 -0300
Subject: [PATCH 26/26] actually free vector memory

---
 code/sound/fsspeech.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp
index 3b301a7a1f4..02f23f221a5 100644
--- a/code/sound/fsspeech.cpp
+++ b/code/sound/fsspeech.cpp
@@ -132,6 +132,7 @@ static bool ttsvoice_change(const std::pair<int, SCP_string>& new_voice, bool in
 	}
 	speech_set_voice(new_voice.first);
 	voice_list_cache.clear();
+	voice_list_cache.shrink_to_fit();
 	return true;
 }