scp-fs2open · Shivansps · Apr 5, 2026 · Apr 5, 2026 · Apr 5, 2026 · Apr 5, 2026
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -74,9 +74,11 @@ IF(RESET_INSTALL_PREFIX)
 	ENDIF(NOT $ENV{FS2PATH} STREQUAL "")
 ENDIF(RESET_INSTALL_PREFIX)
 
-IF(WIN32 OR APPLE)
+IF(WIN32 OR APPLE OR CMAKE_SYSTEM_NAME STREQUAL "Linux")
 	OPTION(FSO_USE_SPEECH "Use text-to-speach libraries" ON)
-ENDIF(WIN32 OR APPLE)
+ELSE()
+	OPTION(FSO_USE_SPEECH "Use text-to-speach libraries" OFF)
+ENDIF()
 
 IF (WIN32)
 	OPTION(FSO_USE_VOICEREC "Enable voice recognition support" ON)
@@ -227,9 +229,7 @@ include(package)
 include(doxygen)
 
 # Print used options to log
-IF(WIN32 OR APPLE)
-	message(STATUS "Using text to speech: ${FSO_USE_SPEECH}")
-ENDIF()
+message(STATUS "Using text to speech: ${FSO_USE_SPEECH}")
 IF (WIN32)
 	message(STATUS "Using voice recogition: ${FSO_USE_VOICEREC}")
 	message(STATUS "Building FRED2: ${FSO_BUILD_FRED2}")

diff --git a/cmake/finder/FindSpeech.cmake b/cmake/finder/FindSpeech.cmake
@@ -11,6 +11,8 @@ if (WIN32)
 	endif()
 elseif(APPLE)
 	# it should just work
+elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+	# uses speech-dispatcher with dlopen
 else()
 	message(SEND_ERROR "Text to Speech is not supported on this platform!")
 endif()

diff --git a/code/cmdline/cmdline.cpp b/code/cmdline/cmdline.cpp
@@ -1414,7 +1414,7 @@ static json_t* json_get_v1() {
 		auto voices = speech_enumerate_voices();
 
 		for (auto& voice : voices) {
-			json_array_append_new(voices_array, json_string(voice.c_str()));
+			json_array_append_new(voices_array, json_string(voice.second.c_str()));
 		}
 
 		json_object_set_new(root, "voices", voices_array);

diff --git a/code/localization/localize.cpp b/code/localization/localize.cpp
@@ -64,7 +64,7 @@ bool *Lcl_unexpected_tstring_check = nullptr;
 // NOTE: with map storage of XSTR strings, the indexes no longer need to be contiguous,
 // but internal strings should still increment XSTR_SIZE to avoid collisions.
 // retail XSTR_SIZE = 1570
-// #define XSTR_SIZE	1915 // This is the next available ID
+// #define XSTR_SIZE	1929 // This is the next available ID
 
 // struct to allow for strings.tbl-determined x offset
 // offset is 0 for english, by default

diff --git a/code/options/Option.h b/code/options/Option.h
@@ -608,7 +608,7 @@ class OptionBuilder {
 			_instance.setPreset(val.first, json_dump_string_new(_instance.getSerializer()(val.second),
 			                                                    JSON_COMPACT | JSON_ENSURE_ASCII | JSON_ENCODE_ANY));
 		}
-		auto opt_ptr = make_shared<Option<T>>(_instance);
+		auto opt_ptr = std::make_shared<Option<T>>(_instance);
 
 		if (std::holds_alternative<std::pair<const char*, int>>(_title)) {
 			const auto& xstr_info = std::get<std::pair<const char*, int>>(_title);

diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp
@@ -10,7 +10,7 @@
 #include "osapi/osregistry.h"
 #include "sound/fsspeech.h"
 #include "sound/speech.h"
-
+#include "options/Option.h"
 
 extern int Cmdline_freespace_no_sound;
 
@@ -30,6 +30,203 @@ const char *FSSpeech_play_id[FSSPEECH_FROM_MAX] =
 char Speech_buffer[MAX_SPEECH_BUFFER_LEN] = "";
 size_t  Speech_buffer_len;
 
+// Change listener registered on the "Speech.Rate" option: hands the new rate to speech_set_rate().
+// The initial notification leaves the backend untouched and reports false; later ones report true.
+static bool ttsrate_change(float new_val, bool initial) {
+	if (!initial) {
+		speech_set_rate(new_val);
+	}
+	return !initial;
+}
+
+// Change listener registered on the "Speech.Ingame" option: stores the flag in
+// FSSpeech_play_from[FSSPEECH_FROM_INGAME]. The initial notification is ignored and reports false.
+static bool ttsingame_change(bool new_val, bool initial) {
+	if (!initial) {
+		FSSpeech_play_from[FSSPEECH_FROM_INGAME] = new_val;
+	}
+	return !initial;
+}
+
+// Change listener registered on the "Speech.Multi" option: stores the flag in
+// FSSpeech_play_from[FSSPEECH_FROM_MULTI]. The initial notification is ignored and reports false.
+static bool ttsmulti_change(bool new_val, bool initial) {
+	if (!initial) {
+		FSSpeech_play_from[FSSPEECH_FROM_MULTI] = new_val;
+	}
+	return !initial;
+}
+
+// Change listener registered on the "Speech.Briefing" option: stores the flag in
+// FSSpeech_play_from[FSSPEECH_FROM_BRIEFING]. The initial notification is ignored and reports false.
+static bool ttsbriefing_change(bool new_val, bool initial) {
+	if (!initial) {
+		FSSpeech_play_from[FSSPEECH_FROM_BRIEFING] = new_val;
+	}
+	return !initial;
+}
+
+// Change listener registered on the "Speech.Techroom" option: stores the flag in
+// FSSpeech_play_from[FSSPEECH_FROM_TECHROOM]. The initial notification is ignored and reports false.
+static bool ttstechroom_change(bool new_val, bool initial) {
+	if (!initial) {
+		FSSpeech_play_from[FSSPEECH_FROM_TECHROOM] = new_val;
+	}
+	return !initial;
+}
+
+// Change listener registered on the "Speech.Volume" option: converts the float value to
+// unsigned short and hands it to speech_set_volume(). The initial notification reports false.
+static bool ttsvolume_change(float new_val, bool initial) {
+	if (!initial) {
+		speech_set_volume(static_cast<unsigned short>(new_val));
+	}
+	return !initial;
+}
+
+// Rebuilds a voice entry from its stored JSON form {"id": <int>, "name": <string>}.
+// Throws json_exception (carrying the jansson error) when the element does not match that shape.
+static std::pair<int, SCP_string> ttsvoice_deserializer(const json_t* el) {
+	int id = 0;
+	const char* name = nullptr; // the "s" format hands back a read-only string owned by the JSON value
+	json_error_t err;
+	if (json_unpack_ex(const_cast<json_t*>(el), &err, 0, "{s:i, s:s}", "id", &id, "name", &name) != 0) {
+		throw json_exception(err);
+	}
+
+	return {id, name}; // name is copied into the SCP_string here
+}
+
+// Packs a voice entry into a new JSON object of the form {"id": <int>, "name": <string>}.
+static json_t* ttsvoice_serializer(const std::pair<int, SCP_string>& value) {
+	return json_pack("{s:i, s:s}", "id", value.first, "name", value.second.c_str());
+}
+
+static SCP_vector<std::pair<int, SCP_string>> voice_list_cache; // (id, name) voices; filled by ttsvoice_enumerator(), emptied by ttsvoice_change()
+
+// Lists the selectable TTS voices as (id, name) pairs.
+// speech_enumerate_voices() is only called while the cache is empty (first use, or after
+// ttsvoice_change() emptied it). If it reports no voices, a placeholder entry with id 0
+// is cached instead, so the returned list always has at least one element.
+static SCP_vector<std::pair<int, SCP_string>> ttsvoice_enumerator() {
+	if (!voice_list_cache.empty()) {
+		return voice_list_cache;
+	}
+
+	voice_list_cache = speech_enumerate_voices();
+	if (voice_list_cache.empty()) {
+		voice_list_cache.emplace_back(0, "No voices loaded");
+	}
+	return voice_list_cache;
+}
+
+// Display text for a voice entry: its name part only (the id is not shown).
+static SCP_string ttsvoice_display(const std::pair<int, SCP_string>& vi) {
+	return vi.second;
+}
+
+// Change listener registered on the "Speech.Voice" option: selects the voice by its id and then
+// empties the cached voice list. The initial notification does neither and reports false.
+static bool ttsvoice_change(const std::pair<int, SCP_string>& new_voice, bool initial) {
+	if (!initial) {
+		speech_set_voice(new_voice.first);
+		voice_list_cache.clear();
+		voice_list_cache.shrink_to_fit(); // also ask the vector to give its storage back
+	}
+	return !initial;
+}
+
+static auto SpeechVoiceOption = options::OptionBuilder<std::pair<int, SCP_string>>("Speech.Voice", // value is an (id, name) voice entry
+	std::pair<const char*, int>{"TTS Voice", 1915}, // title: default text + string id
+	std::pair<const char*, int>{"The voice used to read text", 1916}) // description: default text + string id
+	.category(std::make_pair("Audio", 1826))
+	.level(options::ExpertLevel::Beginner)
+	.default_func([]() { return ttsvoice_enumerator().front(); }) // front() is safe: the enumerator caches a placeholder when no voice is reported
+	.enumerator(ttsvoice_enumerator)
+	.display(ttsvoice_display) // shows the name part only
+	.serializer(ttsvoice_serializer) // stored as {"id": ..., "name": ...}
+	.deserializer(ttsvoice_deserializer) // throws json_exception on a malformed stored value
+	.flags({ options::OptionFlags::ForceMultiValueSelection })
+	.change_listener(ttsvoice_change) // passes the id to speech_set_voice()
+	.importance(3)
+	.finish();
+
+static auto SpeechVolumeOption = options::OptionBuilder<float>("Speech.Volume", // TTS playback volume
+	std::pair<const char*, int>{"TTS Volume", 1917}, // title: default text + string id
+	std::pair<const char*, int>{"Volume used for playing TTS speech", 1918}) // description: default text + string id
+	.category(std::make_pair("Audio", 1826))
+	.range(0.0f, 100.0f)
+	.default_val(100.0f) // same default fsspeech_init() uses for the "SpeechVolume" registry value
+	.change_listener(ttsvolume_change) // converts to unsigned short for speech_set_volume()
+	.importance(2)
+	.finish();
+
+static auto SpeechRateOption = options::OptionBuilder<float>("Speech.Rate", // TTS speaking rate
+	std::pair<const char*, int>{"TTS Rate", 1919}, // title: default text + string id
+	std::pair<const char*, int>{"Speed of the TTS voice (100 = normal)", 1920}) // description: default text + string id
+	.category(std::make_pair("Audio", 1826))
+	.range(50.0f, 150.0f)
+	.default_val(100.0f) // same default fsspeech_init() uses for the "SpeechRate" registry value
+	.change_listener(ttsrate_change) // passes the value to speech_set_rate()
+	.importance(1)
+	.finish();
+
+static auto SpeechBriefingOption = options::OptionBuilder<bool>("Speech.Briefing", // toggles TTS for FSSPEECH_FROM_BRIEFING
+	std::pair<const char*, int>{"TTS in briefings", 1921}, // title: default text + string id
+	std::pair<const char*, int>{"Enable or disable TTS in briefings", 1922}) // description: default text + string id
+	.category(std::make_pair("Audio", 1826))
+	.level(options::ExpertLevel::Beginner)
+	.change_listener(ttsbriefing_change) // writes FSSpeech_play_from[FSSPEECH_FROM_BRIEFING]
+	.default_val(true) // NOTE(review): the registry path in fsspeech_init() defaults this flag to 0 — confirm the differing default is intended
+	.importance(0)
+	.finish();
+
+static auto SpeechTechroomOption = options::OptionBuilder<bool>("Speech.Techroom", // toggles TTS for FSSPEECH_FROM_TECHROOM
+	std::pair<const char*, int>{"TTS in techroom", 1923}, // title: default text + string id
+	std::pair<const char*, int>{"Enable or disable TTS in techroom", 1924}) // description: default text + string id
+	.category(std::make_pair("Audio", 1826))
+	.level(options::ExpertLevel::Beginner)
+	.change_listener(ttstechroom_change) // writes FSSpeech_play_from[FSSPEECH_FROM_TECHROOM]
+	.default_val(true) // NOTE(review): the registry path in fsspeech_init() defaults this flag to 0 — confirm the differing default is intended
+	.importance(0)
+	.finish();
+
+static auto SpeechIngameOption = options::OptionBuilder<bool>("Speech.Ingame", // toggles TTS for FSSPEECH_FROM_INGAME
+	std::pair<const char*, int>{"TTS in-game", 1925}, // title: default text + string id
+	std::pair<const char*, int>{"Enable or disable TTS in-game", 1926}) // description: default text + string id
+	.category(std::make_pair("Audio", 1826))
+	.level(options::ExpertLevel::Beginner)
+	.change_listener(ttsingame_change) // writes FSSpeech_play_from[FSSPEECH_FROM_INGAME]
+	.default_val(true) // NOTE(review): the registry path in fsspeech_init() defaults this flag to 0 — confirm the differing default is intended
+	.importance(0)
+	.finish();
+
+static auto SpeechMultiOption = options::OptionBuilder<bool>("Speech.Multi", // toggles TTS for FSSPEECH_FROM_MULTI
+	std::pair<const char*, int>{"TTS in multiplayer", 1927}, // title: default text + string id
+	std::pair<const char*, int>{"Enable or disable TTS in multiplayer", 1928}) // description: default text + string id
+	.category(std::make_pair("Audio", 1826))
+	.level(options::ExpertLevel::Beginner)
+	.change_listener(ttsmulti_change) // writes FSSpeech_play_from[FSSPEECH_FROM_MULTI]
+	.default_val(true) // NOTE(review): the registry path in fsspeech_init() defaults this flag to 0 — confirm the differing default is intended
+	.importance(0)
+	.finish();
+
+// Writes a copy of input to output with '$' markers removed; output is cleared first.
+// Every '$' is dropped, and so is the one character right after it (for a run of
+// consecutive '$', only the character following the last one is dropped).
+// NOTE(review): input goes straight into unicode::codepoint_range(); presumably it must be non-null.
+void sanitize_text(const char* input, SCP_string& output) {
+	output.clear();
+	bool skip_next = false;
+	for (auto ch : unicode::codepoint_range(input)) {
+		const bool is_dollar = (ch == UNICODE_CHAR('$'));
+		if (!is_dollar && !skip_next) {
+			unicode::encode(ch, std::back_inserter(output));
+		}
+		skip_next = is_dollar; // armed by '$', used up by whatever comes next
+	}
+}
+
 bool fsspeech_init()
 {
 	if (speech_inited) {
@@ -45,18 +242,33 @@ bool fsspeech_init()
 		return false;
 	}
 
-	// Get the settings from the registry
-	for(int i = 0; i < FSSPEECH_FROM_MAX; i++) {
-		FSSpeech_play_from[i] =
-			os_config_read_uint(NULL, FSSpeech_play_id[i], 0) ? true : false;
-		nprintf(("Speech", "Play %s: %s\n", FSSpeech_play_id[i], FSSpeech_play_from[i] ? "true" : "false"));
+	if (Using_in_game_options) 
+	{
+		FSSpeech_play_from[FSSPEECH_FROM_TECHROOM] = SpeechTechroomOption->getValue();
+		FSSpeech_play_from[FSSPEECH_FROM_BRIEFING] = SpeechBriefingOption->getValue();
+		FSSpeech_play_from[FSSPEECH_FROM_INGAME] = SpeechIngameOption->getValue();
+		FSSpeech_play_from[FSSPEECH_FROM_MULTI] = SpeechMultiOption->getValue();
+		speech_set_volume((unsigned short)SpeechVolumeOption->getValue());
+		speech_set_voice(SpeechVoiceOption->getValue().first);
+		speech_set_rate(SpeechRateOption->getValue());
+	}
+	else 
+	{
+		// Get the settings from the registry
+		for (int i = 0; i < FSSPEECH_FROM_MAX; i++) {
+			FSSpeech_play_from[i] = static_cast<bool>(os_config_read_uint(nullptr, FSSpeech_play_id[i], 0));
+			nprintf(("Speech", "Play %s: %s\n", FSSpeech_play_id[i], FSSpeech_play_from[i] ? "true" : "false"));
+		}
+
+		int volume = os_config_read_uint(nullptr, "SpeechVolume", 100);
+		speech_set_volume((unsigned short)volume);
+
+		int voice = os_config_read_uint(nullptr, "SpeechVoice", 0);
+		speech_set_voice(voice);
+
+		int rate = os_config_read_uint(nullptr, "SpeechRate", 100);
+		speech_set_rate(static_cast<float>(rate));
 	}
-
-	int volume = os_config_read_uint(NULL, "SpeechVolume", 100);
-	speech_set_volume((unsigned short) volume);
-
-	int voice = os_config_read_uint(NULL, "SpeechVoice", 0);
-	speech_set_voice(voice);
 
 	speech_inited = 1;
 
@@ -75,6 +287,11 @@ void fsspeech_deinit()
 
 void fsspeech_play(int type, const char *text)
 {
+	if (text == nullptr) {
+		nprintf(("Speech", "Not playing speech because passed text is null.\n"));
+		return;
+	}
+
 	if (!speech_inited) {
 		nprintf(("Speech", "Aborting fsspech_play because speech_inited is false.\n"));
 		return;
@@ -90,7 +307,10 @@ void fsspeech_play(int type, const char *text)
 		return;
 	}
 
-	speech_play(text);
+	SCP_string sanitized_string;
+	sanitize_text(text, sanitized_string);
+
+	speech_play(sanitized_string);
 }
 
 void fsspeech_stop()