Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
9ad34dc
add imgui speech options
Shivansps Apr 5, 2026
839d6b6
adapt existing windows sapi speech implementation
Shivansps Apr 5, 2026
618cf58
adapt existing mac speech integration
Shivansps Apr 5, 2026
5a94f87
add speech linux stubs
Shivansps Apr 5, 2026
1efe01b
add speech support in linux
Shivansps Apr 6, 2026
ecacd4f
Add array checks
Shivansps Apr 6, 2026
ae8e56b
Use dlopen for speech-dispatcher
Shivansps Apr 6, 2026
191061d
corrrect lib name
Shivansps Apr 7, 2026
fc5a017
missing includes and static cast
Shivansps Apr 7, 2026
4d71c38
do not change mac file type
Shivansps Apr 7, 2026
0c3534c
fix clang tidy warnings 1
Shivansps Apr 7, 2026
be08a77
set tts rate
Shivansps Apr 8, 2026
5e564ad
set localization ids
Shivansps Apr 8, 2026
550ea1e
Merge branch 'master' into speech-rework
Shivansps Apr 8, 2026
205eaef
fix clang tidy warnings 2
Shivansps Apr 8, 2026
5b9d842
Merge branch 'speech-rework' of https://github.com/Shivansps/fs2open.…
Shivansps Apr 8, 2026
5d47980
correct symbol name
Shivansps Apr 9, 2026
0c27de6
Remove voice cache and fix win enumerate_voices overriding voice sele…
Shivansps Apr 19, 2026
127e55a
fix mac rate
Shivansps Apr 19, 2026
6338623
requested changes
Shivansps Apr 19, 2026
191400f
re-add voice cache for linux
Shivansps Apr 19, 2026
8470aa9
Open connection for linux get flags
Shivansps Apr 19, 2026
4c41528
fix missing }
Shivansps Apr 19, 2026
02da89d
change voice option combobox to std::pair
Shivansps Apr 28, 2026
e90860f
delete duplicated voice id sanitizer on windows set voice
Shivansps Apr 28, 2026
afb7846
Use pairs for speech_enumerate_voices() and adapt linux speech
Shivansps Apr 28, 2026
9fa8840
use reference
Shivansps Apr 28, 2026
bf1512f
actually free vector memory
Shivansps Apr 29, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,11 @@ IF(RESET_INSTALL_PREFIX)
ENDIF(NOT $ENV{FS2PATH} STREQUAL "")
ENDIF(RESET_INSTALL_PREFIX)

IF(WIN32 OR APPLE)
IF(WIN32 OR APPLE OR CMAKE_SYSTEM_NAME STREQUAL "Linux")
OPTION(FSO_USE_SPEECH "Use text-to-speach libraries" ON)
ENDIF(WIN32 OR APPLE)
ELSE()
OPTION(FSO_USE_SPEECH "Use text-to-speach libraries" OFF)
ENDIF()

IF (WIN32)
OPTION(FSO_USE_VOICEREC "Enable voice recognition support" ON)
Expand Down Expand Up @@ -227,9 +229,7 @@ include(package)
include(doxygen)

# Print used options to log
IF(WIN32 OR APPLE)
message(STATUS "Using text to speech: ${FSO_USE_SPEECH}")
ENDIF()
message(STATUS "Using text to speech: ${FSO_USE_SPEECH}")
IF (WIN32)
message(STATUS "Using voice recogition: ${FSO_USE_VOICEREC}")
message(STATUS "Building FRED2: ${FSO_BUILD_FRED2}")
Expand Down
2 changes: 2 additions & 0 deletions cmake/finder/FindSpeech.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ if (WIN32)
endif()
elseif(APPLE)
# it should just work
elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
# uses speech-dispatcher with dlopen
else()
message(SEND_ERROR "Text to Speech is not supported on this platform!")
endif()
Expand Down
2 changes: 1 addition & 1 deletion code/cmdline/cmdline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1414,7 +1414,7 @@ static json_t* json_get_v1() {
auto voices = speech_enumerate_voices();

for (auto& voice : voices) {
json_array_append_new(voices_array, json_string(voice.c_str()));
json_array_append_new(voices_array, json_string(voice.second.c_str()));
}

json_object_set_new(root, "voices", voices_array);
Expand Down
2 changes: 1 addition & 1 deletion code/localization/localize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ bool *Lcl_unexpected_tstring_check = nullptr;
// NOTE: with map storage of XSTR strings, the indexes no longer need to be contiguous,
// but internal strings should still increment XSTR_SIZE to avoid collisions.
// retail XSTR_SIZE = 1570
// #define XSTR_SIZE 1915 // This is the next available ID
// #define XSTR_SIZE 1929 // This is the next available ID

// struct to allow for strings.tbl-determined x offset
// offset is 0 for english, by default
Expand Down
2 changes: 1 addition & 1 deletion code/options/Option.h
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,7 @@ class OptionBuilder {
_instance.setPreset(val.first, json_dump_string_new(_instance.getSerializer()(val.second),
JSON_COMPACT | JSON_ENSURE_ASCII | JSON_ENCODE_ANY));
}
auto opt_ptr = make_shared<Option<T>>(_instance);
auto opt_ptr = std::make_shared<Option<T>>(_instance);

if (std::holds_alternative<std::pair<const char*, int>>(_title)) {
const auto& xstr_info = std::get<std::pair<const char*, int>>(_title);
Expand Down
246 changes: 233 additions & 13 deletions code/sound/fsspeech.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include "osapi/osregistry.h"
#include "sound/fsspeech.h"
#include "sound/speech.h"

#include "options/Option.h"

extern int Cmdline_freespace_no_sound;

Expand All @@ -30,6 +30,203 @@ const char *FSSpeech_play_id[FSSPEECH_FROM_MAX] =
char Speech_buffer[MAX_SPEECH_BUFFER_LEN] = "";
size_t Speech_buffer_len;

/**
 * @brief Change listener used by the "Speech.Rate" option.
 *
 * Passes the new rate on to speech_set_rate(), except when @p initial is set.
 *
 * @param new_val Rate value handed over by the option.
 * @param initial When true the callback does nothing.
 * @return false if @p initial was set, true after the rate has been forwarded.
 */
static bool ttsrate_change(float new_val, bool initial)
{
	if (!initial) {
		speech_set_rate(new_val);
		return true;
	}

	return false;
}

/**
 * @brief Change listener used by the "Speech.Ingame" option.
 *
 * Stores the new flag in FSSpeech_play_from[FSSPEECH_FROM_INGAME], except when @p initial is set.
 *
 * @param new_val New enabled/disabled state.
 * @param initial When true the callback does nothing.
 * @return false if @p initial was set, true after the flag has been stored.
 */
static bool ttsingame_change(bool new_val, bool initial)
{
	const bool apply = !initial;

	if (apply) {
		FSSpeech_play_from[FSSPEECH_FROM_INGAME] = new_val;
	}

	return apply;
}

/**
 * @brief Change listener used by the "Speech.Multi" option.
 *
 * Stores the new flag in FSSpeech_play_from[FSSPEECH_FROM_MULTI], except when @p initial is set.
 *
 * @param new_val New enabled/disabled state.
 * @param initial When true the callback does nothing.
 * @return false if @p initial was set, true after the flag has been stored.
 */
static bool ttsmulti_change(bool new_val, bool initial)
{
	const bool apply = !initial;

	if (apply) {
		FSSpeech_play_from[FSSPEECH_FROM_MULTI] = new_val;
	}

	return apply;
}

/**
 * @brief Change listener used by the "Speech.Briefing" option.
 *
 * Stores the new flag in FSSpeech_play_from[FSSPEECH_FROM_BRIEFING], except when @p initial is set.
 *
 * @param new_val New enabled/disabled state.
 * @param initial When true the callback does nothing.
 * @return false if @p initial was set, true after the flag has been stored.
 */
static bool ttsbriefing_change(bool new_val, bool initial)
{
	const bool apply = !initial;

	if (apply) {
		FSSpeech_play_from[FSSPEECH_FROM_BRIEFING] = new_val;
	}

	return apply;
}

/**
 * @brief Change listener used by the "Speech.Techroom" option.
 *
 * Stores the new flag in FSSpeech_play_from[FSSPEECH_FROM_TECHROOM], except when @p initial is set.
 *
 * @param new_val New enabled/disabled state.
 * @param initial When true the callback does nothing.
 * @return false if @p initial was set, true after the flag has been stored.
 */
static bool ttstechroom_change(bool new_val, bool initial)
{
	const bool apply = !initial;

	if (apply) {
		FSSpeech_play_from[FSSPEECH_FROM_TECHROOM] = new_val;
	}

	return apply;
}

/**
 * @brief Change listener used by the "Speech.Volume" option.
 *
 * Converts the float option value to unsigned short and hands it to
 * speech_set_volume(), except when @p initial is set.
 *
 * @param new_val Volume value handed over by the option.
 * @param initial When true the callback does nothing.
 * @return false if @p initial was set, true after the volume has been forwarded.
 */
static bool ttsvolume_change(float new_val, bool initial)
{
	if (!initial) {
		const auto volume = static_cast<unsigned short>(new_val);
		speech_set_volume(volume);
		return true;
	}

	return false;
}

static std::pair<int, SCP_string> ttsvoice_deserializer(const json_t* el)
{
int id;
char* name = nullptr;

json_error_t err;
if (json_unpack_ex((json_t*)el, &err, 0, "{s:i, s:s}", "id", &id, "name", &name) != 0) {
throw json_exception(err);
}

return std::make_pair(id, name);
}

/**
 * @brief Packs a voice entry into a JSON object.
 *
 * The resulting object has an integer "id" member and a string "name" member.
 *
 * @param value The (id, name) pair to serialize.
 * @return The value returned by json_pack() for that object.
 */
static json_t* ttsvoice_serializer(const std::pair<int, SCP_string>& value)
{
	const int voice_id = value.first;
	const char* voice_name = value.second.c_str();

	return json_pack("{s:i, s:s}", "id", voice_id, "name", voice_name);
}

// Voice list kept between calls of ttsvoice_enumerator(); emptied again by ttsvoice_change().
static SCP_vector<std::pair<int, SCP_string>> voice_list_cache;

/**
 * @brief Returns the selectable TTS voices as (id, name) pairs.
 *
 * While the cache is empty the list is fetched from speech_enumerate_voices() and
 * remembered. If that call yields nothing, a single placeholder entry
 * (0, "No voices loaded") is used instead, so the result is never empty.
 *
 * @return A copy of the cached voice list.
 */
static SCP_vector<std::pair<int, SCP_string>> ttsvoice_enumerator()
{
	if (voice_list_cache.empty()) {
		voice_list_cache = speech_enumerate_voices();

		if (voice_list_cache.empty()) {
			voice_list_cache.emplace_back(0, "No voices loaded");
		}
	}

	return voice_list_cache;
}

/**
 * @brief Produces the text shown for a voice entry.
 *
 * @param vi The (id, name) pair describing the voice.
 * @return A copy of the name part of @p vi.
 */
static SCP_string ttsvoice_display(const std::pair<int, SCP_string>& vi)
{
	const SCP_string& voice_name = vi.second;
	return voice_name;
}

/**
 * @brief Change listener used by the "Speech.Voice" option.
 *
 * Unless @p initial is set, selects the voice through speech_set_voice() using the
 * id part of the pair and then empties voice_list_cache, so the next
 * ttsvoice_enumerator() call queries the voices again.
 *
 * @param new_voice The (id, name) pair of the chosen voice.
 * @param initial When true the callback does nothing.
 * @return false if @p initial was set, true after the voice has been applied.
 */
static bool ttsvoice_change(const std::pair<int, SCP_string>& new_voice, bool initial)
{
	if (!initial) {
		speech_set_voice(new_voice.first);

		// drop the cached entries and ask the vector to give its storage back
		voice_list_cache.clear();
		voice_list_cache.shrink_to_fit();

		return true;
	}

	return false;
}

// "Speech.Voice" option in the "Audio" category: the TTS voice, stored as an (id, name) pair.
// Selectable values come from ttsvoice_enumerator(), are shown via ttsvoice_display(), and are
// persisted as JSON through ttsvoice_serializer() / ttsvoice_deserializer().
// A new selection is applied by ttsvoice_change().
// NOTE(review): the integers paired with the title/description/category strings look like
// XSTR localization ids -- confirm against the string tables.
static auto SpeechVoiceOption = options::OptionBuilder<std::pair<int, SCP_string>>("Speech.Voice",
std::pair<const char*, int>{"TTS Voice", 1915},
std::pair<const char*, int>{"The voice used to read text", 1916})
.category(std::make_pair("Audio", 1826))
.level(options::ExpertLevel::Beginner)
.default_func([]() { return ttsvoice_enumerator().front(); }) // always guarantees at least 1 value
.enumerator(ttsvoice_enumerator)
.display(ttsvoice_display)
.serializer(ttsvoice_serializer)
.deserializer(ttsvoice_deserializer)
.flags({ options::OptionFlags::ForceMultiValueSelection })
.change_listener(ttsvoice_change)
.importance(3)
.finish();

// "Speech.Volume" option in the "Audio" category: float in the range 0..100, default 100.
// A new value is applied by ttsvolume_change().
static auto SpeechVolumeOption = options::OptionBuilder<float>("Speech.Volume",
std::pair<const char*, int>{"TTS Volume", 1917},
std::pair<const char*, int>{"Volume used for playing TTS speech", 1918})
.category(std::make_pair("Audio", 1826))
.range(0.0f, 100.0f)
.default_val(100.0f)
.change_listener(ttsvolume_change)
.importance(2)
.finish();

// "Speech.Rate" option in the "Audio" category: float in the range 50..150, default 100
// (described to the user as "100 = normal"). A new value is applied by ttsrate_change().
static auto SpeechRateOption = options::OptionBuilder<float>("Speech.Rate",
std::pair<const char*, int>{"TTS Rate", 1919},
std::pair<const char*, int>{"Speed of the TTS voice (100 = normal)", 1920})
.category(std::make_pair("Audio", 1826))
.range(50.0f, 150.0f)
.default_val(100.0f)
.change_listener(ttsrate_change)
.importance(1)
.finish();

// "Speech.Briefing" option in the "Audio" category: boolean toggle, default true.
// A new value is applied by ttsbriefing_change().
static auto SpeechBriefingOption = options::OptionBuilder<bool>("Speech.Briefing",
std::pair<const char*, int>{"TTS in briefings", 1921},
std::pair<const char*, int>{"Enable or disable TTS in briefings", 1922})
.category(std::make_pair("Audio", 1826))
.level(options::ExpertLevel::Beginner)
.change_listener(ttsbriefing_change)
.default_val(true)
.importance(0)
.finish();

// "Speech.Techroom" option in the "Audio" category: boolean toggle, default true.
// A new value is applied by ttstechroom_change().
static auto SpeechTechroomOption = options::OptionBuilder<bool>("Speech.Techroom",
std::pair<const char*, int>{"TTS in techroom", 1923},
std::pair<const char*, int>{"Enable or disable TTS in techroom", 1924})
.category(std::make_pair("Audio", 1826))
.level(options::ExpertLevel::Beginner)
.change_listener(ttstechroom_change)
.default_val(true)
.importance(0)
.finish();

// "Speech.Ingame" option in the "Audio" category: boolean toggle, default true.
// A new value is applied by ttsingame_change().
static auto SpeechIngameOption = options::OptionBuilder<bool>("Speech.Ingame",
std::pair<const char*, int>{"TTS in-game", 1925},
std::pair<const char*, int>{"Enable or disable TTS in-game", 1926})
.category(std::make_pair("Audio", 1826))
.level(options::ExpertLevel::Beginner)
.change_listener(ttsingame_change)
.default_val(true)
.importance(0)
.finish();

// "Speech.Multi" option in the "Audio" category: boolean toggle, default true.
// A new value is applied by ttsmulti_change().
static auto SpeechMultiOption = options::OptionBuilder<bool>("Speech.Multi",
std::pair<const char*, int>{"TTS in multiplayer", 1927},
std::pair<const char*, int>{"Enable or disable TTS in multiplayer", 1928})
.category(std::make_pair("Audio", 1826))
.level(options::ExpertLevel::Beginner)
.change_listener(ttsmulti_change)
.default_val(true)
.importance(0)
.finish();

/**
 * @brief Copies @p input into @p output without '$' markers.
 *
 * Every '$' code point is dropped together with the single code point that follows it.
 * A '$' that directly follows another '$' counts as a marker itself, so the code point
 * after it is dropped as well. Everything else is re-encoded and appended unchanged.
 *
 * @param input  Text to filter; iterated via unicode::codepoint_range().
 * @param output Receives the filtered text; any previous content is discarded.
 */
void sanitize_text(const char* input, SCP_string& output) {
	output.clear();

	bool skip_next = false;
	for (auto codepoint : unicode::codepoint_range(input)) {
		const bool is_dollar = (codepoint == UNICODE_CHAR('$'));

		if (is_dollar || skip_next) {
			// a '$' arms the skip for the following code point; anything else consumes it
			skip_next = is_dollar;
			continue;
		}

		unicode::encode(codepoint, std::back_inserter(output));
	}
}

bool fsspeech_init()
{
if (speech_inited) {
Expand All @@ -45,18 +242,33 @@ bool fsspeech_init()
return false;
}

// Get the settings from the registry
for(int i = 0; i < FSSPEECH_FROM_MAX; i++) {
FSSpeech_play_from[i] =
os_config_read_uint(NULL, FSSpeech_play_id[i], 0) ? true : false;
nprintf(("Speech", "Play %s: %s\n", FSSpeech_play_id[i], FSSpeech_play_from[i] ? "true" : "false"));
if (Using_in_game_options)
{
FSSpeech_play_from[FSSPEECH_FROM_TECHROOM] = SpeechTechroomOption->getValue();
FSSpeech_play_from[FSSPEECH_FROM_BRIEFING] = SpeechBriefingOption->getValue();
FSSpeech_play_from[FSSPEECH_FROM_INGAME] = SpeechIngameOption->getValue();
FSSpeech_play_from[FSSPEECH_FROM_MULTI] = SpeechMultiOption->getValue();
speech_set_volume((unsigned short)SpeechVolumeOption->getValue());
speech_set_voice(SpeechVoiceOption->getValue().first);
speech_set_rate(SpeechRateOption->getValue());
}
else
{
// Get the settings from the registry
for (int i = 0; i < FSSPEECH_FROM_MAX; i++) {
FSSpeech_play_from[i] = static_cast<bool>(os_config_read_uint(nullptr, FSSpeech_play_id[i], 0));
nprintf(("Speech", "Play %s: %s\n", FSSpeech_play_id[i], FSSpeech_play_from[i] ? "true" : "false"));
}

int volume = os_config_read_uint(nullptr, "SpeechVolume", 100);
speech_set_volume((unsigned short)volume);

int voice = os_config_read_uint(nullptr, "SpeechVoice", 0);
speech_set_voice(voice);

int rate = os_config_read_uint(nullptr, "SpeechRate", 100);
speech_set_rate(static_cast<float>(rate));
}

int volume = os_config_read_uint(NULL, "SpeechVolume", 100);
speech_set_volume((unsigned short) volume);

int voice = os_config_read_uint(NULL, "SpeechVoice", 0);
speech_set_voice(voice);

speech_inited = 1;

Expand All @@ -75,6 +287,11 @@ void fsspeech_deinit()

void fsspeech_play(int type, const char *text)
{
if (text == nullptr) {
nprintf(("Speech", "Not playing speech because passed text is null.\n"));
return;
}

if (!speech_inited) {
nprintf(("Speech", "Aborting fsspech_play because speech_inited is false.\n"));
return;
Expand All @@ -90,7 +307,10 @@ void fsspeech_play(int type, const char *text)
return;
}

speech_play(text);
SCP_string sanitized_string;
sanitize_text(text, sanitized_string);

speech_play(sanitized_string);
}

void fsspeech_stop()
Expand Down
Loading
Loading