Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ PUID=1000
PGID=1000
TZ=America/Vancouver
WHISPER_PORT=10300
WHISPER_MODEL=tiny-int8
WHISPER_MODEL=auto
WHISPER_LANG=auto
WHISPER_BEAM=1
WHISPER_BEAM=0

HARNESS_AGENT_HARNESS=codex
HARNESS_AGENT_COMMAND=
Expand Down
8 changes: 4 additions & 4 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ services:
MOLTEN_HUB_DEFAULT_REPOSITORY: ${MOLTEN_HUB_DEFAULT_REPOSITORY:-}
MOLTEN_HUB_SPEECH_HOST: faster-whisper
MOLTEN_HUB_SPEECH_PORT: "10300"
MOLTEN_HUB_SPEECH_LANGUAGE: "${MOLTEN_HUB_SPEECH_LANGUAGE:-${WHISPER_LANG:-en}}"
MOLTEN_HUB_SPEECH_LANGUAGE: "${MOLTEN_HUB_SPEECH_LANGUAGE:-${WHISPER_LANG:-auto}}"
volumes:
- ./.moltenhub:/workspace/config
depends_on:
Expand All @@ -32,9 +32,9 @@ services:
PUID: "${PUID:-1000}"
PGID: "${PGID:-1000}"
TZ: "${TZ:-America/Vancouver}"
WHISPER_MODEL: "${WHISPER_MODEL:-tiny-int8}"
WHISPER_LANG: "${WHISPER_LANG:-${MOLTEN_HUB_SPEECH_LANGUAGE:-en}}"
WHISPER_BEAM: "${WHISPER_BEAM:-1}"
WHISPER_MODEL: "${WHISPER_MODEL:-auto}"
WHISPER_LANG: "${WHISPER_LANG:-${MOLTEN_HUB_SPEECH_LANGUAGE:-auto}}"
WHISPER_BEAM: "${WHISPER_BEAM:-0}"
volumes:
- ./.faster-whisper:/config
ports:
Expand Down
7 changes: 4 additions & 3 deletions docker/config/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,16 @@ services:
HARNESS_AGENT_HARNESS: codex
MOLTEN_HUB_SPEECH_HOST: faster-whisper
MOLTEN_HUB_SPEECH_PORT: "10300"
MOLTEN_HUB_SPEECH_LANGUAGE: en
MOLTEN_HUB_SPEECH_LANGUAGE: auto
# Optional for Codex agent auth bootstrap:
OPENAI_API_KEY: ${OPENAI_API_KEY}

faster-whisper:
image: lscr.io/linuxserver/faster-whisper:latest
environment:
WHISPER_MODEL: tiny-int8
WHISPER_LANG: en
WHISPER_MODEL: auto
WHISPER_LANG: auto
WHISPER_BEAM: "0"
ports:
- "10300:10300"
```
5 changes: 5 additions & 0 deletions internal/web/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1459,6 +1459,11 @@ func TestHandlerIndexServesHTML(t *testing.T) {
if !strings.Contains(markup, `state.speech.enabled && state.speech.reachable`) {
t.Fatalf("expected index html to show whisper connection only when speech sidecar is reachable")
}
if !strings.Contains(markup, `function speechTranscriptionLanguage()`) ||
!strings.Contains(markup, `const language = speechTranscriptionLanguage();`) ||
!strings.Contains(markup, `/api/speech/transcribe?language=${encodeURIComponent(language)}`) {
t.Fatalf("expected index html to send browser language hints to whisper")
}
if !strings.Contains(markup, `setIndicator(hubConnItem, hubConnDot, hubConnText, "Molten Hub", online, text);`) {
t.Fatalf("expected index html to update hub indicator tooltip copy")
}
Expand Down
10 changes: 9 additions & 1 deletion internal/web/static/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -5256,7 +5256,8 @@ <h2 id="hub-setup-title">Connect to Hub</h2>
syncPromptSpeechButton();
setLocalPromptStatus("warn", "Transcribing...");
try {
const response = await fetch("/api/speech/transcribe?language=auto", {
const language = speechTranscriptionLanguage();
const response = await fetch(`/api/speech/transcribe?language=${encodeURIComponent(language)}`, {
method: "POST",
headers: { "Content-Type": "application/octet-stream" },
body: concatSpeechBuffers(chunks),
Expand Down Expand Up @@ -5343,6 +5344,13 @@ <h2 id="hub-setup-title">Connect to Hub</h2>
return output;
}

/**
 * Derive the language hint sent with speech transcription requests.
 *
 * Takes the first entry of `navigator.languages` when that property is an
 * array with a truthy first element, otherwise `navigator.language`. The tag
 * is lower-cased and cut at the first `-` or `_`, so "en-US" and "pt_BR"
 * become "en" and "pt".
 *
 * @returns {string} The leading subtag when it is two or three ASCII letters;
 *   otherwise the literal "auto".
 */
function speechTranscriptionLanguage() {
const preferences = navigator.languages;
let tag = Array.isArray(preferences) ? preferences[0] : undefined;
if (!tag) {
// Only consult the single-value property when the list gave nothing usable.
tag = navigator.language;
}
if (!tag) {
tag = "";
}
const [primary] = String(tag).toLowerCase().split(/[-_]/);
if (/^[a-z]{2,3}$/.test(primary)) {
return primary;
}
// Anything that does not look like a plain language subtag yields "auto".
return "auto";
}

function floatToSpeechPCM(input) {
const buffer = new ArrayBuffer(input.length * 2);
const view = new DataView(buffer);
Expand Down