From e3e589f0c96a8bd070d01692dc623e9d83879044 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 30 Apr 2026 21:32:30 +0000 Subject: [PATCH 1/5] Expose assistant audio streams MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Kræn Hansen --- .changeset/tender-buses-yell.md | 5 + packages/client/src/OutputController.ts | 6 + packages/client/src/TextConversation.ts | 4 + packages/client/src/VoiceConversation.ts | 16 +- .../client/src/utils/WebRTCConnection.test.ts | 35 +++++ packages/client/src/utils/WebRTCConnection.ts | 26 ++++ packages/client/src/utils/output.ts | 23 ++- .../ConversationAudioStream.test.tsx | 137 ++++++++++++++++++ .../conversation/ConversationAudioStream.tsx | 57 ++++++++ .../src/conversation/ConversationProvider.tsx | 2 + .../react/src/conversation/useConversation.ts | 5 +- packages/react/src/index.ts | 2 + packages/react/src/test-globals.d.ts | 2 + packages/types/src/types.ts | 2 + 14 files changed, 318 insertions(+), 4 deletions(-) create mode 100644 .changeset/tender-buses-yell.md create mode 100644 packages/react/src/conversation/ConversationAudioStream.test.tsx create mode 100644 packages/react/src/conversation/ConversationAudioStream.tsx diff --git a/.changeset/tender-buses-yell.md b/.changeset/tender-buses-yell.md new file mode 100644 index 00000000..4eaf906e --- /dev/null +++ b/.changeset/tender-buses-yell.md @@ -0,0 +1,5 @@ +"@elevenlabs/client": patch +"@elevenlabs/react": patch +"@elevenlabs/types": patch + +Expose the assistant output audio stream on conversations and React hooks. diff --git a/packages/client/src/OutputController.ts b/packages/client/src/OutputController.ts index 700f34c7..e75ea1f2 100644 --- a/packages/client/src/OutputController.ts +++ b/packages/client/src/OutputController.ts @@ -4,11 +4,17 @@ export type OutputDeviceConfig = { outputDeviceId?: string; }; +export type AudioStreamListener = (stream: MediaStream | null) => void; + export interface OutputController { close(): Promise; setDevice(config?: Partial & OutputDeviceConfig): Promise; setVolume(volume: number): void; interrupt(resetDuration?: number): void; + /** Returns the assistant output audio stream, if one is available. */ + getAudioStream(): MediaStream | null; + addAudioStreamListener(listener: AudioStreamListener): void; + removeAudioStreamListener(listener: AudioStreamListener): void; /** * @deprecated AnalyserNode is a web-only API and will not work on all diff --git a/packages/client/src/TextConversation.ts b/packages/client/src/TextConversation.ts index 84b05441..9b43b908 100644 --- a/packages/client/src/TextConversation.ts +++ b/packages/client/src/TextConversation.ts @@ -24,6 +24,10 @@ export class TextConversation extends BaseConversation { return EMPTY_FREQUENCY_DATA; } + public getAudioStream(): MediaStream | null { + return null; + } + public getInputVolume(): number { return 0; } diff --git a/packages/client/src/VoiceConversation.ts b/packages/client/src/VoiceConversation.ts index b0a379d4..c573455b 100644 --- a/packages/client/src/VoiceConversation.ts +++ b/packages/client/src/VoiceConversation.ts @@ -13,7 +13,10 @@ import { type PartialOptions, } from "./BaseConversation.js"; import type { InputController } from "./InputController.js"; -import type { OutputController } from "./OutputController.js"; +import type { + AudioStreamListener, + OutputController, +} from "./OutputController.js"; import { setupStrategy } from "./platform/VoiceSessionSetup.js"; export class VoiceConversation extends BaseConversation { @@ -115,6 +118,10 @@ export class VoiceConversation extends BaseConversation { } }; + private handleAudioStream: AudioStreamListener = stream => { + this.options.onAudioStream?.(stream); + }; + protected constructor( options: Options, connection: BaseConnection, @@ -128,6 +135,8 @@ export class VoiceConversation extends BaseConversation { playbackEventTarget?.addListener(this.handlePlaybackEvent); + output.addAudioStreamListener(this.handleAudioStream); + if (wakeLock) { // Wake locks are automatically released when a page is hidden like when switching tabs // so attempt to re-acquire lock when page becomes visible again @@ -149,6 +158,7 @@ export class VoiceConversation extends BaseConversation { this.cleanUp(); this.playbackEventTarget?.removeListener(this.handlePlaybackEvent); this.playbackEventTarget = null; + this.output.removeAudioStreamListener(this.handleAudioStream); await super.handleEndSession(); if (this.visibilityChangeHandler) { @@ -225,6 +235,10 @@ export class VoiceConversation extends BaseConversation { return this.output.getVolume(); } + public getAudioStream(): MediaStream | null { + return this.output.getAudioStream(); + } + public async changeInputDevice({ sampleRate, format, diff --git a/packages/client/src/utils/WebRTCConnection.test.ts b/packages/client/src/utils/WebRTCConnection.test.ts index 4bc275a8..ebc19a03 100644 --- a/packages/client/src/utils/WebRTCConnection.test.ts +++ b/packages/client/src/utils/WebRTCConnection.test.ts @@ -298,4 +298,39 @@ describe("WebRTCConnection", () => { } } ); + + it("notifies audio stream listeners when the stream changes", async () => { + const mockRoom = new Room() as any; + (mockRoom.on as ReturnType).mockImplementation( + (event: string, callback: () => void) => { + if (event === "connected") { + queueMicrotask(callback); + } + } + ); + (mockRoom.once as ReturnType).mockImplementation( + (event: string, callback: () => void) => { + if (event === "signalConnected") { + queueMicrotask(callback); + } + } + ); + + const connection = await WebRTCConnection.create({ + conversationToken: "test-token", + connectionType: "webrtc", + }); + const listener = vi.fn(); + + connection.output.addAudioStreamListener(listener); + expect(listener).toHaveBeenCalledWith(null); + + const stream = { getTracks: () => [] } as unknown as MediaStream; + connection["setOutputAudioStream"](stream); + expect(connection.output.getAudioStream()).toBe(stream); + expect(listener).toHaveBeenLastCalledWith(stream); + + connection.close(); + expect(listener).toHaveBeenLastCalledWith(null); + }); }); diff --git a/packages/client/src/utils/WebRTCConnection.ts b/packages/client/src/utils/WebRTCConnection.ts index 76f5f419..ab45ae93 100644 --- a/packages/client/src/utils/WebRTCConnection.ts +++ b/packages/client/src/utils/WebRTCConnection.ts @@ -28,6 +28,7 @@ import { arrayBufferToBase64 } from "./audio.js"; import { loadRawAudioProcessor } from "./rawAudioProcessor.generated.js"; import type { InputController, InputDeviceConfig } from "../InputController.js"; import type { + AudioStreamListener, OutputController, OutputDeviceConfig, } from "../OutputController.js"; @@ -60,6 +61,8 @@ export class WebRTCConnection extends BaseConnection { private audioCaptureContext: AudioContext | null = null; private audioElements: HTMLAudioElement[] = []; private outputDeviceId: string | null = null; + private outputAudioStream: MediaStream | null = null; + private outputAudioStreamListeners = new Set(); private inputAnalyser: AnalyserNode | null = null; private inputAudioContext: AudioContext | null = null; @@ -200,6 +203,14 @@ export class WebRTCConnection extends BaseConnection { // Audio interruption is managed by the server/agent }, getAnalyser: () => this.outputAnalyser ?? undefined, + getAudioStream: () => this.outputAudioStream, + addAudioStreamListener: (listener: AudioStreamListener) => { + this.outputAudioStreamListeners.add(listener); + listener(this.outputAudioStream); + }, + removeAudioStreamListener: (listener: AudioStreamListener) => { + this.outputAudioStreamListeners.delete(listener); + }, getVolume: () => this.outputVolumeProvider.getVolume(), getByteFrequencyData: (buffer: Uint8Array) => { this.outputVolumeProvider.getByteFrequencyData(buffer); @@ -438,6 +449,12 @@ export class WebRTCConnection extends BaseConnection { // Store reference for volume control this.audioElements.push(audioElement); + // Expose the agent's remote track immediately; audio capture below is + // best-effort and may fail in non-browser environments. + this.setOutputAudioStream( + new MediaStream([remoteAudioTrack.mediaStreamTrack]) + ); + // Apply current volume if it exists (for when volume was set before audio track arrived) if (this.audioElements.length === 1) { // First audio element - trigger a callback to sync with current volume @@ -507,6 +524,7 @@ export class WebRTCConnection extends BaseConnection { }); this.audioCaptureContext = null; } + this.setOutputAudioStream(null); // Clean up audio elements this.audioElements.forEach(element => { @@ -601,6 +619,14 @@ export class WebRTCConnection extends BaseConnection { this.outputVolumeProvider = provider; } + private setOutputAudioStream(stream: MediaStream | null): void { + if (this.outputAudioStream === stream) { + return; + } + this.outputAudioStream = stream; + this.outputAudioStreamListeners.forEach(listener => listener(stream)); + } + private async setupAudioCapture(track: RemoteAudioTrack) { try { // Create audio context for processing diff --git a/packages/client/src/utils/output.ts b/packages/client/src/utils/output.ts index bdf82af1..81df0aee 100644 --- a/packages/client/src/utils/output.ts +++ b/packages/client/src/utils/output.ts @@ -3,6 +3,7 @@ import type { FormatConfig } from "./connection.js"; import type { AudioWorkletConfig } from "../BaseConversation.js"; import { addLibsamplerateModule } from "./addLibsamplerateModule.js"; import type { + AudioStreamListener, OutputController, OutputDeviceConfig, } from "../OutputController.js"; @@ -104,7 +105,8 @@ export class MediaDeviceOutput analyser, gain, worklet, - audioElement + audioElement, + destination.stream ); return newOutput; @@ -126,13 +128,15 @@ export class MediaDeviceOutput private interrupted = false; private interruptTimeout: ReturnType | null = null; private readonly volumeProvider: VolumeProvider; + private readonly audioStreamListeners = new Set(); private constructor( private readonly context: AudioContext, private readonly analyser: AnalyserNode, private readonly gain: GainNode, private readonly worklet: AudioWorkletNode, - private readonly audioElement: HTMLAudioElement + private readonly audioElement: HTMLAudioElement, + private readonly audioStream: MediaStream ) { // Start the MessagePort to enable addEventListener to work // (required when using addEventListener instead of onmessage) @@ -155,6 +159,19 @@ export class MediaDeviceOutput this.volumeProvider.getByteFrequencyData(buffer); } + public getAudioStream(): MediaStream { + return this.audioStream; + } + + public addAudioStreamListener(listener: AudioStreamListener): void { + this.audioStreamListeners.add(listener); + listener(this.audioStream); + } + + public removeAudioStreamListener(listener: AudioStreamListener): void { + this.audioStreamListeners.delete(listener); + } + public addListener(listener: PlaybackListener): void { this.worklet.port.addEventListener("message", listener); } @@ -235,6 +252,8 @@ export class MediaDeviceOutput if (this.audioElement.parentNode) { this.audioElement.parentNode.removeChild(this.audioElement); } + this.audioStreamListeners.forEach(listener => listener(null)); + this.audioStreamListeners.clear(); this.audioElement.pause(); await this.context.close(); } diff --git a/packages/react/src/conversation/ConversationAudioStream.test.tsx b/packages/react/src/conversation/ConversationAudioStream.test.tsx new file mode 100644 index 00000000..aeb00a83 --- /dev/null +++ b/packages/react/src/conversation/ConversationAudioStream.test.tsx @@ -0,0 +1,137 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import React, { useContext } from "react"; +import { renderHook, act } from "@testing-library/react"; +import { + Conversation, + type Callbacks, + type ConversationLifecycleOptions, +} from "@elevenlabs/client"; +import { ConversationProvider } from "./ConversationProvider.js"; +import { + ConversationContext, + type ConversationContextValue, +} from "./ConversationContext.js"; +import { useConversationAudioStream } from "./ConversationAudioStream.js"; +import { useConversation } from "./useConversation.js"; + +vi.mock("@elevenlabs/client", async importOriginal => { + const actual = await importOriginal(); + return { ...actual, Conversation: { startSession: vi.fn() } }; +}); + +const createMockConversation = (audioStream: MediaStream | null = null) => + ({ + getId: vi.fn().mockReturnValue("test-id"), + endSession: vi.fn().mockResolvedValue(undefined), + setMicMuted: vi.fn(), + setVolume: vi.fn(), + getAudioStream: vi.fn().mockReturnValue(audioStream), + }) as unknown as Conversation; + +function useTestHook() { + const ctx = useContext(ConversationContext) as ConversationContextValue; + const audioStream = useConversationAudioStream(); + return { startSession: ctx.startSession, audioStream }; +} + +function createWrapper(props: Record = {}) { + return function Wrapper({ children }: React.PropsWithChildren) { + return {children}; + }; +} + +type MockStartSessionOptions = Partial & + Record; + +describe("useConversationAudioStream", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("throws when used outside a ConversationProvider", () => { + expect(() => renderHook(() => useConversationAudioStream())).toThrow( + "useConversationAudioStream must be used within a ConversationProvider" + ); + }); + + it("returns null initially", () => { + const { result } = renderHook(() => useConversationAudioStream(), { + wrapper: createWrapper(), + }); + + expect(result.current.audioStream).toBeNull(); + }); + + it("updates when onAudioStream fires", async () => { + const mockConversation = createMockConversation(); + vi.mocked(Conversation.startSession).mockResolvedValue(mockConversation); + + const { result } = renderHook(() => useTestHook(), { + wrapper: createWrapper(), + }); + + await act(async () => { + result.current.startSession(); + }); + + const [[opts]] = vi.mocked(Conversation.startSession).mock + .calls as [[MockStartSessionOptions]]; + const stream = {} as MediaStream; + + act(() => { + opts.onAudioStream?.(stream); + }); + + expect(result.current.audioStream.audioStream).toBe(stream); + }); + + it("clears the stream when the session disconnects", async () => { + const stream = {} as MediaStream; + const mockConversation = createMockConversation(stream); + vi.mocked(Conversation.startSession).mockResolvedValue(mockConversation); + + const { result } = renderHook(() => useTestHook(), { + wrapper: createWrapper(), + }); + + await act(async () => { + result.current.startSession(); + }); + + const [[opts]] = vi.mocked(Conversation.startSession).mock + .calls as [[MockStartSessionOptions]]; + + act(() => { + opts.onAudioStream?.(stream); + }); + expect(result.current.audioStream.audioStream).toBe(stream); + + act(() => { + opts.onDisconnect?.({ reason: "user" }); + }); + expect(result.current.audioStream.audioStream).toBeNull(); + }); + + it("is included in useConversation", async () => { + const mockConversation = createMockConversation(); + vi.mocked(Conversation.startSession).mockResolvedValue(mockConversation); + + const { result } = renderHook(() => useConversation(), { + wrapper: createWrapper(), + }); + + await act(async () => { + result.current.startSession(); + }); + + const [[opts]] = vi.mocked(Conversation.startSession).mock + .calls as [[MockStartSessionOptions]]; + const stream = {} as MediaStream; + + act(() => { + opts.onAudioStream?.(stream); + }); + + expect(result.current.audioStream).toBe(stream); + }); +}); diff --git a/packages/react/src/conversation/ConversationAudioStream.tsx b/packages/react/src/conversation/ConversationAudioStream.tsx new file mode 100644 index 00000000..c5f8e625 --- /dev/null +++ b/packages/react/src/conversation/ConversationAudioStream.tsx @@ -0,0 +1,57 @@ +import { createContext, useContext, useMemo, useState } from "react"; +import { useRegisterCallbacks } from "./ConversationContext.js"; + +export type ConversationAudioStreamValue = { + audioStream: MediaStream | null; +}; + +const ConversationAudioStreamContext = + createContext(null); + +/** + * Tracks the assistant output audio stream exposed by the active conversation. + * Must be rendered inside a `ConversationProvider`. + */ +export function ConversationAudioStreamProvider({ + children, +}: React.PropsWithChildren) { + const [audioStream, setAudioStream] = useState(null); + + useRegisterCallbacks({ + onAudioStream(stream) { + setAudioStream(stream); + }, + onDisconnect() { + setAudioStream(null); + }, + }); + + const value = useMemo( + () => ({ + audioStream, + }), + [audioStream] + ); + + return ( + + {children} + + ); +} + +/** + * Returns the assistant output audio stream, or `null` before a stream is + * available. Re-renders when the stream changes. + * + * Must be used within a `ConversationProvider`. + */ +export function useConversationAudioStream(): ConversationAudioStreamValue { + const ctx = useContext(ConversationAudioStreamContext); + if (!ctx) { + throw new Error( + "useConversationAudioStream must be used within a ConversationProvider" + ); + } + return ctx; +} diff --git a/packages/react/src/conversation/ConversationProvider.tsx b/packages/react/src/conversation/ConversationProvider.tsx index ca76890a..d360f279 100644 --- a/packages/react/src/conversation/ConversationProvider.tsx +++ b/packages/react/src/conversation/ConversationProvider.tsx @@ -33,6 +33,7 @@ import { } from "./ConversationInput.js"; import { ConversationModeProvider } from "./ConversationMode.js"; import { ConversationFeedbackProvider } from "./ConversationFeedback.js"; +import { ConversationAudioStreamProvider } from "./ConversationAudioStream.js"; import { ConversationClientToolsProvider, buildClientTools, @@ -50,6 +51,7 @@ const SUB_PROVIDERS_WITHOUT_PROPS: React.ComponentType[ ConversationStatusProvider, ConversationModeProvider, ConversationFeedbackProvider, + ConversationAudioStreamProvider, ConversationClientToolsProvider, ]; diff --git a/packages/react/src/conversation/useConversation.ts b/packages/react/src/conversation/useConversation.ts index 30c1c101..96c2a72d 100644 --- a/packages/react/src/conversation/useConversation.ts +++ b/packages/react/src/conversation/useConversation.ts @@ -6,6 +6,7 @@ import { useConversationStatus } from "./ConversationStatus.js"; import { useConversationInput } from "./ConversationInput.js"; import { useConversationMode } from "./ConversationMode.js"; import { useConversationFeedback } from "./ConversationFeedback.js"; +import { useConversationAudioStream } from "./ConversationAudioStream.js"; import { useRawConversation, useRegisterCallbacks, @@ -45,6 +46,7 @@ export function useConversation(props: UseConversationOptions = {}) { const { isMuted, setMuted } = useConversationInput(); const { mode, isSpeaking, isListening } = useConversationMode(); const { canSendFeedback, sendFeedback } = useConversationFeedback(); + const { audioStream } = useConversationAudioStream(); const startSession = useCallback( (options?: HookOptions) => { @@ -64,7 +66,7 @@ export function useConversation(props: UseConversationOptions = {}) { ...options, } as HookOptions); }, - [controls, hookOptionsRef] + [controls] ); const conversation = useRawConversation(); @@ -91,6 +93,7 @@ export function useConversation(props: UseConversationOptions = {}) { mode, isSpeaking, isListening, + audioStream, canSendFeedback, sendFeedback, }; diff --git a/packages/react/src/index.ts b/packages/react/src/index.ts index d560b121..cfb177d2 100644 --- a/packages/react/src/index.ts +++ b/packages/react/src/index.ts @@ -27,6 +27,7 @@ export { ConversationProvider } from "./conversation/ConversationProvider.js"; export { useConversationControls } from "./conversation/ConversationControls.js"; export { useConversationStatus } from "./conversation/ConversationStatus.js"; export { useConversationInput } from "./conversation/ConversationInput.js"; +export { useConversationAudioStream } from "./conversation/ConversationAudioStream.js"; export { useConversationMode } from "./conversation/ConversationMode.js"; export { useConversationFeedback } from "./conversation/ConversationFeedback.js"; export { useRawConversation } from "./conversation/ConversationContext.js"; @@ -35,6 +36,7 @@ export { useConversationClientTool } from "./conversation/ConversationClientTool export type { UseConversationOptions } from "./conversation/useConversation.js"; export type { ConversationControlsValue } from "./conversation/ConversationControls.js"; export type { ConversationInputValue } from "./conversation/ConversationInput.js"; +export type { ConversationAudioStreamValue } from "./conversation/ConversationAudioStream.js"; export type { ConversationStatus, ConversationStatusValue, diff --git a/packages/react/src/test-globals.d.ts b/packages/react/src/test-globals.d.ts index a21dd352..fb2bc40d 100644 --- a/packages/react/src/test-globals.d.ts +++ b/packages/react/src/test-globals.d.ts @@ -1 +1,3 @@ +/// + declare const console: Pick; diff --git a/packages/types/src/types.ts b/packages/types/src/types.ts index 22a8a729..ced6994b 100644 --- a/packages/types/src/types.ts +++ b/packages/types/src/types.ts @@ -61,6 +61,7 @@ export type Callbacks = { onError?: (message: string, context?: any) => void; onMessage?: (props: MessagePayload) => void; onAudio?: (base64Audio: string) => void; + onAudioStream?: (stream: MediaStream | null) => void; onModeChange?: (prop: { mode: Mode }) => void; onStatusChange?: (prop: { status: Status }) => void; onCanSendFeedbackChange?: (prop: { canSendFeedback: boolean }) => void; @@ -110,6 +111,7 @@ export const CALLBACK_KEYS = [ "onError", "onMessage", "onAudio", + "onAudioStream", "onModeChange", "onStatusChange", "onCanSendFeedbackChange", From a027233aee23481a1271936cb678f9c2f6a8ad98 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 30 Apr 2026 21:34:14 +0000 Subject: [PATCH 2/5] Fix React audio stream types --- .../react/src/conversation/ConversationAudioStream.test.tsx | 6 +++--- packages/react/src/conversation/types.ts | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/react/src/conversation/ConversationAudioStream.test.tsx b/packages/react/src/conversation/ConversationAudioStream.test.tsx index aeb00a83..3799dbf9 100644 --- a/packages/react/src/conversation/ConversationAudioStream.test.tsx +++ b/packages/react/src/conversation/ConversationAudioStream.test.tsx @@ -75,7 +75,7 @@ describe("useConversationAudioStream", () => { }); const [[opts]] = vi.mocked(Conversation.startSession).mock - .calls as [[MockStartSessionOptions]]; + .calls as unknown as [[MockStartSessionOptions]]; const stream = {} as MediaStream; act(() => { @@ -99,7 +99,7 @@ describe("useConversationAudioStream", () => { }); const [[opts]] = vi.mocked(Conversation.startSession).mock - .calls as [[MockStartSessionOptions]]; + .calls as unknown as [[MockStartSessionOptions]]; act(() => { opts.onAudioStream?.(stream); @@ -125,7 +125,7 @@ describe("useConversationAudioStream", () => { }); const [[opts]] = vi.mocked(Conversation.startSession).mock - .calls as [[MockStartSessionOptions]]; + .calls as unknown as [[MockStartSessionOptions]]; const stream = {} as MediaStream; act(() => { diff --git a/packages/react/src/conversation/types.ts b/packages/react/src/conversation/types.ts index d3c3b870..df4fb967 100644 --- a/packages/react/src/conversation/types.ts +++ b/packages/react/src/conversation/types.ts @@ -26,6 +26,7 @@ export type HookCallbacks = Pick< | "onError" | "onMessage" | "onAudio" + | "onAudioStream" | "onModeChange" | "onStatusChange" | "onCanSendFeedbackChange" From dc15b277f2d34841a8e1753050ec26c3d1b7b5a3 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 30 Apr 2026 22:38:10 +0000 Subject: [PATCH 3/5] Add symmetric conversation audio streams --- .changeset/tender-buses-yell.md | 2 +- packages/client/src/AudioStream.ts | 1 + packages/client/src/BaseConversation.test.ts | 6 +++ packages/client/src/BaseConversation.ts | 4 ++ packages/client/src/InputController.ts | 5 ++ packages/client/src/OutputController.ts | 9 ++-- packages/client/src/TextConversation.ts | 6 ++- packages/client/src/VoiceConversation.ts | 28 ++++++---- packages/client/src/index.ts | 1 + .../client/src/utils/WebRTCConnection.test.ts | 45 ++++++++++++++-- packages/client/src/utils/WebRTCConnection.ts | 54 ++++++++++++++++--- packages/client/src/utils/input.ts | 20 +++++++ packages/client/src/utils/output.ts | 18 +++---- .../ConversationAudioStream.test.tsx | 51 +++++++++++------- .../conversation/ConversationAudioStream.tsx | 29 ++++++---- packages/react/src/conversation/types.ts | 3 +- .../react/src/conversation/useConversation.ts | 5 +- packages/types/src/types.ts | 6 ++- 18 files changed, 223 insertions(+), 70 deletions(-) create mode 100644 packages/client/src/AudioStream.ts diff --git a/.changeset/tender-buses-yell.md b/.changeset/tender-buses-yell.md index 4eaf906e..6b3db294 100644 --- a/.changeset/tender-buses-yell.md +++ b/.changeset/tender-buses-yell.md @@ -2,4 +2,4 @@ "@elevenlabs/react": patch "@elevenlabs/types": patch -Expose the assistant output audio stream on conversations and React hooks. +Expose input and output audio streams on conversations and React hooks. diff --git a/packages/client/src/AudioStream.ts b/packages/client/src/AudioStream.ts new file mode 100644 index 00000000..10715f0e --- /dev/null +++ b/packages/client/src/AudioStream.ts @@ -0,0 +1 @@ +export type AudioStreamListener = (stream: MediaStream | null) => void; diff --git a/packages/client/src/BaseConversation.test.ts b/packages/client/src/BaseConversation.test.ts index c8b493a3..11a1f9d1 100644 --- a/packages/client/src/BaseConversation.test.ts +++ b/packages/client/src/BaseConversation.test.ts @@ -42,6 +42,12 @@ class TestConversation extends BaseConversation { public getOutputByteFrequencyData(): Uint8Array { return new Uint8Array(0); } + public getInputAudioStream(): MediaStream | null { + return null; + } + public getOutputAudioStream(): MediaStream | null { + return null; + } public getInputVolume(): number { return 0; } diff --git a/packages/client/src/BaseConversation.ts b/packages/client/src/BaseConversation.ts index 3e696a9a..09119ebd 100644 --- a/packages/client/src/BaseConversation.ts +++ b/packages/client/src/BaseConversation.ts @@ -156,6 +156,10 @@ export abstract class BaseConversation { return this.endSessionWithDetails({ reason: "user" }); } + public abstract getInputAudioStream(): MediaStream | null; + + public abstract getOutputAudioStream(): MediaStream | null; + private endSessionWithDetails = async (details: DisconnectionDetails) => { if (this.status !== "connected" && this.status !== "connecting") return; this.updateStatus("disconnecting"); diff --git a/packages/client/src/InputController.ts b/packages/client/src/InputController.ts index a1855c3d..b2be9db4 100644 --- a/packages/client/src/InputController.ts +++ b/packages/client/src/InputController.ts @@ -1,4 +1,5 @@ import type { FormatConfig } from "./utils/BaseConnection.js"; +import type { AudioStreamListener } from "./AudioStream.js"; export type InputDeviceConfig = { inputDeviceId?: string; @@ -10,6 +11,10 @@ export interface InputController { setDevice(config?: Partial & InputDeviceConfig): Promise; setMuted(isMuted: boolean): Promise; isMuted(): boolean; + /** Returns the user input audio stream, if one is available. */ + getInputAudioStream(): MediaStream | null; + addInputAudioStreamListener(listener: AudioStreamListener): void; + removeInputAudioStreamListener(listener: AudioStreamListener): void; /** * @deprecated AnalyserNode is a web-only API and will not work on all diff --git a/packages/client/src/OutputController.ts b/packages/client/src/OutputController.ts index e75ea1f2..6b1ddb50 100644 --- a/packages/client/src/OutputController.ts +++ b/packages/client/src/OutputController.ts @@ -1,20 +1,19 @@ import type { FormatConfig } from "./utils/BaseConnection.js"; +import type { AudioStreamListener } from "./AudioStream.js"; export type OutputDeviceConfig = { outputDeviceId?: string; }; -export type AudioStreamListener = (stream: MediaStream | null) => void; - export interface OutputController { close(): Promise; setDevice(config?: Partial & OutputDeviceConfig): Promise; setVolume(volume: number): void; interrupt(resetDuration?: number): void; /** Returns the assistant output audio stream, if one is available. */ - getAudioStream(): MediaStream | null; - addAudioStreamListener(listener: AudioStreamListener): void; - removeAudioStreamListener(listener: AudioStreamListener): void; + getOutputAudioStream(): MediaStream | null; + addOutputAudioStreamListener(listener: AudioStreamListener): void; + removeOutputAudioStreamListener(listener: AudioStreamListener): void; /** * @deprecated AnalyserNode is a web-only API and will not work on all diff --git a/packages/client/src/TextConversation.ts b/packages/client/src/TextConversation.ts index 9b43b908..f420d56e 100644 --- a/packages/client/src/TextConversation.ts +++ b/packages/client/src/TextConversation.ts @@ -24,7 +24,11 @@ export class TextConversation extends BaseConversation { return EMPTY_FREQUENCY_DATA; } - public getAudioStream(): MediaStream | null { + public getInputAudioStream(): MediaStream | null { + return null; + } + + public getOutputAudioStream(): MediaStream | null { return null; } diff --git a/packages/client/src/VoiceConversation.ts b/packages/client/src/VoiceConversation.ts index c573455b..faf615f2 100644 --- a/packages/client/src/VoiceConversation.ts +++ b/packages/client/src/VoiceConversation.ts @@ -13,10 +13,8 @@ import { type PartialOptions, } from "./BaseConversation.js"; import type { InputController } from "./InputController.js"; -import type { - AudioStreamListener, - OutputController, -} from "./OutputController.js"; +import type { AudioStreamListener } from "./AudioStream.js"; +import type { OutputController } from "./OutputController.js"; import { setupStrategy } from "./platform/VoiceSessionSetup.js"; export class VoiceConversation extends BaseConversation { @@ -118,8 +116,12 @@ export class VoiceConversation extends BaseConversation { } }; - private handleAudioStream: AudioStreamListener = stream => { - this.options.onAudioStream?.(stream); + private handleInputAudioStream: AudioStreamListener = stream => { + this.options.onInputAudioStream?.(stream); + }; + + private handleOutputAudioStream: AudioStreamListener = stream => { + this.options.onOutputAudioStream?.(stream); }; protected constructor( @@ -135,7 +137,8 @@ export class VoiceConversation extends BaseConversation { playbackEventTarget?.addListener(this.handlePlaybackEvent); - output.addAudioStreamListener(this.handleAudioStream); + input.addInputAudioStreamListener(this.handleInputAudioStream); + output.addOutputAudioStreamListener(this.handleOutputAudioStream); if (wakeLock) { // Wake locks are automatically released when a page is hidden like when switching tabs @@ -158,7 +161,8 @@ export class VoiceConversation extends BaseConversation { this.cleanUp(); this.playbackEventTarget?.removeListener(this.handlePlaybackEvent); this.playbackEventTarget = null; - this.output.removeAudioStreamListener(this.handleAudioStream); + this.input.removeInputAudioStreamListener(this.handleInputAudioStream); + this.output.removeOutputAudioStreamListener(this.handleOutputAudioStream); await super.handleEndSession(); if (this.visibilityChangeHandler) { @@ -235,8 +239,12 @@ export class VoiceConversation extends BaseConversation { return this.output.getVolume(); } - public getAudioStream(): MediaStream | null { - return this.output.getAudioStream(); + public getInputAudioStream(): MediaStream | null { + return this.input.getInputAudioStream(); + } + + public getOutputAudioStream(): MediaStream | null { + return this.output.getOutputAudioStream(); } public async changeInputDevice({ diff --git a/packages/client/src/index.ts b/packages/client/src/index.ts index 8091dc77..f8c9bf2b 100644 --- a/packages/client/src/index.ts +++ b/packages/client/src/index.ts @@ -21,6 +21,7 @@ export type { OutputController, OutputDeviceConfig, } from "./OutputController.js"; +export type { AudioStreamListener } from "./AudioStream.js"; export type { InputConfig } from "./utils/input.js"; export type { OutputConfig } from "./utils/output.js"; export type { diff --git a/packages/client/src/utils/WebRTCConnection.test.ts b/packages/client/src/utils/WebRTCConnection.test.ts index ebc19a03..fbd11718 100644 --- a/packages/client/src/utils/WebRTCConnection.test.ts +++ b/packages/client/src/utils/WebRTCConnection.test.ts @@ -174,7 +174,9 @@ describe("WebRTCConnection", () => { vi.stubGlobal("AudioContext", MockAudioContext); vi.stubGlobal( "MediaStream", - vi.fn((tracks: unknown[]) => ({ getTracks: () => tracks })) + vi.fn(function MediaStream(tracks: unknown[]) { + return { getTracks: () => tracks }; + }) ); const connection = await WebRTCConnection.create({ @@ -299,7 +301,7 @@ describe("WebRTCConnection", () => { } ); - it("notifies audio stream listeners when the stream changes", async () => { + it("notifies output audio stream listeners when the stream changes", async () => { const mockRoom = new Room() as any; (mockRoom.on as ReturnType).mockImplementation( (event: string, callback: () => void) => { @@ -322,12 +324,47 @@ describe("WebRTCConnection", () => { }); const listener = vi.fn(); - connection.output.addAudioStreamListener(listener); + connection.output.addOutputAudioStreamListener(listener); expect(listener).toHaveBeenCalledWith(null); const stream = { getTracks: () => [] } as unknown as MediaStream; connection["setOutputAudioStream"](stream); - expect(connection.output.getAudioStream()).toBe(stream); + expect(connection.output.getOutputAudioStream()).toBe(stream); + expect(listener).toHaveBeenLastCalledWith(stream); + + connection.close(); + expect(listener).toHaveBeenLastCalledWith(null); + }); + + it("notifies input audio stream listeners when the stream changes", async () => { + const mockRoom = new Room() as any; + (mockRoom.on as ReturnType).mockImplementation( + (event: string, callback: () => void) => { + if (event === "connected") { + queueMicrotask(callback); + } + } + ); + (mockRoom.once as ReturnType).mockImplementation( + (event: string, callback: () => void) => { + if (event === "signalConnected") { + queueMicrotask(callback); + } + } + ); + + const connection = await WebRTCConnection.create({ + conversationToken: "test-token", + connectionType: "webrtc", + }); + const listener = vi.fn(); + + connection.input.addInputAudioStreamListener(listener); + expect(listener).toHaveBeenCalledWith(null); + + const stream = { getTracks: () => [] } as unknown as MediaStream; + connection["setInputAudioStream"](stream); + expect(connection.input.getInputAudioStream()).toBe(stream); expect(listener).toHaveBeenLastCalledWith(stream); connection.close(); diff --git a/packages/client/src/utils/WebRTCConnection.ts b/packages/client/src/utils/WebRTCConnection.ts index ab45ae93..bab09534 100644 --- a/packages/client/src/utils/WebRTCConnection.ts +++ b/packages/client/src/utils/WebRTCConnection.ts @@ -27,8 +27,8 @@ import { import { arrayBufferToBase64 } from "./audio.js"; import { loadRawAudioProcessor } from "./rawAudioProcessor.generated.js"; import type { InputController, InputDeviceConfig } from "../InputController.js"; +import type { AudioStreamListener } from "../AudioStream.js"; import type { - AudioStreamListener, OutputController, OutputDeviceConfig, } from "../OutputController.js"; @@ -61,6 +61,8 @@ export class WebRTCConnection extends BaseConnection { private audioCaptureContext: AudioContext | null = null; private audioElements: HTMLAudioElement[] = []; private outputDeviceId: string | null = null; + private inputAudioStream: MediaStream | null = null; + private inputAudioStreamListeners = new Set(); private outputAudioStream: MediaStream | null = null; private outputAudioStreamListeners = new Set(); @@ -158,6 +160,14 @@ export class WebRTCConnection extends BaseConnection { } }, isMuted: () => this._isMuted, + getInputAudioStream: () => this.inputAudioStream, + addInputAudioStreamListener: (listener: AudioStreamListener) => { + this.inputAudioStreamListeners.add(listener); + listener(this.inputAudioStream); + }, + removeInputAudioStreamListener: (listener: AudioStreamListener) => { + this.inputAudioStreamListeners.delete(listener); + }, getAnalyser: () => this.inputAnalyser ?? undefined, getVolume: () => { if (this._isMuted) return 0; @@ -203,12 +213,12 @@ export class WebRTCConnection extends BaseConnection { // Audio interruption is managed by the server/agent }, getAnalyser: () => this.outputAnalyser ?? undefined, - getAudioStream: () => this.outputAudioStream, - addAudioStreamListener: (listener: AudioStreamListener) => { + getOutputAudioStream: () => this.outputAudioStream, + addOutputAudioStreamListener: (listener: AudioStreamListener) => { this.outputAudioStreamListeners.add(listener); listener(this.outputAudioStream); }, - removeAudioStreamListener: (listener: AudioStreamListener) => { + removeOutputAudioStreamListener: (listener: AudioStreamListener) => { this.outputAudioStreamListeners.delete(listener); }, getVolume: () => this.outputVolumeProvider.getVolume(), @@ -341,6 +351,7 @@ export class WebRTCConnection extends BaseConnection { Track.Source.Microphone )?.track; if (micTrack) { + connection.setInputAudioStreamFromTrack(micTrack.mediaStreamTrack); connection.setupInputAnalyser(micTrack.mediaStreamTrack); } @@ -451,9 +462,7 @@ export class WebRTCConnection extends BaseConnection { // Expose the agent's remote track immediately; audio capture below is // best-effort and may fail in non-browser environments. - this.setOutputAudioStream( - new MediaStream([remoteAudioTrack.mediaStreamTrack]) - ); + this.setOutputAudioStreamFromTrack(remoteAudioTrack.mediaStreamTrack); // Apply current volume if it exists (for when volume was set before audio track arrived) if (this.audioElements.length === 1) { @@ -516,6 +525,7 @@ export class WebRTCConnection extends BaseConnection { this.inputAudioContext = null; this.inputAnalyser = null; } + this.setInputAudioStream(null); // Clean up audio capture context (non-blocking) if (this.audioCaptureContext) { @@ -619,6 +629,35 @@ export class WebRTCConnection extends BaseConnection { this.outputVolumeProvider = provider; } + private createMediaStream( + mediaStreamTrack: MediaStreamTrack + ): MediaStream | null { + if (typeof MediaStream === "undefined") { + return null; + } + return new MediaStream([mediaStreamTrack]); + } + + private setInputAudioStreamFromTrack( + mediaStreamTrack: MediaStreamTrack + ): void { + this.setInputAudioStream(this.createMediaStream(mediaStreamTrack)); + } + + private setOutputAudioStreamFromTrack( + mediaStreamTrack: MediaStreamTrack + ): void { + this.setOutputAudioStream(this.createMediaStream(mediaStreamTrack)); + } + + private setInputAudioStream(stream: MediaStream | null): void { + if (this.inputAudioStream === stream) { + return; + } + this.inputAudioStream = stream; + this.inputAudioStreamListeners.forEach(listener => listener(stream)); + } + private setOutputAudioStream(stream: MediaStream | null): void { if (this.outputAudioStream === stream) { return; @@ -764,6 +803,7 @@ export class WebRTCConnection extends BaseConnection { }); // Reconnect the input analyser to the new track + this.setInputAudioStreamFromTrack(audioTrack.mediaStreamTrack); this.setupInputAnalyser(audioTrack.mediaStreamTrack); } catch (error) { console.error("Failed to change input device:", error); diff --git a/packages/client/src/utils/input.ts b/packages/client/src/utils/input.ts index 1be80d62..774457dc 100644 --- a/packages/client/src/utils/input.ts +++ b/packages/client/src/utils/input.ts @@ -4,6 +4,7 @@ import { isIosDevice } from "./compatibility.js"; import type { AudioWorkletConfig } from "../BaseConversation.js"; import { addLibsamplerateModule } from "./addLibsamplerateModule.js"; import type { InputController, InputDeviceConfig } from "../InputController.js"; +import type { AudioStreamListener } from "../AudioStream.js"; import { createAnalyserVolumeProvider, type VolumeProvider, @@ -135,6 +136,7 @@ export class MediaDeviceInput implements InputController, InputEventTarget { private muted = false; private readonly volumeProvider: VolumeProvider; + private readonly inputAudioStreamListeners = new Set(); private constructor( private readonly context: AudioContext, @@ -175,6 +177,19 @@ export class MediaDeviceInput implements InputController, InputEventTarget { this.volumeProvider.getByteFrequencyData(buffer); } + public getInputAudioStream(): MediaStream { + return this.inputStream; + } + + public addInputAudioStreamListener(listener: AudioStreamListener): void { + this.inputAudioStreamListeners.add(listener); + listener(this.inputStream); + } + + public removeInputAudioStreamListener(listener: AudioStreamListener): void { + this.inputAudioStreamListeners.delete(listener); + } + public isMuted(): boolean { return this.muted; } @@ -200,6 +215,8 @@ export class MediaDeviceInput implements InputController, InputEventTarget { "change", this.handlePermissionsChange ); + this.inputAudioStreamListeners.forEach(listener => listener(null)); + this.inputAudioStreamListeners.clear(); await this.context.close(); } @@ -250,6 +267,9 @@ export class MediaDeviceInput implements InputController, InputEventTarget { this.inputStream = newInputStream; this.mediaStreamSource = this.context.createMediaStreamSource(newInputStream); + this.inputAudioStreamListeners.forEach(listener => + listener(newInputStream) + ); // Reconnect the audio graph this.mediaStreamSource.connect(this.analyser); diff --git a/packages/client/src/utils/output.ts b/packages/client/src/utils/output.ts index 81df0aee..1cf71a0d 100644 --- a/packages/client/src/utils/output.ts +++ b/packages/client/src/utils/output.ts @@ -2,8 +2,8 @@ import { loadAudioConcatProcessor } from "./audioConcatProcessor.generated.js"; import type { FormatConfig } from "./connection.js"; import type { AudioWorkletConfig } from "../BaseConversation.js"; import { addLibsamplerateModule } from "./addLibsamplerateModule.js"; +import type { AudioStreamListener } from "../AudioStream.js"; import type { - AudioStreamListener, OutputController, OutputDeviceConfig, } from "../OutputController.js"; @@ -128,7 +128,7 @@ export class MediaDeviceOutput private interrupted = false; private interruptTimeout: ReturnType | null = null; private readonly volumeProvider: VolumeProvider; - private readonly audioStreamListeners = new Set(); + private readonly outputAudioStreamListeners = new Set(); private constructor( private readonly context: AudioContext, @@ -159,17 +159,17 @@ export class MediaDeviceOutput this.volumeProvider.getByteFrequencyData(buffer); } - public getAudioStream(): MediaStream { + public getOutputAudioStream(): MediaStream { return this.audioStream; } - public addAudioStreamListener(listener: AudioStreamListener): void { - this.audioStreamListeners.add(listener); + public addOutputAudioStreamListener(listener: AudioStreamListener): void { + this.outputAudioStreamListeners.add(listener); listener(this.audioStream); } - public removeAudioStreamListener(listener: AudioStreamListener): void { - this.audioStreamListeners.delete(listener); + public removeOutputAudioStreamListener(listener: AudioStreamListener): void { + this.outputAudioStreamListeners.delete(listener); } public addListener(listener: PlaybackListener): void { @@ -252,8 +252,8 @@ export class MediaDeviceOutput if (this.audioElement.parentNode) { this.audioElement.parentNode.removeChild(this.audioElement); } - this.audioStreamListeners.forEach(listener => listener(null)); - this.audioStreamListeners.clear(); + this.outputAudioStreamListeners.forEach(listener => listener(null)); + this.outputAudioStreamListeners.clear(); this.audioElement.pause(); await this.context.close(); } diff --git a/packages/react/src/conversation/ConversationAudioStream.test.tsx b/packages/react/src/conversation/ConversationAudioStream.test.tsx index 3799dbf9..da443ec1 100644 --- a/packages/react/src/conversation/ConversationAudioStream.test.tsx +++ b/packages/react/src/conversation/ConversationAudioStream.test.tsx @@ -19,19 +19,23 @@ vi.mock("@elevenlabs/client", async importOriginal => { return { ...actual, Conversation: { startSession: vi.fn() } }; }); -const createMockConversation = (audioStream: MediaStream | null = null) => +const createMockConversation = ( + inputAudioStream: MediaStream | null = null, + outputAudioStream: MediaStream | null = null +) => ({ getId: vi.fn().mockReturnValue("test-id"), endSession: vi.fn().mockResolvedValue(undefined), setMicMuted: vi.fn(), setVolume: vi.fn(), - getAudioStream: vi.fn().mockReturnValue(audioStream), + getInputAudioStream: vi.fn().mockReturnValue(inputAudioStream), + getOutputAudioStream: vi.fn().mockReturnValue(outputAudioStream), }) as unknown as Conversation; function useTestHook() { const ctx = useContext(ConversationContext) as ConversationContextValue; - const audioStream = useConversationAudioStream(); - return { startSession: ctx.startSession, audioStream }; + const streams = useConversationAudioStream(); + return { startSession: ctx.startSession, streams }; } function createWrapper(props: Record = {}) { @@ -59,10 +63,11 @@ describe("useConversationAudioStream", () => { wrapper: createWrapper(), }); - expect(result.current.audioStream).toBeNull(); + expect(result.current.inputAudioStream).toBeNull(); + expect(result.current.outputAudioStream).toBeNull(); }); - it("updates when onAudioStream fires", async () => { + it("updates when input and output audio stream callbacks fire", async () => { const mockConversation = createMockConversation(); vi.mocked(Conversation.startSession).mockResolvedValue(mockConversation); @@ -76,18 +81,22 @@ describe("useConversationAudioStream", () => { const [[opts]] = vi.mocked(Conversation.startSession).mock .calls as unknown as [[MockStartSessionOptions]]; - const stream = {} as MediaStream; + const inputStream = {} as MediaStream; + const outputStream = {} as MediaStream; act(() => { - opts.onAudioStream?.(stream); + opts.onInputAudioStream?.(inputStream); + opts.onOutputAudioStream?.(outputStream); }); - expect(result.current.audioStream.audioStream).toBe(stream); + expect(result.current.streams.inputAudioStream).toBe(inputStream); + expect(result.current.streams.outputAudioStream).toBe(outputStream); }); - it("clears the stream when the session disconnects", async () => { - const stream = {} as MediaStream; - const mockConversation = createMockConversation(stream); + it("clears streams when the session disconnects", async () => { + const inputStream = {} as MediaStream; + const outputStream = {} as MediaStream; + const mockConversation = createMockConversation(inputStream, outputStream); vi.mocked(Conversation.startSession).mockResolvedValue(mockConversation); const { result } = renderHook(() => useTestHook(), { @@ -102,14 +111,17 @@ describe("useConversationAudioStream", () => { .calls as unknown as [[MockStartSessionOptions]]; act(() => { - opts.onAudioStream?.(stream); + opts.onInputAudioStream?.(inputStream); + opts.onOutputAudioStream?.(outputStream); }); - expect(result.current.audioStream.audioStream).toBe(stream); + expect(result.current.streams.inputAudioStream).toBe(inputStream); + expect(result.current.streams.outputAudioStream).toBe(outputStream); act(() => { opts.onDisconnect?.({ reason: "user" }); }); - expect(result.current.audioStream.audioStream).toBeNull(); + expect(result.current.streams.inputAudioStream).toBeNull(); + expect(result.current.streams.outputAudioStream).toBeNull(); }); it("is included in useConversation", async () => { @@ -126,12 +138,15 @@ describe("useConversationAudioStream", () => { const [[opts]] = vi.mocked(Conversation.startSession).mock .calls as unknown as [[MockStartSessionOptions]]; - const stream = {} as MediaStream; + const inputStream = {} as MediaStream; + const outputStream = {} as MediaStream; act(() => { - opts.onAudioStream?.(stream); + opts.onInputAudioStream?.(inputStream); + opts.onOutputAudioStream?.(outputStream); }); - expect(result.current.audioStream).toBe(stream); + expect(result.current.inputAudioStream).toBe(inputStream); + expect(result.current.outputAudioStream).toBe(outputStream); }); }); diff --git a/packages/react/src/conversation/ConversationAudioStream.tsx b/packages/react/src/conversation/ConversationAudioStream.tsx index c5f8e625..ee7d7349 100644 --- a/packages/react/src/conversation/ConversationAudioStream.tsx +++ b/packages/react/src/conversation/ConversationAudioStream.tsx @@ -2,35 +2,44 @@ import { createContext, useContext, useMemo, useState } from "react"; import { useRegisterCallbacks } from "./ConversationContext.js"; export type ConversationAudioStreamValue = { - audioStream: MediaStream | null; + inputAudioStream: MediaStream | null; + outputAudioStream: MediaStream | null; }; const ConversationAudioStreamContext = createContext(null); /** - * Tracks the assistant output audio stream exposed by the active conversation. + * Tracks input and output audio streams exposed by the active conversation. * Must be rendered inside a `ConversationProvider`. */ export function ConversationAudioStreamProvider({ children, }: React.PropsWithChildren) { - const [audioStream, setAudioStream] = useState(null); + const [inputAudioStream, setInputAudioStream] = + useState(null); + const [outputAudioStream, setOutputAudioStream] = + useState(null); useRegisterCallbacks({ - onAudioStream(stream) { - setAudioStream(stream); + onInputAudioStream(stream) { + setInputAudioStream(stream); + }, + onOutputAudioStream(stream) { + setOutputAudioStream(stream); }, onDisconnect() { - setAudioStream(null); + setInputAudioStream(null); + setOutputAudioStream(null); }, }); const value = useMemo( () => ({ - audioStream, + inputAudioStream, + outputAudioStream, }), - [audioStream] + [inputAudioStream, outputAudioStream] ); return ( @@ -41,8 +50,8 @@ export function ConversationAudioStreamProvider({ } /** - * Returns the assistant output audio stream, or `null` before a stream is - * available. Re-renders when the stream changes. + * Returns the user input and assistant output audio streams, or `null` before + * each stream is available. Re-renders when either stream changes. * * Must be used within a `ConversationProvider`. */ diff --git a/packages/react/src/conversation/types.ts b/packages/react/src/conversation/types.ts index df4fb967..c8b43a19 100644 --- a/packages/react/src/conversation/types.ts +++ b/packages/react/src/conversation/types.ts @@ -26,7 +26,8 @@ export type HookCallbacks = Pick< | "onError" | "onMessage" | "onAudio" - | "onAudioStream" + | "onInputAudioStream" + | "onOutputAudioStream" | "onModeChange" | "onStatusChange" | "onCanSendFeedbackChange" diff --git a/packages/react/src/conversation/useConversation.ts b/packages/react/src/conversation/useConversation.ts index 96c2a72d..c8008da2 100644 --- a/packages/react/src/conversation/useConversation.ts +++ b/packages/react/src/conversation/useConversation.ts @@ -46,7 +46,7 @@ export function useConversation(props: UseConversationOptions = {}) { const { isMuted, setMuted } = useConversationInput(); const { mode, isSpeaking, isListening } = useConversationMode(); const { canSendFeedback, sendFeedback } = useConversationFeedback(); - const { audioStream } = useConversationAudioStream(); + const { inputAudioStream, outputAudioStream } = useConversationAudioStream(); const startSession = useCallback( (options?: HookOptions) => { @@ -93,7 +93,8 @@ export function useConversation(props: UseConversationOptions = {}) { mode, isSpeaking, isListening, - audioStream, + inputAudioStream, + outputAudioStream, canSendFeedback, sendFeedback, }; diff --git a/packages/types/src/types.ts b/packages/types/src/types.ts index ced6994b..d0645b93 100644 --- a/packages/types/src/types.ts +++ b/packages/types/src/types.ts @@ -61,7 +61,8 @@ export type Callbacks = { onError?: (message: string, context?: any) => void; onMessage?: (props: MessagePayload) => void; onAudio?: (base64Audio: string) => void; - onAudioStream?: (stream: MediaStream | null) => void; + onInputAudioStream?: (stream: MediaStream | null) => void; + onOutputAudioStream?: (stream: MediaStream | null) => void; onModeChange?: (prop: { mode: Mode }) => void; onStatusChange?: (prop: { status: Status }) => void; onCanSendFeedbackChange?: (prop: { canSendFeedback: boolean }) => void; @@ -111,7 +112,8 @@ export const CALLBACK_KEYS = [ "onError", "onMessage", "onAudio", - "onAudioStream", + "onInputAudioStream", + "onOutputAudioStream", "onModeChange", "onStatusChange", "onCanSendFeedbackChange", From 0ee2ca2fc222b9f805ce63b6eb69de712aeeeb77 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 30 Apr 2026 22:43:34 +0000 Subject: [PATCH 4/5] Mark audio streams as minor release --- .changeset/tender-buses-yell.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.changeset/tender-buses-yell.md b/.changeset/tender-buses-yell.md index 6b3db294..321a6b22 100644 --- a/.changeset/tender-buses-yell.md +++ b/.changeset/tender-buses-yell.md @@ -1,5 +1,5 @@ -"@elevenlabs/client": patch -"@elevenlabs/react": patch -"@elevenlabs/types": patch +"@elevenlabs/client": minor +"@elevenlabs/react": minor +"@elevenlabs/types": minor Expose input and output audio streams on conversations and React hooks. From 4e6cf4fb2fb31b7f85308ccc5a0fc7f33a1a30b8 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 30 Apr 2026 22:45:58 +0000 Subject: [PATCH 5/5] Simplify controller audio stream methods --- packages/client/src/InputController.ts | 6 +++--- packages/client/src/OutputController.ts | 6 +++--- packages/client/src/VoiceConversation.ts | 12 ++++++------ packages/client/src/utils/WebRTCConnection.test.ts | 8 ++++---- packages/client/src/utils/WebRTCConnection.ts | 12 ++++++------ packages/client/src/utils/input.ts | 6 +++--- packages/client/src/utils/output.ts | 6 +++--- 7 files changed, 28 insertions(+), 28 deletions(-) diff --git a/packages/client/src/InputController.ts b/packages/client/src/InputController.ts index b2be9db4..da7c308e 100644 --- a/packages/client/src/InputController.ts +++ b/packages/client/src/InputController.ts @@ -12,9 +12,9 @@ export interface InputController { setMuted(isMuted: boolean): Promise; isMuted(): boolean; /** Returns the user input audio stream, if one is available. */ - getInputAudioStream(): MediaStream | null; - addInputAudioStreamListener(listener: AudioStreamListener): void; - removeInputAudioStreamListener(listener: AudioStreamListener): void; + getAudioStream(): MediaStream | null; + addAudioStreamListener(listener: AudioStreamListener): void; + removeAudioStreamListener(listener: AudioStreamListener): void; /** * @deprecated AnalyserNode is a web-only API and will not work on all diff --git a/packages/client/src/OutputController.ts b/packages/client/src/OutputController.ts index 6b1ddb50..3755d4fe 100644 --- a/packages/client/src/OutputController.ts +++ b/packages/client/src/OutputController.ts @@ -11,9 +11,9 @@ export interface OutputController { setVolume(volume: number): void; interrupt(resetDuration?: number): void; /** Returns the assistant output audio stream, if one is available. */ - getOutputAudioStream(): MediaStream | null; - addOutputAudioStreamListener(listener: AudioStreamListener): void; - removeOutputAudioStreamListener(listener: AudioStreamListener): void; + getAudioStream(): MediaStream | null; + addAudioStreamListener(listener: AudioStreamListener): void; + removeAudioStreamListener(listener: AudioStreamListener): void; /** * @deprecated AnalyserNode is a web-only API and will not work on all diff --git a/packages/client/src/VoiceConversation.ts b/packages/client/src/VoiceConversation.ts index faf615f2..e3247cdc 100644 --- a/packages/client/src/VoiceConversation.ts +++ b/packages/client/src/VoiceConversation.ts @@ -137,8 +137,8 @@ export class VoiceConversation extends BaseConversation { playbackEventTarget?.addListener(this.handlePlaybackEvent); - input.addInputAudioStreamListener(this.handleInputAudioStream); - output.addOutputAudioStreamListener(this.handleOutputAudioStream); + input.addAudioStreamListener(this.handleInputAudioStream); + output.addAudioStreamListener(this.handleOutputAudioStream); if (wakeLock) { // Wake locks are automatically released when a page is hidden like when switching tabs @@ -161,8 +161,8 @@ export class VoiceConversation extends BaseConversation { this.cleanUp(); this.playbackEventTarget?.removeListener(this.handlePlaybackEvent); this.playbackEventTarget = null; - this.input.removeInputAudioStreamListener(this.handleInputAudioStream); - this.output.removeOutputAudioStreamListener(this.handleOutputAudioStream); + this.input.removeAudioStreamListener(this.handleInputAudioStream); + this.output.removeAudioStreamListener(this.handleOutputAudioStream); await super.handleEndSession(); if (this.visibilityChangeHandler) { @@ -240,11 +240,11 @@ export class VoiceConversation extends BaseConversation { } public getInputAudioStream(): MediaStream | null { - return this.input.getInputAudioStream(); + return this.input.getAudioStream(); } public getOutputAudioStream(): MediaStream | null { - return this.output.getOutputAudioStream(); + return this.output.getAudioStream(); } public async changeInputDevice({ diff --git a/packages/client/src/utils/WebRTCConnection.test.ts b/packages/client/src/utils/WebRTCConnection.test.ts index fbd11718..4e64d51c 100644 --- a/packages/client/src/utils/WebRTCConnection.test.ts +++ b/packages/client/src/utils/WebRTCConnection.test.ts @@ -324,12 +324,12 @@ describe("WebRTCConnection", () => { }); const listener = vi.fn(); - connection.output.addOutputAudioStreamListener(listener); + connection.output.addAudioStreamListener(listener); expect(listener).toHaveBeenCalledWith(null); const stream = { getTracks: () => [] } as unknown as MediaStream; connection["setOutputAudioStream"](stream); - expect(connection.output.getOutputAudioStream()).toBe(stream); + expect(connection.output.getAudioStream()).toBe(stream); expect(listener).toHaveBeenLastCalledWith(stream); connection.close(); @@ -359,12 +359,12 @@ describe("WebRTCConnection", () => { }); const listener = vi.fn(); - connection.input.addInputAudioStreamListener(listener); + connection.input.addAudioStreamListener(listener); expect(listener).toHaveBeenCalledWith(null); const stream = { getTracks: () => [] } as unknown as MediaStream; connection["setInputAudioStream"](stream); - expect(connection.input.getInputAudioStream()).toBe(stream); + expect(connection.input.getAudioStream()).toBe(stream); expect(listener).toHaveBeenLastCalledWith(stream); connection.close(); diff --git a/packages/client/src/utils/WebRTCConnection.ts b/packages/client/src/utils/WebRTCConnection.ts index bab09534..eb54ca74 100644 --- a/packages/client/src/utils/WebRTCConnection.ts +++ b/packages/client/src/utils/WebRTCConnection.ts @@ -160,12 +160,12 @@ export class WebRTCConnection extends BaseConnection { } }, isMuted: () => this._isMuted, - getInputAudioStream: () => this.inputAudioStream, - addInputAudioStreamListener: (listener: AudioStreamListener) => { + getAudioStream: () => this.inputAudioStream, + addAudioStreamListener: (listener: AudioStreamListener) => { this.inputAudioStreamListeners.add(listener); listener(this.inputAudioStream); }, - removeInputAudioStreamListener: (listener: AudioStreamListener) => { + removeAudioStreamListener: (listener: AudioStreamListener) => { this.inputAudioStreamListeners.delete(listener); }, getAnalyser: () => this.inputAnalyser ?? undefined, @@ -213,12 +213,12 @@ export class WebRTCConnection extends BaseConnection { // Audio interruption is managed by the server/agent }, getAnalyser: () => this.outputAnalyser ?? undefined, - getOutputAudioStream: () => this.outputAudioStream, - addOutputAudioStreamListener: (listener: AudioStreamListener) => { + getAudioStream: () => this.outputAudioStream, + addAudioStreamListener: (listener: AudioStreamListener) => { this.outputAudioStreamListeners.add(listener); listener(this.outputAudioStream); }, - removeOutputAudioStreamListener: (listener: AudioStreamListener) => { + removeAudioStreamListener: (listener: AudioStreamListener) => { this.outputAudioStreamListeners.delete(listener); }, getVolume: () => this.outputVolumeProvider.getVolume(), diff --git a/packages/client/src/utils/input.ts b/packages/client/src/utils/input.ts index 774457dc..82ece356 100644 --- a/packages/client/src/utils/input.ts +++ b/packages/client/src/utils/input.ts @@ -177,16 +177,16 @@ export class MediaDeviceInput implements InputController, InputEventTarget { this.volumeProvider.getByteFrequencyData(buffer); } - public getInputAudioStream(): MediaStream { + public getAudioStream(): MediaStream { return this.inputStream; } - public addInputAudioStreamListener(listener: AudioStreamListener): void { + public addAudioStreamListener(listener: AudioStreamListener): void { this.inputAudioStreamListeners.add(listener); listener(this.inputStream); } - public removeInputAudioStreamListener(listener: AudioStreamListener): void { + public removeAudioStreamListener(listener: AudioStreamListener): void { this.inputAudioStreamListeners.delete(listener); } diff --git a/packages/client/src/utils/output.ts b/packages/client/src/utils/output.ts index 1cf71a0d..5ec5469a 100644 --- a/packages/client/src/utils/output.ts +++ b/packages/client/src/utils/output.ts @@ -159,16 +159,16 @@ export class MediaDeviceOutput this.volumeProvider.getByteFrequencyData(buffer); } - public getOutputAudioStream(): MediaStream { + public getAudioStream(): MediaStream { return this.audioStream; } - public addOutputAudioStreamListener(listener: AudioStreamListener): void { + public addAudioStreamListener(listener: AudioStreamListener): void { this.outputAudioStreamListeners.add(listener); listener(this.audioStream); } - public removeOutputAudioStreamListener(listener: AudioStreamListener): void { + public removeAudioStreamListener(listener: AudioStreamListener): void { this.outputAudioStreamListeners.delete(listener); }