Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/tender-buses-yell.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"@elevenlabs/client": minor
"@elevenlabs/react": minor
"@elevenlabs/types": minor

Expose input and output audio streams on conversations and React hooks.
1 change: 1 addition & 0 deletions packages/client/src/AudioStream.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/**
 * Listener signature for audio stream updates: receives the current
 * `MediaStream`, or `null` when there is none.
 */
export type AudioStreamListener = (stream: MediaStream | null) => void;
6 changes: 6 additions & 0 deletions packages/client/src/BaseConversation.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ class TestConversation extends BaseConversation {
/** Test stub: always returns an empty (zero-length) frequency buffer. */
public getOutputByteFrequencyData(): Uint8Array {
return new Uint8Array(0);
}
/** Test stub: always returns `null` for the input audio stream. */
public getInputAudioStream(): MediaStream | null {
return null;
}
/** Test stub: always returns `null` for the output audio stream. */
public getOutputAudioStream(): MediaStream | null {
return null;
}
/** Test stub: always reports an input volume of 0. */
public getInputVolume(): number {
return 0;
}
Expand Down
4 changes: 4 additions & 0 deletions packages/client/src/BaseConversation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,10 @@ export abstract class BaseConversation {
return this.endSessionWithDetails({ reason: "user" });
}

/**
 * Returns the input audio stream for this conversation, or `null` when
 * none is available. Each concrete conversation type supplies its own
 * implementation.
 */
public abstract getInputAudioStream(): MediaStream | null;

/**
 * Returns the output audio stream for this conversation, or `null` when
 * none is available. Each concrete conversation type supplies its own
 * implementation.
 */
public abstract getOutputAudioStream(): MediaStream | null;

private endSessionWithDetails = async (details: DisconnectionDetails) => {
if (this.status !== "connected" && this.status !== "connecting") return;
this.updateStatus("disconnecting");
Expand Down
5 changes: 5 additions & 0 deletions packages/client/src/InputController.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import type { FormatConfig } from "./utils/BaseConnection.js";
import type { AudioStreamListener } from "./AudioStream.js";

export type InputDeviceConfig = {
inputDeviceId?: string;
Expand All @@ -10,6 +11,10 @@ export interface InputController {
setDevice(config?: Partial<FormatConfig> & InputDeviceConfig): Promise<void>;
setMuted(isMuted: boolean): Promise<void>;
isMuted(): boolean;
/** Returns the user input audio stream, if one is available. */
getAudioStream(): MediaStream | null;

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doing this we're exposing a Web API type on our public API and I'm not completely convinced that's what we'd want to do 🤔

/**
 * Registers a listener for changes to the user input audio stream.
 * NOTE(review): the WebRTC connection's implementation also invokes the
 * listener immediately with the current stream (possibly `null`) —
 * confirm whether every implementation is expected to do the same.
 */
addAudioStreamListener(listener: AudioStreamListener): void;
/** Unregisters a listener previously passed to `addAudioStreamListener`. */
removeAudioStreamListener(listener: AudioStreamListener): void;

/**
* @deprecated AnalyserNode is a web-only API and will not work on all
Expand Down
5 changes: 5 additions & 0 deletions packages/client/src/OutputController.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import type { FormatConfig } from "./utils/BaseConnection.js";
import type { AudioStreamListener } from "./AudioStream.js";

export type OutputDeviceConfig = {
outputDeviceId?: string;
Expand All @@ -9,6 +10,10 @@ export interface OutputController {
setDevice(config?: Partial<FormatConfig> & OutputDeviceConfig): Promise<void>;
setVolume(volume: number): void;
interrupt(resetDuration?: number): void;
/** Returns the assistant output audio stream, if one is available. */
getAudioStream(): MediaStream | null;
/**
 * Registers a listener for changes to the assistant output audio stream.
 * NOTE(review): the WebRTC connection's implementation also invokes the
 * listener immediately with the current stream (possibly `null`) —
 * confirm whether every implementation is expected to do the same.
 */
addAudioStreamListener(listener: AudioStreamListener): void;
/** Unregisters a listener previously passed to `addAudioStreamListener`. */
removeAudioStreamListener(listener: AudioStreamListener): void;

/**
* @deprecated AnalyserNode is a web-only API and will not work on all
Expand Down
8 changes: 8 additions & 0 deletions packages/client/src/TextConversation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@ export class TextConversation extends BaseConversation {
return EMPTY_FREQUENCY_DATA;
}

/** Always returns `null`: this conversation type exposes no input audio stream. */
public getInputAudioStream(): MediaStream | null {
return null;
}

/** Always returns `null`: this conversation type exposes no output audio stream. */
public getOutputAudioStream(): MediaStream | null {
return null;
}

/** Always reports an input volume of 0 for this conversation type. */
public getInputVolume(): number {
return 0;
}
Expand Down
22 changes: 22 additions & 0 deletions packages/client/src/VoiceConversation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import {
type PartialOptions,
} from "./BaseConversation.js";
import type { InputController } from "./InputController.js";
import type { AudioStreamListener } from "./AudioStream.js";
import type { OutputController } from "./OutputController.js";
import { setupStrategy } from "./platform/VoiceSessionSetup.js";

Expand Down Expand Up @@ -115,6 +116,14 @@ export class VoiceConversation extends BaseConversation {
}
};

/**
 * Relays input audio stream updates to the optional `onInputAudioStream`
 * callback from the conversation options. Does nothing when the callback
 * is not provided.
 */
private handleInputAudioStream: AudioStreamListener = (
  nextStream: MediaStream | null
) => {
  this.options.onInputAudioStream?.(nextStream);
};

/**
 * Relays output audio stream updates to the optional `onOutputAudioStream`
 * callback from the conversation options. Does nothing when the callback
 * is not provided.
 */
private handleOutputAudioStream: AudioStreamListener = (
  nextStream: MediaStream | null
) => {
  this.options.onOutputAudioStream?.(nextStream);
};

protected constructor(
options: Options,
connection: BaseConnection,
Expand All @@ -128,6 +137,9 @@ export class VoiceConversation extends BaseConversation {

playbackEventTarget?.addListener(this.handlePlaybackEvent);

input.addAudioStreamListener(this.handleInputAudioStream);
output.addAudioStreamListener(this.handleOutputAudioStream);

if (wakeLock) {
// Wake locks are automatically released when a page is hidden like when switching tabs
// so attempt to re-acquire lock when page becomes visible again
Expand All @@ -149,6 +161,8 @@ export class VoiceConversation extends BaseConversation {
this.cleanUp();
this.playbackEventTarget?.removeListener(this.handlePlaybackEvent);
this.playbackEventTarget = null;
this.input.removeAudioStreamListener(this.handleInputAudioStream);
this.output.removeAudioStreamListener(this.handleOutputAudioStream);
await super.handleEndSession();

if (this.visibilityChangeHandler) {
Expand Down Expand Up @@ -225,6 +239,14 @@ export class VoiceConversation extends BaseConversation {
return this.output.getVolume();
}

/**
 * Returns whatever the input controller currently reports as its audio
 * stream, which may be `null`.
 */
public getInputAudioStream(): MediaStream | null {
  const { input } = this;
  return input.getAudioStream();
}

/**
 * Returns whatever the output controller currently reports as its audio
 * stream, which may be `null`.
 */
public getOutputAudioStream(): MediaStream | null {
  const { output } = this;
  return output.getAudioStream();
}

public async changeInputDevice({
sampleRate,
format,
Expand Down
1 change: 1 addition & 0 deletions packages/client/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export type {
OutputController,
OutputDeviceConfig,
} from "./OutputController.js";
export type { AudioStreamListener } from "./AudioStream.js";
export type { InputConfig } from "./utils/input.js";
export type { OutputConfig } from "./utils/output.js";
export type {
Expand Down
74 changes: 73 additions & 1 deletion packages/client/src/utils/WebRTCConnection.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,9 @@ describe("WebRTCConnection", () => {
vi.stubGlobal("AudioContext", MockAudioContext);
vi.stubGlobal(
"MediaStream",
vi.fn((tracks: unknown[]) => ({ getTracks: () => tracks }))
vi.fn(function MediaStream(tracks: unknown[]) {
return { getTracks: () => tracks };
})
);

const connection = await WebRTCConnection.create({
Expand Down Expand Up @@ -298,4 +300,74 @@ describe("WebRTCConnection", () => {
}
}
);

it("notifies output audio stream listeners when the stream changes", async () => {
  // NOTE(review): presumably the mocked `Room` shares these `on`/`once`
  // spies with the instance created inside `WebRTCConnection.create`; the
  // mock setup lives outside this test — confirm.
  const mockRoom = new Room() as any;
  (mockRoom.on as ReturnType<typeof vi.fn>).mockImplementation(
    (event: string, callback: () => void) => {
      if (event === "connected") {
        queueMicrotask(callback);
      }
    }
  );
  (mockRoom.once as ReturnType<typeof vi.fn>).mockImplementation(
    (event: string, callback: () => void) => {
      if (event === "signalConnected") {
        queueMicrotask(callback);
      }
    }
  );

  const connection = await WebRTCConnection.create({
    conversationToken: "test-token",
    connectionType: "webrtc",
  });
  const listener = vi.fn();
  const removedListener = vi.fn();

  // Registering replays the current stream (none yet) to the new listener.
  connection.output.addAudioStreamListener(listener);
  expect(listener).toHaveBeenCalledWith(null);

  // A listener that was removed must not observe any later change. Clear
  // the replay call it received on registration so only later calls count.
  connection.output.addAudioStreamListener(removedListener);
  connection.output.removeAudioStreamListener(removedListener);
  removedListener.mockClear();

  const stream = { getTracks: () => [] } as unknown as MediaStream;
  connection["setOutputAudioStream"](stream);
  expect(connection.output.getAudioStream()).toBe(stream);
  expect(listener).toHaveBeenLastCalledWith(stream);

  // Closing the connection clears the stream and tells listeners so.
  connection.close();
  expect(listener).toHaveBeenLastCalledWith(null);
  expect(connection.output.getAudioStream()).toBeNull();
  expect(removedListener).not.toHaveBeenCalled();
});

it("notifies input audio stream listeners when the stream changes", async () => {
  // NOTE(review): presumably the mocked `Room` shares these `on`/`once`
  // spies with the instance created inside `WebRTCConnection.create`; the
  // mock setup lives outside this test — confirm.
  const mockRoom = new Room() as any;
  (mockRoom.on as ReturnType<typeof vi.fn>).mockImplementation(
    (event: string, callback: () => void) => {
      if (event === "connected") {
        queueMicrotask(callback);
      }
    }
  );
  (mockRoom.once as ReturnType<typeof vi.fn>).mockImplementation(
    (event: string, callback: () => void) => {
      if (event === "signalConnected") {
        queueMicrotask(callback);
      }
    }
  );

  const connection = await WebRTCConnection.create({
    conversationToken: "test-token",
    connectionType: "webrtc",
  });
  const listener = vi.fn();
  const removedListener = vi.fn();

  // Registering replays the current stream (none yet) to the new listener.
  connection.input.addAudioStreamListener(listener);
  expect(listener).toHaveBeenCalledWith(null);

  // A listener that was removed must not observe any later change. Clear
  // the replay call it received on registration so only later calls count.
  connection.input.addAudioStreamListener(removedListener);
  connection.input.removeAudioStreamListener(removedListener);
  removedListener.mockClear();

  const stream = { getTracks: () => [] } as unknown as MediaStream;
  connection["setInputAudioStream"](stream);
  expect(connection.input.getAudioStream()).toBe(stream);
  expect(listener).toHaveBeenLastCalledWith(stream);

  // Closing the connection clears the stream and tells listeners so.
  connection.close();
  expect(listener).toHaveBeenLastCalledWith(null);
  expect(connection.input.getAudioStream()).toBeNull();
  expect(removedListener).not.toHaveBeenCalled();
});
});
66 changes: 66 additions & 0 deletions packages/client/src/utils/WebRTCConnection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import {
import { arrayBufferToBase64 } from "./audio.js";
import { loadRawAudioProcessor } from "./rawAudioProcessor.generated.js";
import type { InputController, InputDeviceConfig } from "../InputController.js";
import type { AudioStreamListener } from "../AudioStream.js";
import type {
OutputController,
OutputDeviceConfig,
Expand Down Expand Up @@ -60,6 +61,10 @@ export class WebRTCConnection extends BaseConnection {
private audioCaptureContext: AudioContext | null = null;
private audioElements: HTMLAudioElement[] = [];
private outputDeviceId: string | null = null;
// Most recently set user input stream; null until one is set and again
// after it is cleared.
private inputAudioStream: MediaStream | null = null;
// Callbacks invoked whenever `inputAudioStream` is replaced.
private inputAudioStreamListeners = new Set<AudioStreamListener>();
// Most recently set output stream; null until one is set and again after
// it is cleared.
private outputAudioStream: MediaStream | null = null;
// Callbacks invoked whenever `outputAudioStream` is replaced.
private outputAudioStreamListeners = new Set<AudioStreamListener>();

private inputAnalyser: AnalyserNode | null = null;
private inputAudioContext: AudioContext | null = null;
Expand Down Expand Up @@ -155,6 +160,14 @@ export class WebRTCConnection extends BaseConnection {
}
},
isMuted: () => this._isMuted,
// Current user input stream, or null when none is set.
getAudioStream: () => this.inputAudioStream,
// Registers the listener, then immediately calls it with the current
// stream (possibly null) so a new subscriber starts with the latest value.
addAudioStreamListener: (listener: AudioStreamListener) => {
this.inputAudioStreamListeners.add(listener);
listener(this.inputAudioStream);
},
// Unregisters the listener; has no effect if it was never registered.
removeAudioStreamListener: (listener: AudioStreamListener) => {
this.inputAudioStreamListeners.delete(listener);
},
getAnalyser: () => this.inputAnalyser ?? undefined,
getVolume: () => {
if (this._isMuted) return 0;
Expand Down Expand Up @@ -200,6 +213,14 @@ export class WebRTCConnection extends BaseConnection {
// Audio interruption is managed by the server/agent
},
getAnalyser: () => this.outputAnalyser ?? undefined,
// Current output stream, or null when none is set.
getAudioStream: () => this.outputAudioStream,
// Registers the listener, then immediately calls it with the current
// stream (possibly null) so a new subscriber starts with the latest value.
addAudioStreamListener: (listener: AudioStreamListener) => {
this.outputAudioStreamListeners.add(listener);
listener(this.outputAudioStream);
},
// Unregisters the listener; has no effect if it was never registered.
removeAudioStreamListener: (listener: AudioStreamListener) => {
this.outputAudioStreamListeners.delete(listener);
},
getVolume: () => this.outputVolumeProvider.getVolume(),
getByteFrequencyData: (buffer: Uint8Array<ArrayBuffer>) => {
this.outputVolumeProvider.getByteFrequencyData(buffer);
Expand Down Expand Up @@ -330,6 +351,7 @@ export class WebRTCConnection extends BaseConnection {
Track.Source.Microphone
)?.track;
if (micTrack) {
connection.setInputAudioStreamFromTrack(micTrack.mediaStreamTrack);
connection.setupInputAnalyser(micTrack.mediaStreamTrack);
}

Expand Down Expand Up @@ -438,6 +460,10 @@ export class WebRTCConnection extends BaseConnection {
// Store reference for volume control
this.audioElements.push(audioElement);

// Expose the agent's remote track immediately; audio capture below is
// best-effort and may fail in non-browser environments.
this.setOutputAudioStreamFromTrack(remoteAudioTrack.mediaStreamTrack);

// Apply current volume if it exists (for when volume was set before audio track arrived)
if (this.audioElements.length === 1) {
// First audio element - trigger a callback to sync with current volume
Expand Down Expand Up @@ -499,6 +525,7 @@ export class WebRTCConnection extends BaseConnection {
this.inputAudioContext = null;
this.inputAnalyser = null;
}
this.setInputAudioStream(null);

// Clean up audio capture context (non-blocking)
if (this.audioCaptureContext) {
Expand All @@ -507,6 +534,7 @@ export class WebRTCConnection extends BaseConnection {
});
this.audioCaptureContext = null;
}
this.setOutputAudioStream(null);

// Clean up audio elements
this.audioElements.forEach(element => {
Expand Down Expand Up @@ -601,6 +629,43 @@ export class WebRTCConnection extends BaseConnection {
this.outputVolumeProvider = provider;
}

/**
 * Wraps a single track in a new `MediaStream`.
 *
 * @param mediaStreamTrack - Track to place in the new stream.
 * @returns The new stream, or `null` when the `MediaStream` global is not
 * defined in the current environment.
 */
private createMediaStream(
  mediaStreamTrack: MediaStreamTrack
): MediaStream | null {
  const hasMediaStream = typeof MediaStream !== "undefined";
  return hasMediaStream ? new MediaStream([mediaStreamTrack]) : null;
}

/**
 * Builds a stream around `mediaStreamTrack` and stores it as the current
 * input audio stream (or stores `null` if no stream could be created).
 */
private setInputAudioStreamFromTrack(
  mediaStreamTrack: MediaStreamTrack
): void {
  const wrappedStream = this.createMediaStream(mediaStreamTrack);
  this.setInputAudioStream(wrappedStream);
}

/**
 * Builds a stream around `mediaStreamTrack` and stores it as the current
 * output audio stream (or stores `null` if no stream could be created).
 */
private setOutputAudioStreamFromTrack(
  mediaStreamTrack: MediaStreamTrack
): void {
  const wrappedStream = this.createMediaStream(mediaStreamTrack);
  this.setOutputAudioStream(wrappedStream);
}

/**
 * Stores `stream` as the current input audio stream and notifies the input
 * stream listeners. Does nothing when `stream` is already the stored value.
 *
 * @param stream - New input stream, or `null` to clear it.
 */
private setInputAudioStream(stream: MediaStream | null): void {
  if (this.inputAudioStream === stream) {
    return;
  }
  this.inputAudioStream = stream;
  this.notifyAudioStreamListeners(this.inputAudioStreamListeners, stream);
}

/**
 * Stores `stream` as the current output audio stream and notifies the
 * output stream listeners. Does nothing when `stream` is already the stored
 * value.
 *
 * @param stream - New output stream, or `null` to clear it.
 */
private setOutputAudioStream(stream: MediaStream | null): void {
  if (this.outputAudioStream === stream) {
    return;
  }
  this.outputAudioStream = stream;
  this.notifyAudioStreamListeners(this.outputAudioStreamListeners, stream);
}

/**
 * Delivers `stream` to every listener registered at the time of the call.
 *
 * Dispatch runs over a snapshot: a listener added while notifications are
 * in flight was already handed the current stream on registration, so
 * visiting it again would notify it twice. A listener that throws is
 * logged and skipped, so the remaining listeners still run and callers
 * that clear the stream during teardown can finish their own cleanup.
 */
private notifyAudioStreamListeners(
  listeners: ReadonlySet<AudioStreamListener>,
  stream: MediaStream | null
): void {
  for (const listener of Array.from(listeners)) {
    try {
      listener(stream);
    } catch (error) {
      console.error("Audio stream listener failed:", error);
    }
  }
}

private async setupAudioCapture(track: RemoteAudioTrack) {
try {
// Create audio context for processing
Expand Down Expand Up @@ -738,6 +803,7 @@ export class WebRTCConnection extends BaseConnection {
});

// Reconnect the input analyser to the new track
this.setInputAudioStreamFromTrack(audioTrack.mediaStreamTrack);
this.setupInputAnalyser(audioTrack.mediaStreamTrack);
} catch (error) {
console.error("Failed to change input device:", error);
Expand Down
Loading
Loading