diff --git a/jest.config.js b/jest.config.js
index ee2c4fe946..3309ddb808 100644
--- a/jest.config.js
+++ b/jest.config.js
@@ -5,6 +5,9 @@ export default {
   transformIgnorePatterns: [
     '/node_modules/(?!@microsoft/kiota-http-fetchlibrary).+\\.js$',
   ],
+  moduleNameMapper: {
+    '^(\\.{1,2}/.*)\\.js$': '$1',
+  },
   globals: {
     'ts-jest': {
       useESM: true,
diff --git a/tests/integration/inference.test.ts b/tests/integration/inference.test.ts
new file mode 100644
index 0000000000..f912f38ae4
--- /dev/null
+++ b/tests/integration/inference.test.ts
@@ -0,0 +1,250 @@
+import dotenv from "dotenv";
+import InferenceClient from "../../src/inference-gen/InferenceClient.js";
+import { SSEStream } from "../../src/inference-gen/InferenceClient.js";
+
+dotenv.config();
+
+const apiKey = process.env.MODEL_ACCESS_KEY;
+if (!apiKey) {
+  throw new Error("MODEL_ACCESS_KEY is not set. Please check your .env file.");
+}
+
+const client = new InferenceClient({ apiKey });
+
+let CHAT_MODEL = process.env.DO_CHAT_MODEL ?? "openai-gpt-oss-20b";
+let RESPONSES_MODEL = process.env.DO_RESPONSES_MODEL ?? "openai-gpt-oss-20b";
+
+describe("Inference Integration Tests", () => {
+
+  /* ─── /v1/models ─────────────────────────────────────────────────── */
+
+  describe("models.list", () => {
+    it("returns a list of available models with expected shape", async () => {
+      const result = await client.models.list();
+
+      // Auto-discover models for subsequent tests if not set via env.
+      // Prefer non-reasoning models (reasoning models return content as object, not string).
+      if (Array.isArray(result.data)) {
+        const ids: string[] = result.data.map((m: Record<string, unknown>) => m.id as string);
+        const skip = (id: string) =>
+          id.includes("deepseek-r1") || id.includes("reasoning") || id.includes("embed");
+
+        if (!process.env.DO_CHAT_MODEL) {
+          const pick = ids.find(id => !skip(id) && (id.includes("llama") || id.includes("instruct")))
+            ?? ids.find(id => !skip(id))
+            ?? ids[0];
+          CHAT_MODEL = pick;
+        }
+        if (!process.env.DO_RESPONSES_MODEL) {
+          RESPONSES_MODEL = CHAT_MODEL;
+        }
+      }
+      console.log(`Using CHAT_MODEL=${CHAT_MODEL}, RESPONSES_MODEL=${RESPONSES_MODEL}`);
+
+      expect(result).toBeDefined();
+      expect(result.object).toBe("list");
+      expect(Array.isArray(result.data)).toBe(true);
+      expect(result.data.length).toBeGreaterThan(0);
+
+      const first = result.data[0];
+      expect(first.id).toBeDefined();
+      expect(typeof first.id).toBe("string");
+      expect(first.object).toBe("model");
+      expect(typeof first.owned_by).toBe("string");
+    }, 30000);
+  });
+
+  /* ─── /v1/chat/completions (non-streaming) ───────────────────────── */
+
+  describe("chat.completions.create", () => {
+    it("returns a completion with choices[0].message.content", async () => {
+      const completion = await client.chat.completions.create({
+        model: CHAT_MODEL,
+        messages: [
+          { role: "user", content: "Reply with exactly: INTEGRATION_OK" },
+        ],
+        max_tokens: 20,
+      });
+
+      expect(completion).toBeDefined();
+      expect(completion.id).toBeDefined();
+      expect(completion.object).toBe("chat.completion");
+      expect(Array.isArray(completion.choices)).toBe(true);
+      expect(completion.choices.length).toBeGreaterThan(0);
+
+      const msg = completion.choices[0].message;
+      expect(msg.role).toBe("assistant");
+      expect(msg.content).toBeDefined();
+      // content is a string for standard models, may be array/object for reasoning models
+      if (typeof msg.content === "string") {
+        expect(msg.content.length).toBeGreaterThan(0);
+      }
+
+      // snake_case fields preserved (not camelCase)
+      expect(completion.choices[0].finish_reason).toBeDefined();
+    }, 30000);
+
+    it("streaming returns an async iterable SSEStream with delta chunks", async () => {
+      const stream = await client.chat.completions.create({
+        model: CHAT_MODEL,
+        messages: [
+          { role: "user", content: "Say hello in one word." },
+        ],
+        max_tokens: 10,
+        stream: true,
+      });
+
+      expect(stream).toBeInstanceOf(SSEStream);
+
+      let chunkCount = 0;
+      for await (const _chunk of stream as SSEStream) {
+        chunkCount++;
+      }
+
+      expect(chunkCount).toBeGreaterThan(0);
+    }, 30000);
+
+    it("streaming with callbacks delivers chunks via onData", async () => {
+      let chunkCount = 0;
+      let completeCalled = false;
+
+      await client.chat.completions.create(
+        {
+          model: CHAT_MODEL,
+          messages: [{ role: "user", content: "Say hi." }],
+          max_tokens: 10,
+          stream: true,
+        },
+        {
+          onData: () => { chunkCount++; },
+          onComplete: () => { completeCalled = true; },
+        },
+      );
+
+      expect(chunkCount).toBeGreaterThan(0);
+      expect(completeCalled).toBe(true);
+    }, 30000);
+  });
+
+  /* ─── /v1/responses (non-streaming) ──────────────────────────────── */
+
+  describe("responses.create", () => {
+    it("returns a response with aggregated output_text", async () => {
+      let response: Record<string, unknown> | undefined;
+      try {
+        response = await client.responses.create({
+          model: RESPONSES_MODEL,
+          input: "Reply with exactly: INTEGRATION_OK",
+        });
+      } catch (err: unknown) {
+        const msg = err instanceof Error ? err.message : "";
+        if (msg.includes("401") || msg.includes("403") || msg.includes("404") || msg.includes("not available")) {
+          console.warn(`responses.create skipped: ${msg.slice(0, 80)}`);
+          return;
+        }
+        throw err;
+      }
+
+      expect(response).toBeDefined();
+      expect(response!.id).toBeDefined();
+      expect(typeof response!.output_text).toBe("string");
+      expect((response!.output_text as string).length).toBeGreaterThan(0);
+
+      expect(Array.isArray(response!.output)).toBe(true);
+      const output = response!.output as Array<Record<string, unknown>>;
+      expect(output.length).toBeGreaterThan(0);
+      const respMsg = output.find((o) => o.type === "message");
+      expect(respMsg).toBeDefined();
+      expect(respMsg!.role).toBe("assistant");
+      expect(Array.isArray(respMsg!.content)).toBe(true);
+    }, 30000);
+
+    it("streaming returns SSEStream with event objects", async () => {
+      let stream: SSEStream | undefined;
+      try {
+        stream = await client.responses.create({
+          model: RESPONSES_MODEL,
+          input: "Say hello in one word.",
+          stream: true,
+        }) as SSEStream;
+      } catch (err: unknown) {
+        const msg = err instanceof Error ? err.message : "";
+        if (msg.includes("401") || msg.includes("403") || msg.includes("404") || msg.includes("not available")) {
+          console.warn(`responses.create streaming skipped: ${msg.slice(0, 80)}`);
+          return;
+        }
+        throw err;
+      }
+
+      expect(stream).toBeInstanceOf(SSEStream);
+
+      let eventCount = 0;
+      try {
+        for await (const _event of stream!) {
+          eventCount++;
+        }
+      } catch (iterErr: unknown) {
+        const msg = iterErr instanceof Error ? iterErr.message : "";
+        if (msg.includes("401") || msg.includes("403") || msg.includes("404") || msg.includes("not available")) {
+          console.warn(`responses.create streaming skipped: ${msg.slice(0, 80)}`);
+          return;
+        }
+        throw iterErr;
+      }
+
+      expect(eventCount).toBeGreaterThan(0);
+    }, 30000);
+  });
+
+  /* ─── /v1/async-invoke ───────────────────────────────────────────── */
+
+  describe("asyncInvoke.create", () => {
+    it("queues a request and returns a request_id", async () => {
+      let result: Record<string, unknown> | undefined;
+      try {
+        result = await client.asyncInvoke.create({
+          model_id: "fal-ai/flux/schnell",
+          input: { prompt: "A tiny shark" },
+        });
+      } catch (err: unknown) {
+        const msg = err instanceof Error ? err.message : "";
+        if (msg.includes("401") || msg.includes("403") || msg.includes("404")) {
+          console.warn("asyncInvoke skipped: model not available for this key");
+          return;
+        }
+        throw err;
+      }
+
+      expect(result).toBeDefined();
+      expect(typeof result!.request_id).toBe("string");
+      expect((result!.request_id as string).length).toBeGreaterThan(0);
+    }, 60000);
+  });
+
+  /* ─── snake_case verification ────────────────────────────────────── */
+
+  describe("snake_case field preservation", () => {
+    it("chat completion response uses snake_case, not camelCase", async () => {
+      const completion = await client.chat.completions.create({
+        model: CHAT_MODEL,
+        messages: [{ role: "user", content: "Hi" }],
+        max_tokens: 5,
+      });
+
+      const raw = JSON.stringify(completion);
+
+      // Should have snake_case
+      expect(raw).toContain("finish_reason");
+      // Should NOT have camelCase equivalents
+      expect(raw).not.toContain("finishReason");
+    }, 30000);
+
+    it("models response uses snake_case owned_by", async () => {
+      const models = await client.models.list();
+      const raw = JSON.stringify(models);
+
+      expect(raw).toContain("owned_by");
+      expect(raw).not.toContain("ownedBy");
+    }, 30000);
+  });
+});
diff --git a/tests/mocked/inference-smoke.test.ts b/tests/mocked/inference-smoke.test.ts
new file mode 100644
index 0000000000..3ac3310448
--- /dev/null
+++ b/tests/mocked/inference-smoke.test.ts
@@ -0,0 +1,300 @@
+import nock from "nock";
+import InferenceClient from "../../src/inference-gen/InferenceClient.js";
+import { SSEStream } from "../../src/inference-gen/InferenceClient.js";
+
+const BASE = "https://inference.do-ai.run";
+
+describe("InferenceClient", () => {
+  afterEach(() => nock.cleanAll());
+
+  /* ─── responses.create ───────────────────────────────────────────── */
+
+  it("responses.create — same shape as OpenAI readme", async () => {
+    const client = new InferenceClient({ apiKey: "test-key" });
+
+    nock(BASE)
+      .post("/v1/responses")
+      .reply(200, {
+        id: "resp-1",
+        object: "response",
+        output: [
+          {
+            type: "message",
+            role: "assistant",
+            content: [
+              { type: "output_text", text: "Ahoy! Semicolons be optional, matey!" },
+            ],
+          },
+        ],
+      });
+
+    const response = await client.responses.create({
+      model: "meta-llama/Meta-Llama-3.1-8B-Instruct",
+      instructions: "You are a coding assistant that talks like a pirate",
+      input: "Are semicolons optional in JavaScript?",
+    });
+
+    expect(response).toBeDefined();
+    expect(response!.output_text).toBe("Ahoy! Semicolons be optional, matey!");
+  });
+
+  it("responses.create aggregates multiple output_text parts", async () => {
+    const client = new InferenceClient({ apiKey: "test-key" });
+
+    nock(BASE)
+      .post("/v1/responses")
+      .reply(200, {
+        id: "resp-2",
+        object: "response",
+        output: [
+          {
+            type: "message",
+            role: "assistant",
+            content: [
+              { type: "output_text", text: "Part 1. " },
+              { type: "output_text", text: "Part 2." },
+            ],
+          },
+        ],
+      });
+
+    const response = await client.responses.create({
+      model: "m",
+      input: "hi",
+    });
+
+    expect(response!.output_text).toBe("Part 1. Part 2.");
+  });
+
+  /* ─── chat.completions.create ────────────────────────────────────── */
+
+  it("chat.completions.create — same shape as OpenAI readme", async () => {
+    const client = new InferenceClient({ apiKey: "test-key" });
+
+    nock(BASE)
+      .post("/v1/chat/completions")
+      .reply(200, {
+        object: "chat.completion",
+        choices: [
+          {
+            message: { role: "assistant", content: "Arr, they be optional!" },
+          },
+        ],
+      });
+
+    const completion = await client.chat.completions.create({
+      model: "meta-llama/Meta-Llama-3.1-8B-Instruct",
+      messages: [
+        { role: "developer", content: "Talk like a pirate." },
+        { role: "user", content: "Are semicolons optional in JavaScript?" },
+      ],
+    });
+
+    expect(completion).toBeDefined();
+    expect(completion!.choices?.[0]?.message?.content).toBe("Arr, they be optional!");
+  });
+
+  /* ─── models.list ────────────────────────────────────────────────── */
+
+  it("models.list — returns model data", async () => {
+    const client = new InferenceClient({ apiKey: "test-key" });
+
+    nock(BASE)
+      .get("/v1/models")
+      .reply(200, {
+        object: "list",
+        data: [
+          { id: "llama3.3-70b-instruct", object: "model", owned_by: "meta" },
+          { id: "openai-gpt-oss-20b", object: "model", owned_by: "openai" },
+        ],
+      });
+
+    const result = await client.models.list();
+
+    expect(result).toBeDefined();
+    expect(result!.data).toHaveLength(2);
+    expect(result!.data![0]!.id).toBe("llama3.3-70b-instruct");
+  });
+
+  /* ─── images.generate ────────────────────────────────────────────── */
+
+  it("images.generations.create — returns base64 data", async () => {
+    const client = new InferenceClient({ apiKey: "test-key" });
+
+    nock(BASE)
+      .post("/v1/images/generations")
+      .reply(200, {
+        created: 1700000000,
+        data: [{ b64_json: "iVBORw0KGgo..." }],
+      });
+
+    const result = await client.images.generations.create({
+      model: "openai-gpt-image-1",
+      prompt: "A cute otter",
+      n: 1,
+      size: "256x256",
+    });
+
+    expect(result).toBeDefined();
+    expect(result!.data?.[0]?.b64_json).toBe("iVBORw0KGgo...");
+  });
+
+  it("images.generate — OpenAI-style alias", async () => {
+    const client = new InferenceClient({ apiKey: "test-key" });
+
+    nock(BASE)
+      .post("/v1/images/generations")
+      .reply(200, {
+        created: 1700000000,
+        data: [{ b64_json: "abc123" }],
+      });
+
+    const result = await client.images.generate({
+      model: "openai-gpt-image-1",
+      prompt: "A cat",
+    });
+
+    expect(result).toBeDefined();
+    expect(result!.data?.[0]?.b64_json).toBe("abc123");
+  });
+
+  /* ─── streaming guards ───────────────────────────────────────────── */
+
+  it("responses.create with stream: true returns SSEStream", async () => {
+    const client = new InferenceClient({ apiKey: "test-key" });
+
+    nock(BASE)
+      .post("/v1/responses")
+      .reply(200, "data: [DONE]\n\n", { "Content-Type": "text/event-stream" });
+
+    const result = await client.responses.create({
+      model: "m",
+      input: "hi",
+      stream: true,
+    });
+
+    expect(result).toBeInstanceOf(SSEStream);
+  });
+
+  it("chat.completions.create with stream: true returns SSEStream", async () => {
+    const client = new InferenceClient({ apiKey: "test-key" });
+
+    nock(BASE)
+      .post("/v1/chat/completions")
+      .reply(200, "data: [DONE]\n\n", { "Content-Type": "text/event-stream" });
+
+    const result = await client.chat.completions.create({
+      model: "m",
+      messages: [{ role: "user", content: "hi" }],
+      stream: true,
+    });
+
+    expect(result).toBeInstanceOf(SSEStream);
+  });
+
+  /* ─── constructor validation ─────────────────────────────────────── */
+
+  it("throws on missing apiKey", () => {
+    expect(() => new InferenceClient({ apiKey: "" })).toThrow("apiKey is required");
+  });
+
+  it("accepts custom baseURL", () => {
+    const client = new InferenceClient({
+      apiKey: "test-key",
+      baseURL: "https://custom.inference.host/v1",
+    });
+    expect(client).toBeDefined();
+  });
+
+  /* ─── asyncInvoke.create ─────────────────────────────────────────── */
+
+  it("asyncInvoke.create — fal model", async () => {
+    const client = new InferenceClient({ apiKey: "test-key" });
+
+    nock(BASE)
+      .post("/v1/async-invoke")
+      .reply(200, {
+        request_id: "abc-123",
+        status: "QUEUED",
+        model_id: "fal-ai/flux/schnell",
+      });
+
+    const result = await client.asyncInvoke.create({
+      model_id: "fal-ai/flux/schnell",
+      input: { prompt: "A futuristic city" },
+    });
+
+    expect(result).toBeDefined();
+    expect(result!.request_id).toBe("abc-123");
+  });
+
+  /* ─── audio.generate ──────────────────────────────────────────────── */
+
+  it("audio.generate — async audio generation", async () => {
+    const client = new InferenceClient({ apiKey: "test-key" });
+
+    nock(BASE)
+      .post("/v1/async-invoke", (body: Record<string, unknown>) => {
+        expect(body.model_id).toBe("fal-ai/stable-audio-25/text-to-audio");
+        const input = body.input as Record<string, unknown>;
+        expect(input.prompt).toBe("ocean waves crashing");
+        expect(input.seconds_total).toBe(10);
+        return true;
+      })
+      .reply(200, { request_id: "audio-1", status: "QUEUED" });
+
+    const res = await client.audio.generate({
+      model_id: "fal-ai/stable-audio-25/text-to-audio",
+      prompt: "ocean waves crashing",
+      seconds_total: 10,
+    });
+
+    expect(res.request_id).toBe("audio-1");
+    expect(res.status).toBe("QUEUED");
+  });
+
+  /* ─── audio.speech.create ─────────────────────────────────────────── */
+
+  it("audio.speech.create — TTS", async () => {
+    const client = new InferenceClient({ apiKey: "test-key" });
+
+    nock(BASE)
+      .post("/v1/async-invoke", (body: Record<string, unknown>) => {
+        expect(body.model_id).toBe("fal-ai/playai/tts/v3");
+        const input = body.input as Record<string, unknown>;
+        expect(input.text).toBe("Hello from DigitalOcean");
+        return true;
+      })
+      .reply(200, { request_id: "tts-1", status: "QUEUED" });
+
+    const res = await client.audio.speech.create({
+      model_id: "fal-ai/playai/tts/v3",
+      input: "Hello from DigitalOcean",
+    });
+
+    expect(res.request_id).toBe("tts-1");
+  });
+
+  /* ─── async_images.generate ───────────────────────────────────────── */
+
+  it("async_images.generate — async image generation", async () => {
+    const client = new InferenceClient({ apiKey: "test-key" });
+
+    nock(BASE)
+      .post("/v1/async-invoke", (body: Record<string, unknown>) => {
+        expect(body.model_id).toBe("fal-ai/fast-sdxl");
+        const input = body.input as Record<string, unknown>;
+        expect(input.prompt).toBe("A cute otter");
+        return true;
+      })
+      .reply(200, { request_id: "img-1", status: "QUEUED" });
+
+    const res = await client.async_images.generate({
+      model_id: "fal-ai/fast-sdxl",
+      prompt: "A cute otter",
+    });
+
+    expect(res.request_id).toBe("img-1");
+    expect(res.status).toBe("QUEUED");
+  });
+});