dimknaf · WarGloom · May 25, 2026 · May 25, 2026 · May 25, 2026
diff --git a/README.md b/README.md
@@ -72,7 +72,7 @@ Any reachable hostname/IP works — the connecting user just needs network acces
 
 ### 4. Pick an LLM provider (for the internal agent)
 
-The agent talks to any LiteLLM-supported backend. **Recommended for new users: `deepinfra` with `google/gemma-4-31B-it`** — fast (5–30s per agent call), cheap, validated end-to-end on the wiki/maintainer/writer pipeline. `nim` is a free-tier fallback (occasionally flaky). The `vllm_*` profiles run a local model on your own GPU workstation — useful for offline / cost-free experiments, but require a running vLLM server reachable from the docker network (typically via SSH tunnel).
+The agent talks to any LiteLLM-supported backend. **Recommended for new users: `deepinfra` with `google/gemma-4-31B-it`** — fast (5–30s per agent call), cheap, validated end-to-end on the wiki/maintainer/writer pipeline. `nim` is a free-tier fallback (occasionally flaky). The `vllm_*` profiles run a local model on your own GPU workstation — useful for offline / cost-free experiments, but require a running vLLM server reachable from the docker network (typically via SSH tunnel). Use `openai_compatible` for generic OpenAI-compatible `/v1` endpoints such as Ollama, copilot-api, or LM Studio.
 
 In `.env`:
 ```
@@ -81,9 +81,18 @@ DEEPINFRA_API_KEY=...        # if profile=deepinfra — get from https://deepinf
 NVIDIA_NIM_API_KEY=...       # if profile=nim       — get from https://build.nvidia.com/
 ```
 
+For an OpenAI-compatible local endpoint:
+
+```
+LLM_PROFILE=openai_compatible
+AGENT_MODEL=openai/llama3.2:3b
+AGENT_BASE_URL=http://host.docker.internal:11434/v1
+AGENT_API_KEY=              # optional; set only if the endpoint requires auth
+```
+
 Only the key matching your chosen profile needs to be filled. Leave the other blank or absent.
 
-Adding a third provider (Together, OpenAI, local vLLM, whatever) is a two-line entry in [`braindb/config.py::_LLM_PROFILES`](braindb/config.py) + an env var — no other code changes. See [`CONTRIBUTING.md`](CONTRIBUTING.md) for the recipe.
+Adding another hosted provider (Together, OpenAI, whatever) is a two-line entry in [`braindb/config.py::_LLM_PROFILES`](braindb/config.py) + an env var — no other code changes. See [`CONTRIBUTING.md`](CONTRIBUTING.md) for the recipe.
 
 ### 5. Create the Docker network, then bring the stack up
 
@@ -180,18 +189,28 @@ The agent has 21 tools — every single BrainDB endpoint plus `delegate_to_subag
 
 - **`deepinfra` — recommended default.** Model `google/gemma-4-31B-it`. Fast (5–30s per agent call), cheap, validated end-to-end.
 - `nim` — NVIDIA NIM, model `google/gemma-4-31b-it`. Free tier, occasionally flaky.
+- `openai_compatible` — any OpenAI-compatible `/v1` endpoint. Set `AGENT_MODEL=openai/<model-id>` and `AGENT_BASE_URL`; `AGENT_API_KEY` is optional (a placeholder key is sent when it is blank).
 - `vllm_workstation` / `vllm_workstation_qwen` / `vllm_workstation_gemma` — local vLLM running on your own GPU (advanced / offline; needs the server reachable from the docker network, usually via SSH tunnel).
 
 Each profile is a model-prefix + env-var pair; adding a new one is a dict entry.
 
 ```
-LLM_PROFILE=deepinfra         # or nim / vllm_workstation / vllm_workstation_qwen
+LLM_PROFILE=deepinfra         # or nim / openai_compatible / a vllm_workstation* profile
 DEEPINFRA_API_KEY=...         # required if profile=deepinfra (https://deepinfra.com/)
 NVIDIA_NIM_API_KEY=...        # required if profile=nim (https://build.nvidia.com/)
 VLLM_API_KEY=...              # optional, only if local vLLM is started with --api-key
 AGENT_MODEL=                  # optional: override the profile's default model
 ```
 
+For `openai_compatible`, `AGENT_MODEL` is required because BrainDB does not know which model your endpoint serves:
+
+```
+LLM_PROFILE=openai_compatible
+AGENT_MODEL=openai/llama3.2:3b
+AGENT_BASE_URL=http://host.docker.internal:11434/v1
+AGENT_API_KEY=
+```
+
 **Verbose logging**: set `AGENT_VERBOSE=true` in `.env` to log every tool call (entry args + exit elapsed/result) to stdout, visible via `docker logs braindb_api -f`.
 
 ---

diff --git a/braindb/config.py b/braindb/config.py
@@ -20,6 +20,10 @@
         "model": "deepinfra/google/gemma-4-31B-it",
         "api_key_env": "DEEPINFRA_API_KEY",
     },
+    "openai_compatible": {
+        "model": "",
+        "api_key_env": "AGENT_API_KEY",
+    },
     "vllm_workstation": {
         "model": "openai/cyankiwi/gemma-4-31B-it-AWQ-4bit",
         "api_key_env": "VLLM_API_KEY",
@@ -105,6 +109,7 @@ class Settings(BaseSettings):
     # Agent (LiteLLM — provider selected via llm_profile)
     llm_profile: str = "deepinfra"
     agent_model: str = ""          # blank = use profile's default model
+    agent_base_url: str = ""
     # Bumped 15 → 20 after live observation on Qwen 27B AWQ-INT4 (vLLM):
     # deep-research-style runs commonly need >15 tool turns to land
     # `final_answer`. 20 gives breathing room; finishes-fast providers
@@ -176,13 +181,13 @@ def resolved_api_key(self) -> str:
         key = os.getenv(profile["api_key_env"], "")
         # Self-hosted profiles (vLLM/Ollama) may run without auth, but the
         # OpenAI client still needs a non-empty key — supply a placeholder.
-        if not key and profile.get("base_url"):
+        if not key and self.resolved_base_url:
             return "EMPTY"
         return key
 
     @property
     def resolved_base_url(self) -> str | None:
-        return _LLM_PROFILES[self.llm_profile].get("base_url")
+        return self.agent_base_url or _LLM_PROFILES[self.llm_profile].get("base_url")
 
 
 settings = Settings()
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -11,6 +11,8 @@ services:
       HF_TOKEN: ${HF_TOKEN:-}
       LLM_PROFILE: ${LLM_PROFILE:-deepinfra}
       AGENT_MODEL: ${AGENT_MODEL:-}
+      AGENT_BASE_URL: ${AGENT_BASE_URL:-}
+      AGENT_API_KEY: ${AGENT_API_KEY:-}
       NVIDIA_NIM_API_KEY: ${NVIDIA_NIM_API_KEY:-}
       DEEPINFRA_API_KEY: ${DEEPINFRA_API_KEY:-}
       VLLM_API_KEY: ${VLLM_API_KEY:-}

diff --git a/pyproject.toml b/pyproject.toml
@@ -61,3 +61,6 @@ packages = ["braindb"]
 testpaths = ["tests"]
 timeout = 60
 addopts = "-ra"
+markers = [
+    "unit: tests that do not require a live BrainDB stack",
+]
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -42,8 +42,9 @@ def _wait_for_health(url: str, timeout: int = 30) -> bool:
 
 
 @pytest.fixture(scope="session", autouse=True)
-def _require_live_api() -> None:
-    """Fail fast and loud if the stack isn't up — tests have nothing to run against."""
+def _require_live_api(request: pytest.FixtureRequest) -> None:
+    if request.session.items and all(item.get_closest_marker("unit") for item in request.session.items):
+        return
     if not _wait_for_health(API_URL):
         pytest.fail(
             f"BrainDB API not healthy at {API_URL}. "
@@ -52,7 +53,7 @@ def _require_live_api() -> None:
 
 
 @pytest.fixture(scope="session", autouse=True)
-def _purge_pytest_artefacts_at_session_end() -> Iterator[None]:
+def _purge_pytest_artefacts_at_session_end(request: pytest.FixtureRequest) -> Iterator[None]:
     """Session teardown safety net for the per-test `created_entities`
     fixture: any test that errors before registering its IDs (or that
     bypasses the factories entirely) still leaks `_pytest_<hex>` rows
@@ -68,6 +69,8 @@ def _purge_pytest_artefacts_at_session_end() -> Iterator[None]:
     entities themselves.
     """
     yield
+    if request.session.items and all(item.get_closest_marker("unit") for item in request.session.items):
+        return
     try:
         from braindb.db import get_conn  # only imported at teardown
     except Exception as exc:   # noqa: BLE001 — defensive, never block the session

diff --git a/tests/test_config_profiles.py b/tests/test_config_profiles.py
@@ -0,0 +1,54 @@
+import pytest
+
+from braindb.config import Settings
+
+
+pytestmark = pytest.mark.unit
+
+
+def test_openai_compatible_profile_resolves_env_values(monkeypatch):
+    monkeypatch.setenv("AGENT_MODEL", "openai/gpt-5-mini")
+    monkeypatch.setenv("AGENT_BASE_URL", "http://localhost:4141/v1")
+    monkeypatch.setenv("AGENT_API_KEY", "test-key")
+
+    settings = Settings(_env_file=None, llm_profile="openai_compatible")
+
+    assert settings.resolved_agent_model == "openai/gpt-5-mini"
+    assert settings.resolved_base_url == "http://localhost:4141/v1"
+    assert settings.resolved_api_key == "test-key"
+
+
+def test_openai_compatible_profile_allows_empty_key_for_local_endpoint(monkeypatch):
+    monkeypatch.setenv("AGENT_MODEL", "openai/llama3.2:3b")
+    monkeypatch.setenv("AGENT_BASE_URL", "http://localhost:11434/v1")
+    monkeypatch.delenv("AGENT_API_KEY", raising=False)
+
+    settings = Settings(_env_file=None, llm_profile="openai_compatible")
+
+    assert settings.resolved_agent_model == "openai/llama3.2:3b"
+    assert settings.resolved_base_url == "http://localhost:11434/v1"
+    assert settings.resolved_api_key == "EMPTY"
+
+
+@pytest.mark.parametrize(
+    ("profile", "expected_model", "expected_base_url"),
+    [
+        ("deepinfra", "deepinfra/google/gemma-4-31B-it", None),
+        ("nim", "nvidia_nim/google/gemma-4-31b-it", None),
+        ("vllm_workstation", "openai/cyankiwi/gemma-4-31B-it-AWQ-4bit", "http://host.docker.internal:8002/v1"),
+        ("vllm_workstation_qwen", "openai/cyankiwi/Qwen3.6-27B-AWQ-INT4", "http://host.docker.internal:8010/v1"),
+        ("vllm_workstation_gemma", "openai/cyankiwi/gemma-4-31B-it-AWQ-4bit", "http://host.docker.internal:8009/v1"),
+    ],
+)
+def test_existing_profiles_keep_current_resolution(monkeypatch, profile, expected_model, expected_base_url):
+    monkeypatch.delenv("AGENT_MODEL", raising=False)
+    monkeypatch.delenv("AGENT_BASE_URL", raising=False)
+    monkeypatch.delenv("DEEPINFRA_API_KEY", raising=False)
+    monkeypatch.delenv("NVIDIA_NIM_API_KEY", raising=False)
+    monkeypatch.delenv("VLLM_API_KEY", raising=False)
+
+    settings = Settings(_env_file=None, llm_profile=profile)
+
+    assert settings.resolved_agent_model == expected_model
+    assert settings.resolved_base_url == expected_base_url
+    assert settings.resolved_api_key == ("EMPTY" if expected_base_url else "")