From f6de383bce252cea91f42ebe080831bb598e68d4 Mon Sep 17 00:00:00 2001
From: Mahil Patel <mahilpatel0808@gmail.com>
Date: Wed, 27 May 2026 23:09:24 +0530
Subject: [PATCH 1/4] docs: add tutorial for securing internal traffic with
 SPIRE (mTLS)

Signed-off-by: Mahil Patel <mahilpatel0808@gmail.com>
---
 .../docs/tutorials/internal-auth-spire.md     | 61 +++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 docs/agentcube/docs/tutorials/internal-auth-spire.md

diff --git a/docs/agentcube/docs/tutorials/internal-auth-spire.md b/docs/agentcube/docs/tutorials/internal-auth-spire.md
new file mode 100644
index 00000000..838bef75
--- /dev/null
+++ b/docs/agentcube/docs/tutorials/internal-auth-spire.md
@@ -0,0 +1,61 @@
+# Securing Internal Traffic with SPIRE (mTLS)
+
+This tutorial covers how we use SPIRE to establish zero-trust, mutually authenticated TLS (mTLS) for AgentCube's control plane.
+
+## Why Do We Need This?
+
+By default, internal components trust each other based on network reachability. With our new mTLS implementation, we're locking down the control plane so the Router and WorkloadManager cryptographically verify each other's SPIFFE identities before communicating.
+
+## 1. How the Architecture Works
+
+We implemented a robust mTLS abstraction layer that handles the heavy lifting for the control plane:
+
+- **Strict Identity Enforcement**: The Router and WorkloadManager have hardcoded SPIFFE IDs. 
+  - The WorkloadManager accepts any client presenting a valid certificate signed by the trusted CA pool (authorization is handled later at the application layer).
+  - However, the Router strictly verifies it's actually talking to the `WorkloadManagerSPIFFEID` before it forwards any traffic, preventing spoofed servers.
+- **Zero-Downtime Rotation**: A new `CertWatcher` actively monitors the certificates on disk (using `fsnotify`). When SPIRE rotates the short-lived certs, they are hot-reloaded without dropping any active connections.
+
+### What about Sandboxes?
+
+You might wonder why we don't inject mTLS into the `PicoD` or `AgentRuntime` sandboxes:
+- **Startup Latency**: Establishing a new TLS handshake for every short-lived sandbox adds significant latency. We opted to use our existing, blazing-fast JWT-based authentication for the `Router -> Sandbox` path instead.
+- **User-Defined Runtimes**: `AgentRuntime` sandboxes are user-defined containers. By avoiding mTLS sidecar injection, we keep them clean and pure without forcing SPIRE dependencies on them.
+- **WorkloadManager isolation**: The WorkloadManager never communicates directly with sandboxes over HTTP; it solely manages them via the secure Kubernetes API.
+
+## 2. Enabling mTLS on the Control Plane
+
+To turn on mTLS, you just need to pass the appropriate certificate paths to the binaries. They automatically enable mTLS when the CA bundle is provided alongside the cert and key.
+
+For the **Router**, use the `mtls` prefix:
+```bash
+--mtls-cert=/path/to/tls.crt
+--mtls-key=/path/to/tls.key
+--mtls-ca=/path/to/ca.crt
+```
+
+For the **WorkloadManager**, use the `tls` prefix:
+```bash
+--tls-cert=/path/to/tls.crt
+--tls-key=/path/to/tls.key
+--tls-ca=/path/to/ca.crt
+```
+
+When you deploy AgentCube via our Helm charts, you don't have to manually manage these certificates. Instead, the **`spiffe-helper` sidecar** runs alongside the Router and WorkloadManager containers in their respective pods. 
+
+Here is what the `spiffe-helper` sidecar does in the background:
+1. It securely authenticates with the local SPIRE Agent.
+2. It fetches the short-lived SVIDs (certificates) for the control plane component.
+3. It writes the certificates to a shared volume where the component's `CertWatcher` instantly picks them up.
+4. It continuously handles rotation before the certificates expire.
+
+## 3. Verifying It Works
+
+Once you've applied the configuration:
+
+1. Check the logs for the Router and WorkloadManager. You'll see the `CertWatcher` output confirming it has successfully loaded the certificates.
+2. Try deploying an agent and sending a request. 
+3. If everything is wired correctly, the Router will perform the mTLS handshake and verify the WorkloadManager's SPIFFE ID when provisioning the sandbox, and then seamlessly fall back to the low-latency JWT auth when proxying your request directly to the sandbox.
+
+## Next Steps
+
+Now that your control plane communications are locked down, your AgentCube deployment is running a zero-trust architecture. You can safely deploy sensitive agents in multi-tenant environments.
\ No newline at end of file

From ccb5fde14065f03efb37dab75494b2158528ef84 Mon Sep 17 00:00:00 2001
From: Mahil Patel <mahilpatel0808@gmail.com>
Date: Thu, 28 May 2026 13:35:36 +0530
Subject: [PATCH 2/4] docs: enhanced tutorial for securing internal traffic
 with SPIRE (mTLS)

Signed-off-by: Mahil Patel <mahilpatel0808@gmail.com>
---
 .../docs/tutorials/internal-auth-spire.md     | 331 ++++++++++++++++--
 1 file changed, 294 insertions(+), 37 deletions(-)

diff --git a/docs/agentcube/docs/tutorials/internal-auth-spire.md b/docs/agentcube/docs/tutorials/internal-auth-spire.md
index 838bef75..cab0e857 100644
--- a/docs/agentcube/docs/tutorials/internal-auth-spire.md
+++ b/docs/agentcube/docs/tutorials/internal-auth-spire.md
@@ -1,61 +1,318 @@
 # Securing Internal Traffic with SPIRE (mTLS)
 
-This tutorial covers how we use SPIRE to establish zero-trust, mutually authenticated TLS (mTLS) for AgentCube's control plane.
+This task shows you how to enable mutual TLS (mTLS) between AgentCube's
+control-plane components using [SPIRE](https://spiffe.io/docs/latest/spire-about/spire-concepts/).
+By the end, every request between the Router and WorkloadManager will be
+cryptographically authenticated using short-lived X.509 certificates that rotate
+automatically.
 
-## Why Do We Need This?
+## Before you begin
 
-By default, internal components trust each other based on network reachability. With our new mTLS implementation, we're locking down the control plane so the Router and WorkloadManager cryptographically verify each other's SPIFFE identities before communicating.
+1. Follow the [Getting Started](../getting-started.md) guide to install
+   AgentCube on your cluster. **Do not** enable SPIRE during the initial
+   installation - this tutorial walks through that step explicitly.
 
-## 1. How the Architecture Works
+2. Make sure you have the following tools installed:
+   - [`kubectl`](https://kubernetes.io/docs/tasks/tools/) (v1.25+)
+   - [`helm`](https://helm.sh/docs/intro/install/) (v3.12+)
 
-We implemented a robust mTLS abstraction layer that handles the heavy lifting for the control plane:
+3. Confirm AgentCube is running without SPIRE:
 
-- **Strict Identity Enforcement**: The Router and WorkloadManager have hardcoded SPIFFE IDs. 
-  - The WorkloadManager accepts any client presenting a valid certificate signed by the trusted CA pool (authorization is handled later at the application layer).
-  - However, the Router strictly verifies it's actually talking to the `WorkloadManagerSPIFFEID` before it forwards any traffic, preventing spoofed servers.
-- **Zero-Downtime Rotation**: A new `CertWatcher` actively monitors the certificates on disk (using `fsnotify`). When SPIRE rotates the short-lived certs, they are hot-reloaded without dropping any active connections.
+   ```bash
+   kubectl get pods -n agentcube-system
+   ```
 
-### What about Sandboxes?
+   You should see the Router and WorkloadManager pods in `Running` state, each
+   showing `1/1` containers ready (no sidecar yet):
 
-You might wonder why we don't inject mTLS into the `PicoD` or `AgentRuntime` sandboxes:
-- **Startup Latency**: Establishing a new TLS handshake for every short-lived sandbox adds significant latency. We opted to use our existing, blazing-fast JWT-based authentication for the `Router -> Sandbox` path instead.
-- **User-Defined Runtimes**: `AgentRuntime` sandboxes are user-defined containers. By avoiding mTLS sidecar injection, we keep them clean and pure without forcing SPIRE dependencies on them.
-- **WorkloadManager isolation**: The WorkloadManager never communicates directly with sandboxes over HTTP; it solely manages them via the secure Kubernetes API.
+   ```
+   NAME                                READY   STATUS    RESTARTS   AGE
+   agentcube-router-5d8f9b7c4-xxxxx    1/1     Running   0          5m
+   workloadmanager-6b6bb75d98-xxxxx    1/1     Running   0          5m
+   ```
 
-## 2. Enabling mTLS on the Control Plane
+> **Tip :**
+> If you are running on a local [Kind](https://kind.sigs.k8s.io/) or
+[Minikube](https://minikube.sigs.k8s.io/) cluster, you will need to pass two
+extra overrides in the Helm upgrade command shown below. These are already
+included in the instructions, so just keep them in..
 
-To turn on mTLS, you just need to pass the appropriate certificate paths to the binaries. They automatically enable mTLS when the CA bundle is provided alongside the cert and key.
 
-For the **Router**, use the `mtls` prefix:
+## What gets deployed
+
+When you enable SPIRE, the Helm chart creates the following additional resources
+inside your cluster:
+
+| Resource | Kind | Purpose |
+|---|---|---|
+| `spire-server` | StatefulSet (1 replica) | Central certificate authority. Runs the SPIRE Controller Manager as a sidecar. |
+| `spire-agent` | DaemonSet | Runs on every node. Attests workloads and delivers certificates. |
+| `ClusterSPIFFEID` (×2) | CRD | Declarative identity registration for the Router and WorkloadManager. |
+| `spiffe-helper` sidecar | Container (injected) | Fetches and rotates certificates inside the Router and WorkloadManager pods. |
+
+The Router and WorkloadManager pods will each go from `1/1` to `2/2` containers
+(the main process + the `spiffe-helper` sidecar).
+
+## Step 1 - Install the SPIRE Controller Manager CRDs
+
+The SPIRE Controller Manager watches `ClusterSPIFFEID` custom resources. Their
+CRDs must be present in the cluster **before** the Helm upgrade; otherwise the
+chart cannot create its `ClusterSPIFFEID` resources and the upgrade will fail.
+
+```bash
+kubectl apply -k "https://github.com/spiffe/spire-controller-manager/config/crd?ref=v0.6.4"
+```
+
+Verify the CRD was installed:
+
+```bash
+kubectl get crd clusterspiffeids.spire.spiffe.io
+```
+
+Expected output:
+
+```
+NAME                                  CREATED AT
+clusterspiffeids.spire.spiffe.io      2025-XX-XXTXX:XX:XXZ
+```
+
+## Step 2 - Upgrade the Helm release with SPIRE enabled
+
+Run the Helm upgrade with `spire.enabled=true`. The extra `--set` flags for
+`insecureBootstrap` and `skipKubeletVerification` are needed for local
+development clusters (Kind / Minikube). On a production cluster with proper
+kubelet certificates, you can omit them.
+
+```bash
+helm upgrade agentcube manifests/charts/base \
+  -n agentcube-system \
+  --set spire.enabled=true \
+  --set spire.agent.insecureBootstrap=true \
+  --set spire.agent.skipKubeletVerification=true
+```
+
+This single command deploys the full SPIRE infrastructure **and** injects the
+`spiffe-helper` sidecar into the Router and WorkloadManager pods.
+
+Wait for everything to become ready:
+
+```bash
+kubectl rollout status statefulset/spire-server -n agentcube-system --timeout=120s
+kubectl rollout status daemonset/spire-agent -n agentcube-system --timeout=120s
+kubectl rollout status deployment/agentcube-router -n agentcube-system --timeout=120s
+kubectl rollout status deployment/workloadmanager -n agentcube-system --timeout=120s
+```
+
+## Step 3 - Verify SPIRE is healthy
+
+Check that the SPIRE Server is up and has registered agents:
+
 ```bash
---mtls-cert=/path/to/tls.crt
---mtls-key=/path/to/tls.key
---mtls-ca=/path/to/ca.crt
+kubectl exec -n agentcube-system statefulset/spire-server -c spire-server -- \
+  /opt/spire/bin/spire-server agent list
+```
+
+You should see at least one agent entry (one per cluster node):
+
 ```
+Found X attested agent(s):
+
+SPIFFE ID         : spiffe://cluster.local/spire/agent/k8s_psat/agentcube-cluster/...
+Attestation type  : k8s_psat
+...
+```
+
+Next, confirm the identity registrations were picked up from the
+`ClusterSPIFFEID` resources:
 
-For the **WorkloadManager**, use the `tls` prefix:
 ```bash
---tls-cert=/path/to/tls.crt
---tls-key=/path/to/tls.key
---tls-ca=/path/to/ca.crt
+kubectl exec -n agentcube-system statefulset/spire-server -c spire-server -- \
+  /opt/spire/bin/spire-server entry show
 ```
 
-When you deploy AgentCube via our Helm charts, you don't have to manually manage these certificates. Instead, the **`spiffe-helper` sidecar** runs alongside the Router and WorkloadManager containers in their respective pods. 
+You should see entries for both the Router and WorkloadManager, with SPIFFE IDs
+following the format
+`spiffe://cluster.local/ns/agentcube-system/sa/<service-account>`:
 
-Here is what the `spiffe-helper` sidecar does in the background:
-1. It securely authenticates with the local SPIRE Agent.
-2. It fetches the short-lived SVIDs (certificates) for the control plane component.
-3. It writes the certificates to a shared volume where the component's `CertWatcher` instantly picks them up.
-4. It continuously handles rotation before the certificates expire.
+```
+Entry ID         : ...
+SPIFFE ID        : spiffe://cluster.local/ns/agentcube-system/sa/agentcube-router
+Parent ID        : ...
+Revision         : 0
 
-## 3. Verifying It Works
+Entry ID         : ...
+SPIFFE ID        : spiffe://cluster.local/ns/agentcube-system/sa/workloadmanager
+Parent ID        : ...
+Revision         : 0
+```
 
-Once you've applied the configuration:
+## Step 4 - Verify the sidecar and certificates
 
-1. Check the logs for the Router and WorkloadManager. You'll see the `CertWatcher` output confirming it has successfully loaded the certificates.
-2. Try deploying an agent and sending a request. 
-3. If everything is wired correctly, the Router will perform the mTLS handshake and verify the WorkloadManager's SPIFFE ID when provisioning the sandbox, and then seamlessly fall back to the low-latency JWT auth when proxying your request directly to the sandbox.
+Confirm that both the Router and WorkloadManager pods now show `2/2` containers
+(the main container + the `spiffe-helper` sidecar):
 
-## Next Steps
+```bash
+kubectl get pods -n agentcube-system
+```
+
+Expected output:
+
+```
+NAME                                READY   STATUS    RESTARTS   AGE
+agentcube-router-7f8d4b9c6-xxxxx    2/2     Running   0          2m
+workloadmanager-8c7dd85f9-xxxxx     2/2     Running   0          2m
+spire-agent-xxxxx                   1/1     Running   0          2m
+spire-server-0                      2/2     Running   0          2m
+```
+
+Check the Router logs to confirm mTLS is active. You should see a log line
+indicating it is waiting for, and then successfully loading, the certificates:
 
-Now that your control plane communications are locked down, your AgentCube deployment is running a zero-trust architecture. You can safely deploy sensitive agents in multi-tenant environments.
\ No newline at end of file
+```bash
+kubectl logs -n agentcube-system deployment/agentcube-router -c agentcube-router | grep -i mtls
+```
+
+Expected output:
+
+```
+Waiting for Router mTLS cert/key/CA files
+All mTLS cert/key/CA files are present
+Router→WorkloadManager mTLS enabled: expecting server SPIFFE ID spiffe://cluster.local/ns/agentcube-system/sa/workloadmanager
+```
+
+Do the same for the WorkloadManager:
+
+```bash
+kubectl logs -n agentcube-system deployment/workloadmanager -c workloadmanager | grep -i mtls
+```
+
+Expected output:
+
+```
+Waiting for WorkloadManager mTLS cert/key/CA files
+All mTLS cert/key/CA files are present
+WorkloadManager mTLS enabled: accepting clients with valid SPIRE-provisioned certificates
+```
+
+## Step 5 - Test it end-to-end
+
+Deploy a simple agent and invoke it through the Router to confirm the full
+mTLS-secured path works:
+
+```bash
+kubectl apply -f - <<EOF
+apiVersion: runtime.agentcube.volcano.sh/v1alpha1
+kind: AgentRuntime
+metadata:
+  name: mtls-test
+  namespace: default
+spec:
+  targetPort:
+    - pathPrefix: "/"
+      port: 8000
+      protocol: "HTTP"
+  podTemplate:
+    spec:
+      containers:
+        - name: agent
+          image: python:3.11-slim
+          command: ["python3", "-m", "http.server", "8000"]
+          resources:
+            requests:
+              cpu: "100m"
+              memory: "128Mi"
+            limits:
+              cpu: "500m"
+              memory: "512Mi"
+EOF
+```
+
+Port-forward the Router and send a request:
+
+```bash
+kubectl port-forward -n agentcube-system svc/agentcube-router 8080:8080 &
+curl -s -o /dev/null -w "%{http_code}" \
+  http://localhost:8080/v1/namespaces/default/agent-runtimes/mtls-test/invocations/test/
+```
+
+If the mTLS handshake between Router and WorkloadManager succeeds, you will see
+a `200` (or `502` while the sandbox is still booting - retry after a few
+seconds). A TLS-related error in the Router logs would indicate a
+misconfiguration.
+
+## Understanding what changed
+
+Here is how each component is configured behind the scenes. You do **not** need
+to set any of these flags manually - the Helm chart handles it when
+`spire.enabled=true`.
+
+### Router (mTLS client)
+
+The Helm chart passes these flags to the Router binary:
+
+```
+--mtls-cert=/run/spire/certs/svid.pem
+--mtls-key=/run/spire/certs/svid_key.pem
+--mtls-ca=/run/spire/certs/svid_bundle.pem
+```
+
+When all three are present, the Router creates a dedicated HTTPS transport for
+its WorkloadManager connection. It verifies that the WorkloadManager's
+certificate contains the expected SPIFFE ID
+(`spiffe://cluster.local/ns/agentcube-system/sa/workloadmanager`).
+
+### WorkloadManager (mTLS server)
+
+The Helm chart passes these flags to the WorkloadManager binary:
+
+```
+--tls-cert=/run/spire/certs/svid.pem
+--tls-key=/run/spire/certs/svid_key.pem
+--tls-ca=/run/spire/certs/svid_bundle.pem
+```
+
+When the CA file is present, the WorkloadManager starts its HTTP server with
+mTLS enabled. It requires every connecting client to present a valid certificate
+signed by the trusted CA. Authorization is handled at the application layer, not
+at the TLS level.
+
+### Certificate rotation
+
+The `spiffe-helper` sidecar continuously fetches fresh SVIDs from the local
+SPIRE Agent and writes them to a shared volume at `/run/spire/certs/`. A
+`CertWatcher` inside each component watches that directory using `fsnotify` and
+hot-reloads the certificates without dropping any active connections. The default
+SVID TTL is **1 hour**.
+
+### What about sandboxes?
+
+mTLS is only used for the control-plane path (Router ↔ WorkloadManager).
+The Router→Sandbox connection continues to use the existing JWT-based
+authentication. This keeps sandbox startup latency low and avoids injecting
+SPIRE dependencies into user-defined runtime containers.
+
+## Cleanup
+
+Remove the test agent:
+
+```bash
+kubectl delete agentruntime mtls-test -n default
+```
+
+If you want to **disable** SPIRE and go back to plain HTTP between the control
+plane components, run the Helm upgrade again with `spire.enabled=false`:
+
+```bash
+helm upgrade agentcube manifests/charts/base \
+  -n agentcube-system \
+  --set spire.enabled=false
+```
+
+This removes all SPIRE resources (Server, Agent, CRDs, sidecars) and the
+Router/WorkloadManager pods will restart with `1/1` containers.
+
+To also remove the SPIRE Controller Manager CRDs:
+
+```bash
+kubectl delete -k "https://github.com/spiffe/spire-controller-manager/config/crd?ref=v0.6.4"
+```
\ No newline at end of file

From b73b62dbf82e0af04b87108d26f6233022de50ae Mon Sep 17 00:00:00 2001
From: Mahil Patel <mahilpatel0808@gmail.com>
Date: Sat, 30 May 2026 21:36:54 +0530
Subject: [PATCH 3/4] fix(docs,mtls): address review comments on
 internal-auth-spire tutorial

- Add klog.Infof to wait.go so the logs actually contain the line shown in the tutorial's expected output
- Clarify that the Helm upgrade removes SPIRE workloads and sidecars, not CRDs (those are removed separately via kubectl)
- Add the --reuse-values flag to preserve the install-time values

Signed-off-by: Mahil Patel <mahilpatel0808@gmail.com>
---
 docs/agentcube/docs/tutorials/internal-auth-spire.md | 11 ++++++++---
 pkg/mtls/wait.go                                     |  3 +++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/docs/agentcube/docs/tutorials/internal-auth-spire.md b/docs/agentcube/docs/tutorials/internal-auth-spire.md
index cab0e857..c39a74a5 100644
--- a/docs/agentcube/docs/tutorials/internal-auth-spire.md
+++ b/docs/agentcube/docs/tutorials/internal-auth-spire.md
@@ -78,7 +78,9 @@ clusterspiffeids.spire.spiffe.io      2025-XX-XXTXX:XX:XXZ
 
 ## Step 2 - Upgrade the Helm release with SPIRE enabled
 
-Run the Helm upgrade with `spire.enabled=true`. The extra `--set` flags for
+Run the Helm upgrade with `spire.enabled=true`. Keep `--reuse-values` so your
+existing install-time settings (for example Redis, images, RBAC, or service
+accounts) are preserved while enabling SPIRE. The extra `--set` flags for
 `insecureBootstrap` and `skipKubeletVerification` are needed for local
 development clusters (Kind / Minikube). On a production cluster with proper
 kubelet certificates, you can omit them.
@@ -86,6 +88,7 @@ kubelet certificates, you can omit them.
 ```bash
 helm upgrade agentcube manifests/charts/base \
   -n agentcube-system \
+  --reuse-values \
   --set spire.enabled=true \
   --set spire.agent.insecureBootstrap=true \
   --set spire.agent.skipKubeletVerification=true
@@ -305,11 +308,13 @@ plane components, run the Helm upgrade again with `spire.enabled=false`:
 ```bash
 helm upgrade agentcube manifests/charts/base \
   -n agentcube-system \
+  --reuse-values \
   --set spire.enabled=false
 ```
 
-This removes all SPIRE resources (Server, Agent, CRDs, sidecars) and the
-Router/WorkloadManager pods will restart with `1/1` containers.
+This removes all SPIRE workloads (Server, Agent), sidecars, and ClusterSPIFFEID
+resources from this Helm release. The Router/WorkloadManager pods will restart
+with `1/1` containers.
 
 To also remove the SPIRE Controller Manager CRDs:
 
diff --git a/pkg/mtls/wait.go b/pkg/mtls/wait.go
index 79f5cf1e..71b25876 100644
--- a/pkg/mtls/wait.go
+++ b/pkg/mtls/wait.go
@@ -21,6 +21,8 @@ import (
 	"os"
 	"strings"
 	"time"
+
+	"k8s.io/klog/v2"
 )
 
 // DefaultCertificateFileWaitTimeout bounds the startup race while spiffe-helper writes the initial SVID files.
@@ -41,6 +43,7 @@ func WaitForCertificateFiles(cfg Config, timeout time.Duration) error {
 			return fmt.Errorf("failed to access mTLS cert/key/CA files: %w", err)
 		}
 		if exist {
+			klog.Infof("All mTLS cert/key/CA files are present")
 			return nil
 		}
 		missing = currentMissing

From bd05fe571f83f1c3de49ea6f08dadc18c75ce89a Mon Sep 17 00:00:00 2001
From: Mahil Patel <mahilpatel0808@gmail.com>
Date: Mon, 1 Jun 2026 23:16:11 +0530
Subject: [PATCH 4/4] docs: update SPIRE tutorial with authentic outputs and
 fix namespace

- Replaced generalized output placeholders (xxxxx pod hashes, ... UUIDs, and
  XX-XX timestamps) with actual outputs to prevent ambiguity.
- Updated expected log outputs for Router and WorkloadManager to accurately
  reflect the format emitted by the codebase.
- Fixed agentcube-system namespace inconsistencies across the documentation to
  align with the core getting-started guide.

Signed-off-by: Mahil Patel <mahilpatel0808@gmail.com>
---
 .../docs/tutorials/internal-auth-spire.md     | 105 +++++++++---------
 pkg/mtls/loader_test.go                       |   4 +-
 pkg/mtls/spiffeid.go                          |   4 +-
 pkg/mtls/spiffeid_test.go                     |   4 +-
 pkg/router/session_manager_test.go            |   2 +-
 test/e2e/run_e2e.sh                           |   2 +-
 6 files changed, 63 insertions(+), 58 deletions(-)

diff --git a/docs/agentcube/docs/tutorials/internal-auth-spire.md b/docs/agentcube/docs/tutorials/internal-auth-spire.md
index c39a74a5..16fdf22f 100644
--- a/docs/agentcube/docs/tutorials/internal-auth-spire.md
+++ b/docs/agentcube/docs/tutorials/internal-auth-spire.md
@@ -19,23 +19,23 @@ automatically.
 3. Confirm AgentCube is running without SPIRE:
 
    ```bash
-   kubectl get pods -n agentcube-system
+   kubectl get pods -n agentcube
    ```
 
    You should see the Router and WorkloadManager pods in `Running` state, each
    showing `1/1` containers ready (no sidecar yet):
 
-   ```
-   NAME                                READY   STATUS    RESTARTS   AGE
-   agentcube-router-5d8f9b7c4-xxxxx    1/1     Running   0          5m
-   workloadmanager-6b6bb75d98-xxxxx    1/1     Running   0          5m
-   ```
+   ```
+   NAME                               READY   STATUS    RESTARTS   AGE
+   agentcube-router-7fbb7b54c-7khq5   1/1     Running   0          8s
+   workloadmanager-6c44454f68-zmfcc   1/1     Running   0          8s
+   ```
 
-> **Tip :**
+> **Tip:**
 > If you are running on a local [Kind](https://kind.sigs.k8s.io/) or
-[Minikube](https://minikube.sigs.k8s.io/) cluster, you will need to pass two
-extra overrides in the Helm upgrade command shown below. These are already
-included in the instructions, so just keep them in..
+> [Minikube](https://minikube.sigs.k8s.io/) cluster, you will need to pass two
+> extra overrides in the Helm upgrade command shown below. These are already
+> included in the instructions, so just keep them in.
 
 
 ## What gets deployed
@@ -73,7 +73,7 @@ Expected output:
 
 ```
 NAME                                  CREATED AT
-clusterspiffeids.spire.spiffe.io      2025-XX-XXTXX:XX:XXZ
+clusterspiffeids.spire.spiffe.io      2026-06-01T16:22:32Z
 ```
 
 ## Step 2 - Upgrade the Helm release with SPIRE enabled
@@ -87,7 +87,7 @@ kubelet certificates, you can omit them.
 
 ```bash
 helm upgrade agentcube manifests/charts/base \
-  -n agentcube-system \
+  -n agentcube \
   --reuse-values \
   --set spire.enabled=true \
   --set spire.agent.insecureBootstrap=true \
@@ -100,10 +100,10 @@ This single command deploys the full SPIRE infrastructure **and** injects the
 Wait for everything to become ready:
 
 ```bash
-kubectl rollout status statefulset/spire-server -n agentcube-system --timeout=120s
-kubectl rollout status daemonset/spire-agent -n agentcube-system --timeout=120s
-kubectl rollout status deployment/agentcube-router -n agentcube-system --timeout=120s
-kubectl rollout status deployment/workloadmanager -n agentcube-system --timeout=120s
+kubectl rollout status statefulset/spire-server -n agentcube --timeout=120s
+kubectl rollout status daemonset/spire-agent -n agentcube --timeout=120s
+kubectl rollout status deployment/agentcube-router -n agentcube --timeout=120s
+kubectl rollout status deployment/workloadmanager -n agentcube --timeout=120s
 ```
 
 ## Step 3 - Verify SPIRE is healthy
@@ -111,16 +111,16 @@ kubectl rollout status deployment/workloadmanager -n agentcube-system --timeout=
 Check that the SPIRE Server is up and has registered agents:
 
 ```bash
-kubectl exec -n agentcube-system statefulset/spire-server -c spire-server -- \
+kubectl exec -n agentcube statefulset/spire-server -c spire-server -- \
   /opt/spire/bin/spire-server agent list
 ```
 
 You should see at least one agent entry (one per cluster node):
 
 ```
-Found X attested agent(s):
+Found 1 attested agent(s):
 
-SPIFFE ID         : spiffe://cluster.local/spire/agent/k8s_psat/agentcube-cluster/...
+SPIFFE ID         : spiffe://cluster.local/spire/agent/k8s_psat/agentcube-cluster/67790303-3657-42d6-bf4f-c3833ec6dd5e
 Attestation type  : k8s_psat
 ...
 ```
@@ -129,23 +129,23 @@ Next, confirm the identity registrations were picked up from the
 `ClusterSPIFFEID` resources:
 
 ```bash
-kubectl exec -n agentcube-system statefulset/spire-server -c spire-server -- \
+kubectl exec -n agentcube statefulset/spire-server -c spire-server -- \
   /opt/spire/bin/spire-server entry show
 ```
 
 You should see entries for both the Router and WorkloadManager, with SPIFFE IDs
 following the format
-`spiffe://cluster.local/ns/agentcube-system/sa/<service-account>`:
+`spiffe://cluster.local/ns/agentcube/sa/<service-account>`:
 
 ```
-Entry ID         : ...
-SPIFFE ID        : spiffe://cluster.local/ns/agentcube-system/sa/agentcube-router
-Parent ID        : ...
+Entry ID         : bfd507ec-10d8-43e5-b984-861a3ff81167
+SPIFFE ID        : spiffe://cluster.local/ns/agentcube/sa/agentcube-router
+Parent ID        : spiffe://cluster.local/spire/agent/k8s_psat/agentcube-cluster/67790303-3657-42d6-bf4f-c3833ec6dd5e
 Revision         : 0
 
-Entry ID         : ...
-SPIFFE ID        : spiffe://cluster.local/ns/agentcube-system/sa/workloadmanager
-Parent ID        : ...
+Entry ID         : 21e3ba6f-ad13-4076-9e08-90a2d4ff518f
+SPIFFE ID        : spiffe://cluster.local/ns/agentcube/sa/workloadmanager
+Parent ID        : spiffe://cluster.local/spire/agent/k8s_psat/agentcube-cluster/67790303-3657-42d6-bf4f-c3833ec6dd5e
 Revision         : 0
 ```
 
@@ -155,46 +155,47 @@ Confirm that both the Router and WorkloadManager pods now show `2/2` containers
 (the main container + the `spiffe-helper` sidecar):
 
 ```bash
-kubectl get pods -n agentcube-system
+kubectl get pods -n agentcube
 ```
 
 Expected output:
 
 ```
-NAME                                READY   STATUS    RESTARTS   AGE
-agentcube-router-7f8d4b9c6-xxxxx    2/2     Running   0          2m
-workloadmanager-8c7dd85f9-xxxxx     2/2     Running   0          2m
-spire-agent-xxxxx                   1/1     Running   0          2m
-spire-server-0                      2/2     Running   0          2m
+NAME                               READY   STATUS    RESTARTS        AGE
+agentcube-router-574d98b76-tr2nr   2/2     Running   5 (2m24s ago)   3m17s
+spire-agent-8r9jx                  1/1     Running   3 (2m44s ago)   3m17s
+spire-server-0                     2/2     Running   0               3m17s
+workloadmanager-5797888bd4-jm2qj   2/2     Running   3 (118s ago)    3m17s
 ```
 
 Check the Router logs to confirm mTLS is active. You should see a log line
 indicating it is waiting for, and then successfully loading, the certificates:
 
 ```bash
-kubectl logs -n agentcube-system deployment/agentcube-router -c agentcube-router | grep -i mtls
+kubectl logs -n agentcube deployment/agentcube-router -c agentcube-router | grep -i mtls
 ```
 
 Expected output:
 
 ```
-Waiting for Router mTLS cert/key/CA files
-All mTLS cert/key/CA files are present
-Router→WorkloadManager mTLS enabled: expecting server SPIFFE ID spiffe://cluster.local/ns/agentcube-system/sa/workloadmanager
+I0601 16:25:21.444099       1 main.go:64] Waiting for Router mTLS cert/key/CA files
+I0601 16:25:21.444259       1 wait.go:46] All mTLS cert/key/CA files are present
+I0601 16:25:21.445161       1 session_manager.go:84] Using https:// for WORKLOAD_MANAGER_URL because mTLS is configured
+I0601 16:25:21.445482       1 session_manager.go:93] Router→WorkloadManager mTLS enabled: expecting server SPIFFE ID spiffe://cluster.local/ns/agentcube/sa/workloadmanager
 ```
 
 Do the same for the WorkloadManager:
 
 ```bash
-kubectl logs -n agentcube-system deployment/workloadmanager -c workloadmanager | grep -i mtls
+kubectl logs -n agentcube deployment/workloadmanager -c workloadmanager | grep -i mtls
 ```
 
 Expected output:
 
 ```
-Waiting for WorkloadManager mTLS cert/key/CA files
-All mTLS cert/key/CA files are present
-WorkloadManager mTLS enabled: accepting clients with valid SPIRE-provisioned certificates
+I0601 16:25:22.561316       1 main.go:80] Waiting for WorkloadManager mTLS cert/key/CA files
+I0601 16:25:22.561931       1 wait.go:46] All mTLS cert/key/CA files are present
+I0601 16:25:22.678777       1 server.go:218] WorkloadManager mTLS enabled: accepting clients with valid SPIRE-provisioned certificates
 ```
 
 ## Step 5 - Test it end-to-end
@@ -230,18 +231,22 @@ spec:
 EOF
 ```
 
-Port-forward the Router and send a request:
+Open a new terminal and port-forward the Router:
+
+```bash
+kubectl port-forward -n agentcube svc/agentcube-router 8080:8080
+```
+
+In your original terminal, send a request to the root path of the sandbox:
 
 ```bash
-kubectl port-forward -n agentcube-system svc/agentcube-router 8080:8080 &
-curl -s -o /dev/null -w "%{http_code}" \
-  http://localhost:8080/v1/namespaces/default/agent-runtimes/mtls-test/invocations/test/
+curl -i http://localhost:8080/v1/namespaces/default/agent-runtimes/mtls-test/invocations/
 ```
 
 If the mTLS handshake between Router and WorkloadManager succeeds, you will see
-a `200` (or `502` while the sandbox is still booting - retry after a few
-seconds). A TLS-related error in the Router logs would indicate a
-misconfiguration.
+a `200 OK` response with a directory listing from the Python server (or a `502`
+while the sandbox is still booting - just retry after a few seconds). A
+TLS-related error in the Router logs would indicate a misconfiguration.
 
 ## Understanding what changed
 
@@ -262,7 +267,7 @@ The Helm chart passes these flags to the Router binary:
 When all three are present, the Router creates a dedicated HTTPS transport for
 its WorkloadManager connection. It verifies that the WorkloadManager's
 certificate contains the expected SPIFFE ID
-(`spiffe://cluster.local/ns/agentcube-system/sa/workloadmanager`).
+(`spiffe://cluster.local/ns/agentcube/sa/workloadmanager`).
 
 ### WorkloadManager (mTLS server)
 
@@ -307,7 +312,7 @@ plane components, run the Helm upgrade again with `spire.enabled=false`:
 
 ```bash
 helm upgrade agentcube manifests/charts/base \
-  -n agentcube-system \
+  -n agentcube \
   --reuse-values \
   --set spire.enabled=false
 ```
diff --git a/pkg/mtls/loader_test.go b/pkg/mtls/loader_test.go
index 27517e10..793c75c6 100644
--- a/pkg/mtls/loader_test.go
+++ b/pkg/mtls/loader_test.go
@@ -263,7 +263,7 @@ func TestLoadServerConfig_InvalidCAPEM(t *testing.T) {
 // --- SPIFFE ID verification ---
 
 func TestVerifyServerCert_MatchingID(t *testing.T) {
-	spiffeID := "spiffe://cluster.local/ns/agentcube-system/sa/workloadmanager"
+	spiffeID := "spiffe://cluster.local/ns/agentcube/sa/workloadmanager"
 	certFile, _, caFile := generateTestCertsWithSPIFFEID(t, spiffeID)
 
 	rawCert := readRawCert(t, certFile)
@@ -282,7 +282,7 @@ func TestVerifyServerCert_MatchingID(t *testing.T) {
 }
 
 func TestVerifyServerCert_UntrustedCA(t *testing.T) {
-	spiffeID := "spiffe://cluster.local/ns/agentcube-system/sa/workloadmanager"
+	spiffeID := "spiffe://cluster.local/ns/agentcube/sa/workloadmanager"
 	certFile, _, _ := generateTestCertsWithSPIFFEID(t, spiffeID)
 
 	// Use a DIFFERENT CA — chain verification should fail
diff --git a/pkg/mtls/spiffeid.go b/pkg/mtls/spiffeid.go
index 0e511ab0..287b7592 100644
--- a/pkg/mtls/spiffeid.go
+++ b/pkg/mtls/spiffeid.go
@@ -25,7 +25,7 @@ import (
 const (
 	defaultTrustDomain = "cluster.local"
 	trustDomainEnvVar  = "AGENTCUBE_SPIFFE_TRUST_DOMAIN"
-	defaultNamespace   = "agentcube-system"
+	defaultNamespace   = "agentcube"
 	namespaceEnvVar    = "AGENTCUBE_NAMESPACE"
 )
 
@@ -33,7 +33,7 @@ const (
 // These follow the Istio-convention format: spiffe://<trust-domain>/ns/<namespace>/sa/<service-account>.
 // The trust domain defaults to cluster.local and can be overridden with AGENTCUBE_SPIFFE_TRUST_DOMAIN
 // to match the SPIRE trust domain configured by deployment tooling.
-// The namespace defaults to agentcube-system and can be overridden with AGENTCUBE_NAMESPACE.
+// The namespace defaults to agentcube and can be overridden with AGENTCUBE_NAMESPACE.
 var (
 	// RouterSPIFFEID is the SPIFFE identity for the Router component.
 	RouterSPIFFEID = componentSPIFFEID(configuredTrustDomain(), configuredNamespace(), "agentcube-router")
diff --git a/pkg/mtls/spiffeid_test.go b/pkg/mtls/spiffeid_test.go
index bdd328d1..71baede8 100644
--- a/pkg/mtls/spiffeid_test.go
+++ b/pkg/mtls/spiffeid_test.go
@@ -43,8 +43,8 @@ func TestConfiguredNamespace(t *testing.T) {
 }
 
 func TestComponentSPIFFEID(t *testing.T) {
-	got := componentSPIFFEID("example.org", "agentcube-system", "agentcube-router")
-	want := "spiffe://example.org/ns/agentcube-system/sa/agentcube-router"
+	got := componentSPIFFEID("example.org", "agentcube", "agentcube-router")
+	want := "spiffe://example.org/ns/agentcube/sa/agentcube-router"
 	if got != want {
 		t.Fatalf("componentSPIFFEID() = %q, want %q", got, want)
 	}
diff --git a/pkg/router/session_manager_test.go b/pkg/router/session_manager_test.go
index 8b585f4b..0b61f28d 100644
--- a/pkg/router/session_manager_test.go
+++ b/pkg/router/session_manager_test.go
@@ -636,7 +636,7 @@ func generateTestCertsForRouter(t *testing.T, dir string) (certFile, keyFile, ca
 	if err != nil {
 		t.Fatalf("generate leaf key: %v", err)
 	}
-	spiffeURL, _ := url.Parse("spiffe://cluster.local/ns/agentcube-system/sa/agentcube-router")
+	spiffeURL, _ := url.Parse("spiffe://cluster.local/ns/agentcube/sa/agentcube-router")
 	leafTemplate := &x509.Certificate{
 		SerialNumber: big.NewInt(2),
 		Subject:      pkix.Name{Organization: []string{"Test Router"}},
diff --git a/test/e2e/run_e2e.sh b/test/e2e/run_e2e.sh
index 0c215718..e14c1c5b 100755
--- a/test/e2e/run_e2e.sh
+++ b/test/e2e/run_e2e.sh
@@ -18,7 +18,7 @@ WORKLOAD_MANAGER_IMAGE=${WORKLOAD_MANAGER_IMAGE:-workloadmanager:latest}
 ROUTER_IMAGE=${ROUTER_IMAGE:-agentcube-router:latest}
 PICOD_IMAGE=${PICOD_IMAGE:-picod:latest}
 REDIS_IMAGE=${REDIS_IMAGE:-redis:7-alpine}
-AGENTCUBE_NAMESPACE=${AGENTCUBE_NAMESPACE:-agentcube-system}
+AGENTCUBE_NAMESPACE=${AGENTCUBE_NAMESPACE:-agentcube}
 WORKLOAD_NAMESPACE=${WORKLOAD_NAMESPACE:-agentcube}
 E2E_VENV_DIR=${E2E_VENV_DIR:-/tmp/agentcube-e2e-venv}
 MCP_K8S_LOCAL_PORT=${MCP_K8S_LOCAL_PORT:-19446}