diff --git a/Makefile.operator b/Makefile.operator index 7a604635..75bd3457 100644 --- a/Makefile.operator +++ b/Makefile.operator @@ -28,8 +28,8 @@ help: ## Display this help message. ##@ Development -manifests: controller-gen ## Generate ClusterRole and CustomResourceDefinition objects. - $(CONTROLLER_GEN) rbac:roleName=mcp-runtime-operator-role crd webhook paths="./api/..." output:crd:artifacts:config=config/crd/bases +manifests: controller-gen ## Generate ClusterRole, CustomResourceDefinition, and webhook objects. + $(CONTROLLER_GEN) rbac:roleName=mcp-runtime-operator-role crd webhook paths="./api/..." output:crd:artifacts:config=config/crd/bases output:webhook:artifacts:config=config/webhook generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations. $(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./api/..." diff --git a/api/v1alpha1/access_types.go b/api/v1alpha1/access_types.go index 0a7b01f7..0a9c70e2 100644 --- a/api/v1alpha1/access_types.go +++ b/api/v1alpha1/access_types.go @@ -56,6 +56,7 @@ type MCPAccessGrantStatus struct { // +kubebuilder:printcolumn:name="Trust",type="string",JSONPath=".spec.maxTrust" // +kubebuilder:printcolumn:name="Disabled",type="boolean",JSONPath=".spec.disabled" // +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" +// +kubebuilder:webhook:path=/validate-mcpruntime-org-v1alpha1-mcpaccessgrant,mutating=false,failurePolicy=fail,sideEffects=None,groups=mcpruntime.org,resources=mcpaccessgrants,verbs=create;update,versions=v1alpha1,name=vmcpaccessgrant.kb.io,admissionReviewVersions=v1,serviceName=mcp-runtime-operator-webhook-service,serviceNamespace=mcp-runtime,servicePort=443 // MCPAccessGrant grants a human or agent access to an MCPServer. type MCPAccessGrant struct { @@ -105,6 +106,7 @@ type MCPAgentSessionStatus struct { // +kubebuilder:printcolumn:name="Revoked",type="boolean",JSONPath=".spec.revoked" // +kubebuilder:printcolumn:name="Expires",type="string",JSONPath=".spec.expiresAt" // +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" +// +kubebuilder:webhook:path=/validate-mcpruntime-org-v1alpha1-mcpagentsession,mutating=false,failurePolicy=fail,sideEffects=None,groups=mcpruntime.org,resources=mcpagentsessions,verbs=create;update,versions=v1alpha1,name=vmcpagentsession.kb.io,admissionReviewVersions=v1,serviceName=mcp-runtime-operator-webhook-service,serviceNamespace=mcp-runtime,servicePort=443 // MCPAgentSession stores consent and upstream token state for an agent session. type MCPAgentSession struct { diff --git a/api/v1alpha1/mcpserver_types.go b/api/v1alpha1/mcpserver_types.go index 48d97fc3..8178a2b5 100644 --- a/api/v1alpha1/mcpserver_types.go +++ b/api/v1alpha1/mcpserver_types.go @@ -328,6 +328,8 @@ type MCPServerStatus struct { // +kubebuilder:printcolumn:name="Gateway",type="boolean",JSONPath=".status.gatewayReady" // +kubebuilder:printcolumn:name="Ready",type="boolean",JSONPath=".status.deploymentReady" // +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" +// +kubebuilder:webhook:path=/mutate-mcpruntime-org-v1alpha1-mcpserver,mutating=true,failurePolicy=ignore,sideEffects=None,groups=mcpruntime.org,resources=mcpservers,verbs=create;update,versions=v1alpha1,name=mmcpserver.kb.io,admissionReviewVersions=v1,serviceName=mcp-runtime-operator-webhook-service,serviceNamespace=mcp-runtime,servicePort=443 +// +kubebuilder:webhook:path=/validate-mcpruntime-org-v1alpha1-mcpserver,mutating=false,failurePolicy=fail,sideEffects=None,groups=mcpruntime.org,resources=mcpservers,verbs=create;update,versions=v1alpha1,name=vmcpserver.kb.io,admissionReviewVersions=v1,serviceName=mcp-runtime-operator-webhook-service,serviceNamespace=mcp-runtime,servicePort=443 // MCPServer is the Schema for the mcpservers API. type MCPServer struct { diff --git a/api/v1alpha1/validation.go b/api/v1alpha1/validation.go index b4d1c26a..66c4277e 100644 --- a/api/v1alpha1/validation.go +++ b/api/v1alpha1/validation.go @@ -5,7 +5,6 @@ import ( "fmt" "strconv" "strings" - "time" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" @@ -16,11 +15,10 @@ import ( ) var ( - _ admission.Defaulter[*MCPServer] = mcpServerWebhook{} - _ admission.Validator[*MCPServer] = mcpServerWebhook{} - _ admission.Validator[*MCPAccessGrant] = mcpAccessGrantValidator{} - _ admission.Validator[*MCPAgentSession] = mcpAgentSessionValidator{} - nowFunc = time.Now + _ admission.Defaulter[*MCPServer] = mcpServerWebhook{} + _ admission.Validator[*MCPServer] = mcpServerWebhook{} + _ admission.Validator[*MCPAccessGrant] = mcpAccessGrantValidator{} + _ admission.Validator[*MCPAgentSession] = mcpAgentSessionValidator{} ) const ( @@ -81,8 +79,23 @@ func gatewayEnabled(spec MCPServerSpec) bool { return spec.Gateway != nil && spec.Gateway.Enabled } -// +kubebuilder:webhook:path=/mutate-mcpruntime-org-v1alpha1-mcpserver,mutating=true,failurePolicy=fail,sideEffects=None,groups=mcpruntime.org,resources=mcpservers,verbs=create;update,versions=v1alpha1,name=mmcpserver.kb.io,admissionReviewVersions=v1 +// MCPServerDefaultOptions holds operator-scoped values that the admission +// webhook can use while defaulting MCPServer objects. +type MCPServerDefaultOptions struct { + DefaultIngressHost string + DefaultAnalyticsIngestURL string +} + func (r *MCPServer) Default() { + r.DefaultWithOptions(MCPServerDefaultOptions{}) +} + +// DefaultWithOptions applies MCPServer defaults, including operator-configured +// fallbacks when the webhook is registered by the operator manager. +func (r *MCPServer) DefaultWithOptions(options MCPServerDefaultOptions) { + ingressHostUnset := strings.TrimSpace(r.Spec.IngressHost) == "" + publicPathPrefixUnset := strings.TrimSpace(r.Spec.PublicPathPrefix) == "" + if strings.TrimSpace(r.Spec.ImageTag) == "" && !imageHasTagOrDigest(strings.TrimSpace(r.Spec.Image)) { r.Spec.ImageTag = defaultImageTag } @@ -105,6 +118,9 @@ func (r *MCPServer) Default() { if strings.TrimSpace(r.Spec.IngressClass) == "" { r.Spec.IngressClass = defaultIngressClass } + if ingressHostUnset && publicPathPrefixUnset { + r.Spec.IngressHost = strings.TrimSpace(options.DefaultIngressHost) + } if gatewayEnabled(r.Spec) { if r.Spec.Auth == nil { @@ -194,6 +210,9 @@ func (r *MCPServer) Default() { if strings.TrimSpace(r.Spec.Analytics.EventType) == "" { r.Spec.Analytics.EventType = defaultAnalyticsEventType } + if strings.TrimSpace(r.Spec.Analytics.IngestURL) == "" { + r.Spec.Analytics.IngestURL = strings.TrimSpace(options.DefaultAnalyticsIngestURL) + } } if r.Spec.Rollout != nil { @@ -209,18 +228,23 @@ func (r *MCPServer) Default() { } } -// +kubebuilder:webhook:path=/validate-mcpruntime-org-v1alpha1-mcpserver,mutating=false,failurePolicy=fail,sideEffects=None,groups=mcpruntime.org,resources=mcpservers,verbs=create;update,versions=v1alpha1,name=vmcpserver.kb.io,admissionReviewVersions=v1 func (r *MCPServer) SetupWebhookWithManager(mgr ctrl.Manager) error { + return r.SetupWebhookWithManagerWithOptions(mgr, MCPServerDefaultOptions{}) +} + +func (r *MCPServer) SetupWebhookWithManagerWithOptions(mgr ctrl.Manager, options MCPServerDefaultOptions) error { return ctrl.NewWebhookManagedBy(mgr, r). - WithDefaulter(mcpServerWebhook{}). - WithValidator(mcpServerWebhook{}). + WithDefaulter(mcpServerWebhook{defaultOptions: options}). + WithValidator(mcpServerWebhook{defaultOptions: options}). Complete() } -type mcpServerWebhook struct{} +type mcpServerWebhook struct { + defaultOptions MCPServerDefaultOptions +} -func (mcpServerWebhook) Default(_ context.Context, obj *MCPServer) error { - obj.Default() +func (w mcpServerWebhook) Default(_ context.Context, obj *MCPServer) error { + obj.DefaultWithOptions(w.defaultOptions) return nil } @@ -385,7 +409,6 @@ func validateRolloutValue(fieldPath *field.Path, value string) *field.Error { return nil } -// +kubebuilder:webhook:path=/validate-mcpruntime-org-v1alpha1-mcpaccessgrant,mutating=false,failurePolicy=fail,sideEffects=None,groups=mcpruntime.org,resources=mcpaccessgrants,verbs=create;update,versions=v1alpha1,name=vmcpaccessgrant.kb.io,admissionReviewVersions=v1 func (r *MCPAccessGrant) SetupWebhookWithManager(mgr ctrl.Manager) error { return ctrl.NewWebhookManagedBy(mgr, r). WithValidator(mcpAccessGrantValidator{}). @@ -407,17 +430,29 @@ func (mcpAccessGrantValidator) ValidateDelete(_ context.Context, obj *MCPAccessG } func (r *MCPAccessGrant) ValidateCreate() (admission.Warnings, error) { - return nil, r.validate() + return r.wildcardSubjectWarnings(), r.validate() } func (r *MCPAccessGrant) ValidateUpdate(_ runtime.Object) (admission.Warnings, error) { - return nil, r.validate() + return r.wildcardSubjectWarnings(), r.validate() } func (r *MCPAccessGrant) ValidateDelete() (admission.Warnings, error) { return nil, nil } +func (r *MCPAccessGrant) wildcardSubjectWarnings() admission.Warnings { + subject := r.Spec.Subject + if strings.TrimSpace(subject.HumanID) != "" || + strings.TrimSpace(subject.AgentID) != "" || + strings.TrimSpace(subject.TeamID) != "" { + return nil + } + return admission.Warnings{ + "MCPAccessGrant subject is empty (wildcard grant): the gateway matches any authenticated principal for the server; adapter session creation still requires subject alignment with the caller", + } +} + func (r *MCPAccessGrant) validate() error { var allErrs field.ErrorList specPath := field.NewPath("spec") @@ -425,9 +460,6 @@ func (r *MCPAccessGrant) validate() error { if strings.TrimSpace(r.Spec.ServerRef.Name) == "" { allErrs = append(allErrs, field.Required(specPath.Child("serverRef", "name"), "serverRef.name is required")) } - if strings.TrimSpace(r.Spec.Subject.HumanID) == "" && strings.TrimSpace(r.Spec.Subject.AgentID) == "" && strings.TrimSpace(r.Spec.Subject.TeamID) == "" { - allErrs = append(allErrs, field.Required(specPath.Child("subject"), "one of subject.humanID, subject.agentID, or subject.teamID is required")) - } if err := validateTeamIDField(specPath.Child("subject", "teamID"), r.Spec.Subject.TeamID); err != nil { allErrs = append(allErrs, err) } @@ -475,7 +507,6 @@ func (r *MCPAccessGrant) validate() error { return apierrors.NewInvalid(schema.GroupKind{Group: GroupVersion.Group, Kind: "MCPAccessGrant"}, r.Name, allErrs) } -// +kubebuilder:webhook:path=/validate-mcpruntime-org-v1alpha1-mcpagentsession,mutating=false,failurePolicy=fail,sideEffects=None,groups=mcpruntime.org,resources=mcpagentsessions,verbs=create;update,versions=v1alpha1,name=vmcpagentsession.kb.io,admissionReviewVersions=v1 func (r *MCPAgentSession) SetupWebhookWithManager(mgr ctrl.Manager) error { return ctrl.NewWebhookManagedBy(mgr, r). WithValidator(mcpAgentSessionValidator{}). @@ -521,10 +552,6 @@ func (r *MCPAgentSession) validate() error { if err := validateTeamIDField(specPath.Child("subject", "teamID"), r.Spec.Subject.TeamID); err != nil { allErrs = append(allErrs, err) } - now := nowFunc().UTC() - if r.Spec.ExpiresAt != nil && !r.Spec.ExpiresAt.Time.After(now) { - allErrs = append(allErrs, field.Invalid(specPath.Child("expiresAt"), r.Spec.ExpiresAt.Time.Format(time.RFC3339), "expiresAt must be in the future")) - } if ref := r.Spec.UpstreamTokenSecretRef; ref != nil { if strings.TrimSpace(ref.Name) == "" { allErrs = append(allErrs, field.Required(specPath.Child("upstreamTokenSecretRef", "name"), "secret name is required")) diff --git a/api/v1alpha1/validation_test.go b/api/v1alpha1/validation_test.go index a5e09dc2..7790cc2b 100644 --- a/api/v1alpha1/validation_test.go +++ b/api/v1alpha1/validation_test.go @@ -62,6 +62,39 @@ func TestMCPAccessGrantValidateAllowsTeamOnlySubject(t *testing.T) { } } +func TestMCPAccessGrantValidateAllowsWildcardSubject(t *testing.T) { + grant := &MCPAccessGrant{ + ObjectMeta: metav1.ObjectMeta{Name: "grant"}, + Spec: MCPAccessGrantSpec{ + ServerRef: ServerReference{Name: "payments"}, + }, + } + + if err := grant.validate(); err != nil { + t.Fatalf("expected empty subject to remain a wildcard grant, got %v", err) + } +} + +func TestMCPAccessGrantValidateCreateWarnsOnWildcardSubject(t *testing.T) { + grant := &MCPAccessGrant{ + ObjectMeta: metav1.ObjectMeta{Name: "grant"}, + Spec: MCPAccessGrantSpec{ + ServerRef: ServerReference{Name: "payments"}, + }, + } + + warnings, err := grant.ValidateCreate() + if err != nil { + t.Fatalf("expected wildcard grant to validate, got %v", err) + } + if len(warnings) != 1 { + t.Fatalf("expected one wildcard warning, got %d: %v", len(warnings), warnings) + } + if !strings.Contains(warnings[0], "wildcard grant") { + t.Fatalf("expected wildcard warning, got %q", warnings[0]) + } +} + func TestMCPAccessGrantValidateRejectsWhitespaceTeamID(t *testing.T) { grant := &MCPAccessGrant{ ObjectMeta: metav1.ObjectMeta{Name: "grant"}, @@ -80,30 +113,19 @@ func TestMCPAccessGrantValidateRejectsWhitespaceTeamID(t *testing.T) { } } -func TestMCPAgentSessionValidateUsesInjectedTimeSource(t *testing.T) { - fixedNow := time.Date(2026, time.March, 25, 12, 0, 0, 0, time.UTC) - originalNowFunc := nowFunc - nowFunc = func() time.Time { return fixedNow } - t.Cleanup(func() { - nowFunc = originalNowFunc - }) - +func TestMCPAgentSessionValidateAllowsExpiredSessionState(t *testing.T) { session := &MCPAgentSession{ ObjectMeta: metav1.ObjectMeta{Name: "session"}, Spec: MCPAgentSessionSpec{ ServerRef: ServerReference{Name: "payments"}, Subject: SubjectRef{AgentID: "ops-agent"}, ConsentedTrust: TrustLevelMedium, - ExpiresAt: &metav1.Time{Time: fixedNow}, + ExpiresAt: &metav1.Time{Time: time.Date(2026, time.March, 25, 12, 0, 0, 0, time.UTC)}, }, } - err := session.validate() - if err == nil { - t.Fatal("expected validation error for expired session") - } - if !strings.Contains(err.Error(), "expiresAt") { - t.Fatalf("expected expiresAt validation error, got %v", err) + if err := session.validate(); err != nil { + t.Fatalf("expired sessions should remain valid persisted state: %v", err) } } @@ -179,6 +201,29 @@ func TestMCPServerDefault(t *testing.T) { } } +func TestMCPServerDefaultWithOptions(t *testing.T) { + server := &MCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "test-server"}, + Spec: MCPServerSpec{ + Image: "example.com/mcp-server", + Gateway: &GatewayConfig{Enabled: true}, + Analytics: &AnalyticsConfig{}, + }, + } + + server.DefaultWithOptions(MCPServerDefaultOptions{ + DefaultIngressHost: "mcp.example.com", + DefaultAnalyticsIngestURL: "http://mcp-sentinel-ingest.mcp-sentinel.svc.cluster.local:8081/events", + }) + + if server.Spec.IngressHost != "mcp.example.com" { + t.Fatalf("expected ingressHost default from options, got %q", server.Spec.IngressHost) + } + if server.Spec.Analytics == nil || server.Spec.Analytics.IngestURL != "http://mcp-sentinel-ingest.mcp-sentinel.svc.cluster.local:8081/events" { + t.Fatalf("expected analytics ingest URL default from options, got %#v", server.Spec.Analytics) + } +} + func TestMCPServerDefaultGatewayAuthTeamHeader(t *testing.T) { server := &MCPServer{ ObjectMeta: metav1.ObjectMeta{Name: "test-server"}, diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index a7d95428..5c449937 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -365,6 +365,21 @@ func (in *MCPServer) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MCPServerDefaultOptions) DeepCopyInto(out *MCPServerDefaultOptions) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MCPServerDefaultOptions. +func (in *MCPServerDefaultOptions) DeepCopy() *MCPServerDefaultOptions { + if in == nil { + return nil + } + out := new(MCPServerDefaultOptions) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *MCPServerList) DeepCopyInto(out *MCPServerList) { *out = *in diff --git a/cmd/operator/main.go b/cmd/operator/main.go index c64d998a..cbf5d733 100644 --- a/cmd/operator/main.go +++ b/cmd/operator/main.go @@ -72,10 +72,17 @@ func main() { } if webhooksEnabledFromEnv(os.Getenv) { + mcpServerWebhookOptions := mcpv1alpha1.MCPServerDefaultOptions{ + DefaultIngressHost: os.Getenv("MCP_DEFAULT_INGRESS_HOST"), + DefaultAnalyticsIngestURL: analyticsIngestURLFromEnv(os.Getenv), + } + if err := (&mcpv1alpha1.MCPServer{}).SetupWebhookWithManagerWithOptions(mgr, mcpServerWebhookOptions); err != nil { + setupLog.Error(err, "unable to create webhook") + os.Exit(1) + } for _, resource := range []interface { SetupWebhookWithManager(ctrl.Manager) error }{ - &mcpv1alpha1.MCPServer{}, &mcpv1alpha1.MCPAccessGrant{}, &mcpv1alpha1.MCPAgentSession{}, } { diff --git a/config/webhook/kustomization.yaml b/config/webhook/kustomization.yaml new file mode 100644 index 00000000..98a66d3a --- /dev/null +++ b/config/webhook/kustomization.yaml @@ -0,0 +1,6 @@ +# Standalone `kubectl apply -k config/webhook` does not inject clientConfig.caBundle. +# Use `mcp-runtime setup`, which reads ca.crt from the operator webhook TLS Secret, +# or manually patch Mutating/ValidatingWebhookConfiguration objects after apply. +resources: +- manifests.yaml +- service.yaml diff --git a/config/webhook/manifests.yaml b/config/webhook/manifests.yaml new file mode 100644 index 00000000..8735fca0 --- /dev/null +++ b/config/webhook/manifests.yaml @@ -0,0 +1,96 @@ +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + name: mutating-webhook-configuration +webhooks: +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: mcp-runtime-operator-webhook-service + namespace: mcp-runtime + path: /mutate-mcpruntime-org-v1alpha1-mcpserver + port: 443 + failurePolicy: Ignore + name: mmcpserver.kb.io + rules: + - apiGroups: + - mcpruntime.org + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + resources: + - mcpservers + sideEffects: None +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + name: validating-webhook-configuration +webhooks: +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: mcp-runtime-operator-webhook-service + namespace: mcp-runtime + path: /validate-mcpruntime-org-v1alpha1-mcpaccessgrant + port: 443 + failurePolicy: Fail + name: vmcpaccessgrant.kb.io + rules: + - apiGroups: + - mcpruntime.org + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + resources: + - mcpaccessgrants + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: mcp-runtime-operator-webhook-service + namespace: mcp-runtime + path: /validate-mcpruntime-org-v1alpha1-mcpagentsession + port: 443 + failurePolicy: Fail + name: vmcpagentsession.kb.io + rules: + - apiGroups: + - mcpruntime.org + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + resources: + - mcpagentsessions + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: mcp-runtime-operator-webhook-service + namespace: mcp-runtime + path: /validate-mcpruntime-org-v1alpha1-mcpserver + port: 443 + failurePolicy: Fail + name: vmcpserver.kb.io + rules: + - apiGroups: + - mcpruntime.org + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + resources: + - mcpservers + sideEffects: None diff --git a/config/webhook/service.yaml b/config/webhook/service.yaml new file mode 100644 index 00000000..790035a1 --- /dev/null +++ b/config/webhook/service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: mcp-runtime-operator-webhook-service + namespace: mcp-runtime +spec: + ports: + - name: https + port: 443 + protocol: TCP + targetPort: 9443 + selector: + control-plane: controller-manager diff --git a/docs/api.md b/docs/api.md index 5bdef345..63ce96fb 100644 --- a/docs/api.md +++ b/docs/api.md @@ -117,7 +117,10 @@ spec: `MCPServer.spec.teamID` records the owning platform team. `SubjectRef` has `humanID`, `agentID`, and `teamID`; the gateway matches every non-empty subject -field exactly. A grant with only `subject.teamID` applies to any authenticated +field exactly. An all-empty `subject` is a wildcard grant (the validating +webhook emits a warning): the gateway matches any authenticated principal for +the server, while adapter session creation still requires subject alignment +with the caller. A grant with only `subject.teamID` applies to any authenticated principal from that team when trusted header or OAuth team identity is present. See [Multi-team isolation](multi-team.md). diff --git a/docs/internals/api-types.md b/docs/internals/api-types.md index d9f04265..40d113b9 100644 --- a/docs/internals/api-types.md +++ b/docs/internals/api-types.md @@ -35,7 +35,7 @@ Key spec areas: | Area | Fields | Contributor notes | |---|---|---| | Image | `image`, `imageTag`, `registryOverride`, `useProvisionedRegistry`, `imagePullSecrets` | Reconciled by the operator into Deployment image refs and pull secrets. Keep registry behavior aligned with setup and metadata generation. | -| Scale and ports | `replicas`, `port`, `servicePort` | Defaults are applied by the operator; CRD schema should allow unset optional fields when defaults exist. | +| Scale and ports | `replicas`, `port`, `servicePort` | Defaults are applied by the admission webhook; CRD schema should allow unset optional fields when defaults exist. | | Routing | `ingressHost`, `publicPathPrefix`, `ingressPath`, `ingressClass`, `ingressAnnotations` | Host-based and hostless path-based routing both matter. E2E should cover public path changes. | | Runtime config | `envVars`, `secretEnvVars`, `resources` | Converted into pod container env and resource requirements. | | Inventory | `tools`, `prompts`, `mcpResources`, `tasks` | Used by gateway policy and UI/API surfaces. | @@ -107,16 +107,26 @@ Shared enums include: Keep enum values stable once published. If a value must be renamed, add a migration path and update examples, generated CRDs, UI/API validation, and e2e. -## Webhooks and Validation +## Webhooks, Defaults, and Validation -Validation methods live with the API package and are registered through -controller-runtime webhook setup. They should enforce invariants that must be -true no matter which client creates the object. +Defaulting and validation methods live with the API package and are registered +through controller-runtime webhook setup. They should enforce invariants that +must be true no matter which client creates or updates the object. + +`MCPServer` defaults are applied by the mutating admission webhook. The +reconciler still applies the same defaults to an in-memory copy before rendering +resources so legacy objects created before the webhook existed continue to +reconcile, but it should not persist those defaults back into the API object. Use validation for object-level API correctness, not runtime availability. For example, malformed policy decisions belong in validation; whether an image is pullable belongs in setup/doctor diagnostics and Kubernetes status. +Generated webhook configuration lives under `config/webhook/`. The setup flow +creates the webhook TLS secret, enables webhook serving on the operator +deployment, and injects the generated CA bundle into the webhook configuration +before applying it. + ## Generated Files `zz_generated.deepcopy.go` is generated and should not be hand-edited. Regenerate diff --git a/docs/internals/cmd-operator.md b/docs/internals/cmd-operator.md index 294740d7..3ac61fe0 100644 --- a/docs/internals/cmd-operator.md +++ b/docs/internals/cmd-operator.md @@ -20,6 +20,7 @@ go doc -all ./internal/operator - configures controller-runtime logging - creates the manager - registers the `MCPServerReconciler` +- registers admission webhooks when `MCP_ENABLE_WEBHOOKS` is enabled - installs health and readiness probes - starts the manager with signal handling @@ -31,7 +32,7 @@ Keep this package focused on process wiring. Reconciliation behavior belongs in `MCPServerReconciler` follows a predictable loop: 1. Fetch the `MCPServer`. -2. Apply defaults and persist defaulted spec fields when needed. +2. Apply defaults to an in-memory copy for reconcile-time rendering. 3. Validate routing prerequisites. 4. Reconcile the Deployment. 5. Reconcile the Service. @@ -45,8 +46,9 @@ normal garbage collection cleans them up with the `MCPServer`. ## Defaults -Default values are intentionally centralized in `internal/operator` constants. -Current defaults include: +Default values are intentionally centralized in `api/v1alpha1` so the admission +webhook and reconciler fallback share the same behavior. Current defaults +include: | Setting | Default | |---|---| @@ -62,8 +64,8 @@ Current defaults include: | CPU limit | `500m` | | memory limit | `256Mi` | -If you change a default, update API docs, examples, tests, and any CLI metadata -generation that emits the same field. +If you change a default, update API docs, examples, webhook/reconciler tests, +and any CLI metadata generation that emits the same field. ## Deployment Reconciliation diff --git a/docs/internals/go-package-reference.md b/docs/internals/go-package-reference.md index 16a99bd5..1e43c72c 100644 --- a/docs/internals/go-package-reference.md +++ b/docs/internals/go-package-reference.md @@ -124,11 +124,16 @@ Package v1alpha1 contains API Schema definitions for the MCP server resource. - [`func (in *MCPServer) DeepCopyInto(out *MCPServer)`](#api-types-func-in-mcpserver-deepcopyinto-out-mcpserver) - [`func (in *MCPServer) DeepCopyObject() runtime.Object`](#api-types-func-in-mcpserver-deepcopyobject-runtime-object) - [`func (r *MCPServer) Default()`](#api-types-func-r-mcpserver-default) +- [`func (r *MCPServer) DefaultWithOptions(options MCPServerDefaultOptions)`](#api-types-func-r-mcpserver-defaultwithoptions-options-mcpserverdefaultoptions) - [`func (r *MCPServer) SetupWebhookWithManager(mgr ctrl.Manager) error`](#api-types-func-r-mcpserver-setupwebhookwithmanager-mgr-ctrl-manager-error) +- [`func (r *MCPServer) SetupWebhookWithManagerWithOptions(mgr ctrl.Manager, options MCPServerDefaultOptions) error`](#api-types-func-r-mcpserver-setupwebhookwithmanagerwithoptions-mgr-ctrl-manager-options-mcpserverdefaultoptions-error) - [`func (r *MCPServer) String() string`](#api-types-func-r-mcpserver-string-string) - [`func (r *MCPServer) ValidateCreate() (admission.Warnings, error)`](#api-types-func-r-mcpserver-validatecreate-admission-warnings-error) - [`func (r *MCPServer) ValidateDelete() (admission.Warnings, error)`](#api-types-func-r-mcpserver-validatedelete-admission-warnings-error) - [`func (r *MCPServer) ValidateUpdate(_ runtime.Object) (admission.Warnings, error)`](#api-types-func-r-mcpserver-validateupdate-runtime-object-admission-warnings-error) +- [`type MCPServerDefaultOptions struct`](#api-types-type-mcpserverdefaultoptions-struct) +- [`func (in *MCPServerDefaultOptions) DeepCopy() *MCPServerDefaultOptions`](#api-types-func-in-mcpserverdefaultoptions-deepcopy-mcpserverdefaultoptions) +- [`func (in *MCPServerDefaultOptions) DeepCopyInto(out *MCPServerDefaultOptions)`](#api-types-func-in-mcpserverdefaultoptions-deepcopyinto-out-mcpserverdefaultoptions) - [`type MCPServerList struct`](#api-types-type-mcpserverlist-struct) - [`func (in *MCPServerList) DeepCopy() *MCPServerList`](#api-types-func-in-mcpserverlist-deepcopy-mcpserverlist) - [`func (in *MCPServerList) DeepCopyInto(out *MCPServerList)`](#api-types-func-in-mcpserverlist-deepcopyinto-out-mcpserverlist) @@ -441,7 +446,6 @@ func (in *MCPAccessGrant) DeepCopyObject() runtime.Object ```text func (r *MCPAccessGrant) SetupWebhookWithManager(mgr ctrl.Manager) error - +kubebuilder:webhook:path=/validate-mcpruntime-org-v1alpha1-mcpaccessgrant,mutating=false,failurePolicy=fail,sideEffects=None,groups=mcpruntime.org,resources=mcpaccessgrants,verbs=create;update,versions=v1alpha1,name=vmcpaccessgrant.kb.io,admissionReviewVersions=v1 ``` @@ -599,7 +603,6 @@ func (in *MCPAgentSession) DeepCopyObject() runtime.Object ```text func (r *MCPAgentSession) SetupWebhookWithManager(mgr ctrl.Manager) error - +kubebuilder:webhook:path=/validate-mcpruntime-org-v1alpha1-mcpagentsession,mutating=false,failurePolicy=fail,sideEffects=None,groups=mcpruntime.org,resources=mcpagentsessions,verbs=create;update,versions=v1alpha1,name=vmcpagentsession.kb.io,admissionReviewVersions=v1 ``` @@ -756,14 +759,26 @@ func (in *MCPServer) DeepCopyObject() runtime.Object ```text func (r *MCPServer) Default() - +kubebuilder:webhook:path=/mutate-mcpruntime-org-v1alpha1-mcpserver,mutating=true,failurePolicy=fail,sideEffects=None,groups=mcpruntime.org,resources=mcpservers,verbs=create;update,versions=v1alpha1,name=mmcpserver.kb.io,admissionReviewVersions=v1 + +``` + + +```text +func (r *MCPServer) DefaultWithOptions(options MCPServerDefaultOptions) + DefaultWithOptions applies MCPServer defaults, including operator-configured + fallbacks when the webhook is registered by the operator manager. ``` ```text func (r *MCPServer) SetupWebhookWithManager(mgr ctrl.Manager) error - +kubebuilder:webhook:path=/validate-mcpruntime-org-v1alpha1-mcpserver,mutating=false,failurePolicy=fail,sideEffects=None,groups=mcpruntime.org,resources=mcpservers,verbs=create;update,versions=v1alpha1,name=vmcpserver.kb.io,admissionReviewVersions=v1 + +``` + + +```text +func (r *MCPServer) SetupWebhookWithManagerWithOptions(mgr ctrl.Manager, options MCPServerDefaultOptions) error ``` @@ -791,6 +806,33 @@ func (r *MCPServer) ValidateUpdate(_ runtime.Object) (admission.Warnings, error) ``` + +```text +type MCPServerDefaultOptions struct { + DefaultIngressHost string + DefaultAnalyticsIngestURL string +} + MCPServerDefaultOptions holds operator-scoped values that the admission + webhook can use while defaulting MCPServer objects. + +``` + + +```text +func (in *MCPServerDefaultOptions) DeepCopy() *MCPServerDefaultOptions + DeepCopy is an autogenerated deepcopy function, copying the receiver, + creating a new MCPServerDefaultOptions. + +``` + + +```text +func (in *MCPServerDefaultOptions) DeepCopyInto(out *MCPServerDefaultOptions) + DeepCopyInto is an autogenerated deepcopy function, copying the receiver, + writing into out. in must be non-nil. + +``` + ```text type MCPServerList struct { diff --git a/docs/internals/internal-cli.md b/docs/internals/internal-cli.md index 957db28a..2e8ce4d2 100644 --- a/docs/internals/internal-cli.md +++ b/docs/internals/internal-cli.md @@ -79,6 +79,9 @@ Important setup contracts: - Registry info is resolved before runtime images are named. - Internal registry pushes validate platform credentials, then use an in-cluster helper when direct host pushes are not appropriate. +- Operator setup prepares admission webhook TLS, enables webhook serving on the + manager deployment, and applies the generated webhook service/configuration + with the matching CA bundle. - Sentinel rollouts use `MCP_DEPLOYMENT_TIMEOUT`. - Setup verification should fail with diagnostic context instead of reporting success after partial deployment. diff --git a/docs/runtime.md b/docs/runtime.md index a4470290..f59f7964 100644 --- a/docs/runtime.md +++ b/docs/runtime.md @@ -156,7 +156,7 @@ integration examples. The operator is a single-controller `controller-runtime` manager: 1. Watches `MCPServer` (and owns Deployment / Service / Ingress). -2. On reconcile, fills defaults via `setDefaults`, persists spec changes if defaults were added. +2. Defaults and validates new API writes through admission webhooks; on reconcile, applies the same defaults to an in-memory copy for legacy objects without patching the stored spec. 3. Resolves the image string (respecting `imageTag`, `registryOverride`, and `PROVISIONED_REGISTRY_URL`). 4. Builds image-pull secrets, including auto-creating a docker-config secret from provisioned-registry env vars. 5. Reconciles Deployment → Service → Ingress in order. diff --git a/internal/cli/cluster/doctor/doctor.go b/internal/cli/cluster/doctor/doctor.go index 1471dc7a..8f540f44 100644 --- a/internal/cli/cluster/doctor/doctor.go +++ b/internal/cli/cluster/doctor/doctor.go @@ -231,6 +231,7 @@ func doctorCheckSpecs(kubectl core.KubectlRunner, distro Distribution) []doctorC }, {Name: "MCPServer CRD", Detail: "checking that the MCPServer API type is installed", Run: func() DoctorCheck { return checkMCPServerCRD(kubectl) }}, {Name: "operator readiness", Detail: "reading ready and desired replicas for the operator deployment", Run: func() DoctorCheck { return checkOperatorReady(kubectl) }}, + {Name: "operator webhook TLS expiry", Detail: "checking operator admission webhook serving certificate expiry", Run: func() DoctorCheck { return checkOperatorWebhookCertExpiry(kubectl) }}, {Name: "operator registry endpoint", Detail: "checking the operator uses a node-pullable registry endpoint", Run: func() DoctorCheck { return checkOperatorRegistryEndpoint(kubectl) }}, {Name: "operator reconcile errors (last 10m)", Detail: "scanning recent operator logs for reconcile failure patterns", Run: func() DoctorCheck { return checkOperatorRecentReconcileErrors(kubectl) }}, {Name: "operator ClusterRole rules", Detail: "verifying mcp-runtime-operator-role grants get/list/watch on the resources the informer cache needs", Run: func() DoctorCheck { return checkOperatorClusterRoleRules(kubectl) }}, diff --git a/internal/cli/cluster/doctor/doctor_test.go b/internal/cli/cluster/doctor/doctor_test.go index 84e1c3eb..47bf9376 100644 --- a/internal/cli/cluster/doctor/doctor_test.go +++ b/internal/cli/cluster/doctor/doctor_test.go @@ -2,9 +2,15 @@ package doctor import ( "context" + "crypto/rand" + "crypto/rsa" + "crypto/x509" + "crypto/x509/pkix" "encoding/base64" + "encoding/pem" "errors" "fmt" + "math/big" "strings" "testing" "time" @@ -208,6 +214,53 @@ func TestCheckOperatorReady(t *testing.T) { }) } +func testOperatorWebhookServingCertB64(t *testing.T, notAfter time.Time) string { + t.Helper() + key, err := rsa.GenerateKey(rand.Reader, 2048) + if err != nil { + t.Fatalf("generate key: %v", err) + } + template := x509.Certificate{ + SerialNumber: big.NewInt(1), + Subject: pkix.Name{CommonName: "webhook-test"}, + NotBefore: time.Now().Add(-time.Hour), + NotAfter: notAfter, + KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment, + } + der, err := x509.CreateCertificate(rand.Reader, &template, &template, &key.PublicKey, key) + if err != nil { + t.Fatalf("create certificate: %v", err) + } + return base64.StdEncoding.EncodeToString(pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der})) +} + +func TestCheckOperatorWebhookCertExpiry(t *testing.T) { + t.Run("ok when webhook secret is absent", func(t *testing.T) { + mock := &core.MockExecutor{ + CommandFunc: func(spec core.ExecSpec) *core.MockCommand { + return &core.MockCommand{OutputData: []byte("")} + }, + } + check := checkOperatorWebhookCertExpiry(core.NewTestKubectlClient(mock)) + if !check.OK || !strings.Contains(check.Detail, "not present") { + t.Fatalf("unexpected check: %+v", check) + } + }) + + t.Run("warns inside renewal window", func(t *testing.T) { + certB64 := testOperatorWebhookServingCertB64(t, time.Now().UTC().Add(10*24*time.Hour)) + mock := &core.MockExecutor{ + CommandFunc: func(spec core.ExecSpec) *core.MockCommand { + return &core.MockCommand{OutputData: []byte(certB64)} + }, + } + check := checkOperatorWebhookCertExpiry(core.NewTestKubectlClient(mock)) + if !check.OK || !strings.Contains(check.Detail, "warning:") { + t.Fatalf("expected renewal warning, got %+v", check) + } + }) +} + func TestCheckGatewayAnalyticsCredentials(t *testing.T) { t.Run("fails when gateway has ingest URL without api key", func(t *testing.T) { mock := &core.MockExecutor{ diff --git a/internal/cli/cluster/doctor/operator.go b/internal/cli/cluster/doctor/operator.go index 979cc036..f201eef7 100644 --- a/internal/cli/cluster/doctor/operator.go +++ b/internal/cli/cluster/doctor/operator.go @@ -3,7 +3,10 @@ package doctor import ( "bufio" "bytes" + "crypto/x509" + "encoding/base64" "encoding/json" + "encoding/pem" "fmt" "strings" "time" @@ -205,6 +208,88 @@ func checkOperatorReady(kubectl core.KubectlRunner) DoctorCheck { } } +const operatorWebhookCertRenewalWindow = 30 * 24 * time.Hour + +func checkOperatorWebhookCertExpiry(kubectl core.KubectlRunner) DoctorCheck { + const ( + checkName = "operator webhook TLS expiry" + secretName = "mcp-runtime-operator-webhook-server-cert" // #nosec G101 -- Kubernetes Secret object name, not credential material. + namespace = "mcp-runtime" + ) + cmd, err := kubectl.CommandArgs([]string{ + "get", "secret", secretName, + "-n", namespace, + "--ignore-not-found", "-o", "jsonpath={.data.tls\\.crt}", + }) + if err != nil { + return DoctorCheck{ + Name: checkName, + OK: false, + Detail: fmt.Sprintf("kubectl error: %v", err), + Remedy: "check cluster connectivity and kubeconfig", + } + } + out, err := cmd.Output() + certB64 := strings.TrimSpace(string(out)) + if err != nil || certB64 == "" { + return DoctorCheck{ + Name: checkName, + OK: true, + Detail: "webhook TLS secret not present (webhooks may be disabled)", + } + } + certPEM, err := base64.StdEncoding.DecodeString(certB64) + if err != nil { + return DoctorCheck{ + Name: checkName, + OK: false, + Detail: "failed to decode webhook serving certificate", + Remedy: "re-run `./bin/mcp-runtime setup` to regenerate operator webhook TLS", + } + } + block, _ := pem.Decode(certPEM) + if block == nil { + return DoctorCheck{ + Name: checkName, + OK: false, + Detail: "webhook serving certificate is not valid PEM", + Remedy: "re-run `./bin/mcp-runtime setup` to regenerate operator webhook TLS", + } + } + cert, err := x509.ParseCertificate(block.Bytes) + if err != nil { + return DoctorCheck{ + Name: checkName, + OK: false, + Detail: fmt.Sprintf("failed to parse webhook serving certificate: %v", err), + Remedy: "re-run `./bin/mcp-runtime setup` to regenerate operator webhook TLS", + } + } + + now := time.Now().UTC() + if now.After(cert.NotAfter) { + return DoctorCheck{ + Name: checkName, + OK: false, + Detail: fmt.Sprintf("serving certificate expired at %s", cert.NotAfter.UTC().Format(time.RFC3339)), + Remedy: "re-run `./bin/mcp-runtime setup` to renew operator webhook TLS", + } + } + if now.Add(operatorWebhookCertRenewalWindow).After(cert.NotAfter) { + return DoctorCheck{ + Name: checkName, + OK: true, + Detail: fmt.Sprintf("warning: serving certificate expires %s (within 30-day renewal window); re-run setup to renew", cert.NotAfter.UTC().Format(time.RFC3339)), + Remedy: "re-run `./bin/mcp-runtime setup` to renew the serving certificate", + } + } + return DoctorCheck{ + Name: checkName, + OK: true, + Detail: fmt.Sprintf("serving certificate valid until %s", cert.NotAfter.UTC().Format(time.RFC3339)), + } +} + func checkOperatorRecentReconcileErrors(kubectl core.KubectlRunner) DoctorCheck { cmd, err := kubectl.CommandArgs([]string{"logs", "-n", "mcp-runtime", "deploy/mcp-runtime-operator-controller-manager", "--since=10m"}) if err != nil { diff --git a/internal/cli/setup/platform/deploy.go b/internal/cli/setup/platform/deploy.go index 45c96004..76f007e6 100644 --- a/internal/cli/setup/platform/deploy.go +++ b/internal/cli/setup/platform/deploy.go @@ -444,8 +444,19 @@ func deployOperatorManifestsWithClientGo(logger *zap.Logger, operatorImage, gate } core.Info("Reapplied operator ClusterRole mcp-runtime-operator-role from config/rbac/role.yaml; run `mcp-runtime cluster doctor` if MCPServer creates ever appear unreconciled") + core.Info("Preparing operator admission webhook TLS") + webhookCA, err := ensureOperatorWebhookTLSSecretClientGo() + if err != nil { + wrappedErr := core.WrapWithSentinel(core.ErrApplySecretManifestFailed, err, fmt.Sprintf("failed to prepare operator webhook TLS secret: %v", err)) + core.Error("Failed to prepare operator webhook TLS") + if logger != nil { + core.LogStructuredError(logger, wrappedErr, "Failed to prepare operator webhook TLS") + } + return wrappedErr + } + core.Info("Applying operator deployment") - managerYAML, err := renderOperatorManagerManifest(operatorImage, gatewayProxyImage, operatorArgs, existingOperatorEnvValueClientGo) + managerYAML, err := renderOperatorManagerManifest(operatorImage, gatewayProxyImage, operatorArgs, existingOperatorEnvValueClientGo, webhookCA) if err != nil { if logger != nil { core.LogStructuredError(logger, err, "Failed to render manager manifest") @@ -478,11 +489,20 @@ func deployOperatorManifestsWithClientGo(logger *zap.Logger, operatorImage, gate return wrappedErr } + if err := applyOperatorWebhookManifestsClientGo(webhookCA); err != nil { + wrappedErr := core.WrapWithSentinel(core.ErrApplyManifestFailed, err, fmt.Sprintf("failed to apply operator webhook manifests: %v", err)) + core.Error("Failed to apply operator webhook manifests") + if logger != nil { + core.LogStructuredError(logger, wrappedErr, "Failed to apply operator webhook manifests") + } + return wrappedErr + } + core.Success("Operator manifests deployed successfully") return nil } -func renderOperatorManagerManifest(operatorImage, gatewayProxyImage string, operatorArgs []string, existingEnvValue func(string) string) ([]byte, error) { +func renderOperatorManagerManifest(operatorImage, gatewayProxyImage string, operatorArgs []string, existingEnvValue func(string) string, webhookCA []byte) ([]byte, error) { managerYAML, err := os.ReadFile("config/manager/manager.yaml") if err != nil { wrappedErr := core.WrapWithSentinel(core.ErrReadManagerYAMLFailed, err, fmt.Sprintf("failed to read manager.yaml: %v", err)) @@ -536,6 +556,12 @@ func renderOperatorManagerManifest(operatorImage, gatewayProxyImage string, oper } } + if err := configureOperatorWebhookDeployment(mutator, webhookCA); err != nil { + wrappedErr := core.WrapWithSentinel(core.ErrMutateManagerYAMLFailed, err, fmt.Sprintf("failed to configure operator webhooks: %v", err)) + core.Error("Failed to configure operator webhooks") + return nil, wrappedErr + } + mutatedYAML, err := mutator.ToYAML() if err != nil { wrappedErr := core.WrapWithSentinel(core.ErrRenderManagerYAMLFailed, err, fmt.Sprintf("failed to render mutated manifest: %v", err)) @@ -591,6 +617,17 @@ func deployOperatorManifestsWithKubectl(kubectl core.KubectlRunner, logger *zap. } core.Info("Reapplied operator ClusterRole mcp-runtime-operator-role from config/rbac/role.yaml; run `mcp-runtime cluster doctor` if MCPServer creates ever appear unreconciled") + core.Info("Preparing operator admission webhook TLS") + webhookCA, err := ensureOperatorWebhookTLSSecret(kubectl) + if err != nil { + wrappedErr := core.WrapWithSentinel(core.ErrApplySecretManifestFailed, err, fmt.Sprintf("failed to prepare operator webhook TLS secret: %v", err)) + core.Error("Failed to prepare operator webhook TLS") + if logger != nil { + core.LogStructuredError(logger, wrappedErr, "Failed to prepare operator webhook TLS") + } + return wrappedErr + } + // Step 3: Apply manager deployment with structured image replacement core.Info("Applying operator deployment") @@ -668,6 +705,15 @@ func deployOperatorManifestsWithKubectl(kubectl core.KubectlRunner, logger *zap. } } + if err := configureOperatorWebhookDeployment(mutator, webhookCA); err != nil { + wrappedErr := core.WrapWithSentinel(core.ErrMutateManagerYAMLFailed, err, fmt.Sprintf("failed to configure operator webhooks: %v", err)) + core.Error("Failed to configure operator webhooks") + if logger != nil { + core.LogStructuredError(logger, wrappedErr, "Failed to configure operator webhooks") + } + return wrappedErr + } + // Render the mutated manifest mutatedYAML, err := mutator.ToYAML() if err != nil { @@ -705,6 +751,15 @@ func deployOperatorManifestsWithKubectl(kubectl core.KubectlRunner, logger *zap. return wrappedErr } + if err := applyOperatorWebhookManifests(kubectl, webhookCA); err != nil { + wrappedErr := core.WrapWithSentinel(core.ErrApplyManifestFailed, err, fmt.Sprintf("failed to apply operator webhook manifests: %v", err)) + core.Error("Failed to apply operator webhook manifests") + if logger != nil { + core.LogStructuredError(logger, wrappedErr, "Failed to apply operator webhook manifests") + } + return wrappedErr + } + core.Success("Operator manifests deployed successfully") return nil } diff --git a/internal/cli/setup/platform/helpers_test.go b/internal/cli/setup/platform/helpers_test.go index 2e283120..2e941849 100644 --- a/internal/cli/setup/platform/helpers_test.go +++ b/internal/cli/setup/platform/helpers_test.go @@ -1,7 +1,9 @@ package platform import ( + "crypto/x509" "encoding/base64" + "encoding/pem" "errors" "fmt" "io" @@ -16,6 +18,9 @@ import ( "go.uber.org/zap" "gopkg.in/yaml.v3" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" "mcp-runtime/internal/cli/cluster" "mcp-runtime/internal/cli/core" @@ -178,6 +183,253 @@ func TestBuildOperatorImagePassesDockerPlatform(t *testing.T) { } } +func TestGenerateOperatorWebhookCertificate(t *testing.T) { + caPEM, caKeyPEM, certPEM, keyPEM, err := generateOperatorWebhookCertificate(time.Date(2026, 5, 15, 0, 0, 0, 0, time.UTC)) + if err != nil { + t.Fatalf("generateOperatorWebhookCertificate failed: %v", err) + } + + caBlock, _ := pem.Decode(caPEM) + if caBlock == nil { + t.Fatal("expected CA certificate PEM block") + } + caCert, err := x509.ParseCertificate(caBlock.Bytes) + if err != nil { + t.Fatalf("parse CA certificate: %v", err) + } + if !caCert.IsCA { + t.Fatal("expected CA certificate to be a CA") + } + caKeyBlock, _ := pem.Decode(caKeyPEM) + if caKeyBlock == nil { + t.Fatal("expected CA private key PEM block") + } + + certBlock, _ := pem.Decode(certPEM) + if certBlock == nil { + t.Fatal("expected serving certificate PEM block") + } + servingCert, err := x509.ParseCertificate(certBlock.Bytes) + if err != nil { + t.Fatalf("parse serving certificate: %v", err) + } + if servingCert.IsCA { + t.Fatal("serving certificate should not be a CA") + } + keyBlock, _ := pem.Decode(keyPEM) + if keyBlock == nil { + t.Fatal("expected serving private key PEM block") + } + + roots := x509.NewCertPool() + roots.AddCert(caCert) + serviceDNS := operatorWebhookServiceName + "." + core.NamespaceMCPRuntime + ".svc" + if _, err := servingCert.Verify(x509.VerifyOptions{DNSName: serviceDNS, Roots: roots}); err != nil { + t.Fatalf("serving certificate did not verify for %s: %v", serviceDNS, err) + } + if !caCert.NotAfter.After(servingCert.NotAfter) { + t.Fatalf("CA expiry %s must outlive serving certificate expiry %s", caCert.NotAfter, servingCert.NotAfter) + } +} + +func TestReusableOperatorWebhookServingCert(t *testing.T) { + issued := time.Date(2026, 5, 15, 0, 0, 0, 0, time.UTC) + caPEM, _, certPEM, keyPEM, err := generateOperatorWebhookCertificate(issued) + if err != nil { + t.Fatalf("generateOperatorWebhookCertificate failed: %v", err) + } + + if !reusableOperatorWebhookServingCert(caPEM, certPEM, keyPEM, issued) { + t.Fatal("expected freshly generated certificate to be reusable") + } + insideRenewalWindow := issued.AddDate(1, 0, 0).Add(-29 * 24 * time.Hour) + if reusableOperatorWebhookServingCert(caPEM, certPEM, keyPEM, insideRenewalWindow) { + t.Fatal("expected certificate inside the renewal window to be rotated") + } + if reusableOperatorWebhookServingCert(nil, certPEM, keyPEM, issued) { + t.Fatal("expected secret without ca.crt to be rotated") + } + + _, _, _, otherKeyPEM, err := generateOperatorWebhookCertificate(issued) + if err != nil { + t.Fatalf("generateOperatorWebhookCertificate failed: %v", err) + } + if reusableOperatorWebhookServingCert(caPEM, certPEM, otherKeyPEM, issued) { + t.Fatal("expected mismatched private key to be rotated") + } +} + +func operatorWebhookSecretJSON(caPEM, caKeyPEM, certPEM, keyPEM []byte) []byte { + return []byte(fmt.Sprintf(`{"data":{"ca.crt":%q,"ca.key":%q,"tls.crt":%q,"tls.key":%q}}`, + base64.StdEncoding.EncodeToString(caPEM), + base64.StdEncoding.EncodeToString(caKeyPEM), + base64.StdEncoding.EncodeToString(certPEM), + base64.StdEncoding.EncodeToString(keyPEM))) +} + +func TestEnsureOperatorWebhookTLSSecretReusesValidSecret(t *testing.T) { + caPEM, caKeyPEM, certPEM, keyPEM, err := generateOperatorWebhookCertificate(time.Now().UTC()) + if err != nil { + t.Fatalf("generateOperatorWebhookCertificate failed: %v", err) + } + mock := &core.MockExecutor{ + CommandFunc: func(spec core.ExecSpec) *core.MockCommand { + if commandHasArgs(spec, "get", "secret", operatorWebhookSecretName) { + return &core.MockCommand{Args: spec.Args, OutputData: operatorWebhookSecretJSON(caPEM, caKeyPEM, certPEM, keyPEM)} + } + if commandHasArgs(spec, "apply") { + t.Errorf("expected no apply while reusing webhook secret, got %#v", spec.Args) + } + return &core.MockCommand{Args: spec.Args} + }, + } + kubectl := core.NewTestKubectlClient(mock) + + got, err := ensureOperatorWebhookTLSSecret(kubectl) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if string(got) != string(caPEM) { + t.Fatal("expected existing CA bundle to be returned without rotation") + } +} + +func TestEnsureOperatorWebhookTLSSecretRenewsExpiringServingCert(t *testing.T) { + // Issued ~11.5 months ago, the serving certificate expires inside the + // 30-day renewal window and must be re-signed with the stored CA. + caPEM, caKeyPEM, certPEM, keyPEM, err := generateOperatorWebhookCertificate(time.Now().UTC().AddDate(0, -11, -15)) + if err != nil { + t.Fatalf("generateOperatorWebhookCertificate failed: %v", err) + } + applied := 0 + mock := &core.MockExecutor{ + CommandFunc: func(spec core.ExecSpec) *core.MockCommand { + cmd := &core.MockCommand{Args: spec.Args} + if commandHasArgs(spec, "get", "secret", operatorWebhookSecretName) { + cmd.OutputData = operatorWebhookSecretJSON(caPEM, caKeyPEM, certPEM, keyPEM) + } + if commandHasArgs(spec, "apply") { + applied++ + cmd.RunFunc = func() error { + _, err := io.ReadAll(cmd.StdinR) + return err + } + } + return cmd + }, + } + kubectl := core.NewTestKubectlClient(mock) + + got, err := ensureOperatorWebhookTLSSecret(kubectl) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if applied == 0 { + t.Fatal("expected expiring webhook secret to be re-applied") + } + if string(got) != string(caPEM) { + t.Fatal("expected serving-certificate renewal to keep the existing CA bundle") + } +} + +func TestEnsureOperatorWebhookTLSSecretRotatesLegacySecretWithoutCAKey(t *testing.T) { + caPEM, _, certPEM, keyPEM, err := generateOperatorWebhookCertificate(time.Now().UTC().AddDate(0, -11, -15)) + if err != nil { + t.Fatalf("generateOperatorWebhookCertificate failed: %v", err) + } + applied := 0 + mock := &core.MockExecutor{ + CommandFunc: func(spec core.ExecSpec) *core.MockCommand { + cmd := &core.MockCommand{Args: spec.Args} + if commandHasArgs(spec, "get", "secret", operatorWebhookSecretName) { + cmd.OutputData = []byte(fmt.Sprintf(`{"data":{"ca.crt":%q,"tls.crt":%q,"tls.key":%q}}`, + base64.StdEncoding.EncodeToString(caPEM), + base64.StdEncoding.EncodeToString(certPEM), + base64.StdEncoding.EncodeToString(keyPEM))) + } + if commandHasArgs(spec, "apply") { + applied++ + cmd.RunFunc = func() error { + _, err := io.ReadAll(cmd.StdinR) + return err + } + } + return cmd + }, + } + kubectl := core.NewTestKubectlClient(mock) + + got, err := ensureOperatorWebhookTLSSecret(kubectl) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if applied == 0 { + t.Fatal("expected legacy webhook secret without ca.key to be re-applied") + } + if string(got) == string(caPEM) { + t.Fatal("expected legacy secret without ca.key to rotate the CA bundle") + } +} + +func TestEnsureOperatorWebhookTLSSecretClientGoReusesValidSecret(t *testing.T) { + caPEM, caKeyPEM, certPEM, keyPEM, err := generateOperatorWebhookCertificate(time.Now().UTC()) + if err != nil { + t.Fatalf("generateOperatorWebhookCertificate failed: %v", err) + } + resetPlatformKubeconfig(t) + swapKubernetesClientsForTest(t, newPlatformKubernetesTestClients([]runtime.Object{&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: operatorWebhookSecretName, Namespace: core.NamespaceMCPRuntime}, + Type: corev1.SecretTypeTLS, + Data: map[string][]byte{ + "ca.crt": caPEM, + "ca.key": caKeyPEM, + "tls.crt": certPEM, + "tls.key": keyPEM, + }, + }}, nil)) + + got, err := ensureOperatorWebhookTLSSecretClientGo() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if string(got) != string(caPEM) { + t.Fatal("expected existing CA bundle to be returned without rotation") + } +} + +func TestInjectOperatorWebhookCABundleQualifiesConfigurationNames(t *testing.T) { + rendered, err := injectOperatorWebhookCABundle([]byte(` +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + name: mutating-webhook-configuration +webhooks: +- name: example + clientConfig: {} +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + name: validating-webhook-configuration +webhooks: +- name: example + clientConfig: {} +`), []byte("test-ca")) + if err != nil { + t.Fatalf("injectOperatorWebhookCABundle failed: %v", err) + } + body := string(rendered) + for _, expected := range []string{ + "name: mcp-runtime-mutating-webhook-configuration", + "name: mcp-runtime-validating-webhook-configuration", + "caBundle:", + } { + if !strings.Contains(body, expected) { + t.Fatalf("rendered webhook manifest missing %q:\n%s", expected, body) + } + } +} + func TestGetOperatorImage(t *testing.T) { origOverride := core.DefaultCLIConfig.OperatorImage origTagResolver := setupImageTagResolver @@ -2532,6 +2784,9 @@ func TestDeployOperatorManifestsWithKubectl(t *testing.T) { }) var managerManifest string + var webhookTLSSecretManifest string + var webhookServiceManifest string + var webhookConfigManifest string mock := &core.MockExecutor{ CommandFunc: func(spec core.ExecSpec) *core.MockCommand { cmd := &core.MockCommand{Args: spec.Args} @@ -2544,6 +2799,23 @@ func TestDeployOperatorManifestsWithKubectl(t *testing.T) { managerManifest = string(data) return nil } + } else if commandHasArgs(spec, "apply", "-f", "-") { + cmd.RunFunc = func() error { + data, err := io.ReadAll(cmd.StdinR) + if err != nil { + return err + } + manifest := string(data) + switch { + case strings.Contains(manifest, "name: "+operatorWebhookSecretName): + webhookTLSSecretManifest = manifest + case strings.Contains(manifest, "kind: Service") && strings.Contains(manifest, operatorWebhookServiceName): + webhookServiceManifest = manifest + case strings.Contains(manifest, "MutatingWebhookConfiguration") || strings.Contains(manifest, "ValidatingWebhookConfiguration"): + webhookConfigManifest = manifest + } + return nil + } } return cmd }, @@ -2584,6 +2856,28 @@ func TestDeployOperatorManifestsWithKubectl(t *testing.T) { if !strings.Contains(managerManifest, "name: MCP_SENTINEL_INGEST_URL") || !strings.Contains(managerManifest, "value: "+defaultAnalyticsIngestURL) { t.Fatalf("expected manager manifest to include analytics ingest env, got:\n%s", managerManifest) } + if !strings.Contains(managerManifest, "name: MCP_ENABLE_WEBHOOKS") || !strings.Contains(managerManifest, "value: \"true\"") { + t.Fatalf("expected manager manifest to enable webhooks, got:\n%s", managerManifest) + } + if !strings.Contains(managerManifest, "secretName: "+operatorWebhookSecretName) || + !strings.Contains(managerManifest, "mountPath: "+operatorWebhookCertDir) { + t.Fatalf("expected manager manifest to mount webhook cert secret, got:\n%s", managerManifest) + } + if !strings.Contains(managerManifest, operatorWebhookCertHashAnnotation+":") { + t.Fatalf("expected manager manifest to include webhook cert hash annotation, got:\n%s", managerManifest) + } + if !strings.Contains(webhookTLSSecretManifest, "name: "+operatorWebhookSecretName) || + !strings.Contains(webhookTLSSecretManifest, "tls.crt:") || + !strings.Contains(webhookTLSSecretManifest, "tls.key:") { + t.Fatalf("expected webhook TLS secret manifest, got:\n%s", webhookTLSSecretManifest) + } + if !strings.Contains(webhookServiceManifest, "name: "+operatorWebhookServiceName) { + t.Fatalf("expected webhook service manifest, got:\n%s", webhookServiceManifest) + } + if !strings.Contains(webhookConfigManifest, "caBundle:") || + !strings.Contains(webhookConfigManifest, "name: "+operatorWebhookServiceName) { + t.Fatalf("expected webhook configuration with caBundle, got:\n%s", webhookConfigManifest) + } var ( hasCRD bool diff --git a/internal/cli/setup/platform/plan_flow_test.go b/internal/cli/setup/platform/plan_flow_test.go index 939e5868..20a68173 100644 --- a/internal/cli/setup/platform/plan_flow_test.go +++ b/internal/cli/setup/platform/plan_flow_test.go @@ -773,8 +773,9 @@ func TestSetupPlatformWithDeps_ExternalRegistry(t *testing.T) { Manifest: "config/ingress/overlays/http", Force: false, }, - RegistryManifest: "config/registry", - TLSEnabled: true, + RegistryManifest: "config/registry", + TLSEnabled: true, + InstallCertManager: true, } if err := setupPlatformWithDeps(zap.NewNop(), plan, deps); err != nil { @@ -855,9 +856,10 @@ func TestSetupPlatformWithDeps_InternalRegistryTLS(t *testing.T) { Manifest: "config/ingress/overlays/prod", Force: false, }, - RegistryManifest: "config/registry/overlays/tls", - TLSEnabled: true, - TestMode: true, + RegistryManifest: "config/registry/overlays/tls", + TLSEnabled: true, + TestMode: true, + InstallCertManager: true, } if err := setupPlatformWithDeps(zap.NewNop(), plan, deps); err != nil { @@ -951,8 +953,9 @@ func TestSetupPlatformWithDeps_ExternalRegistryTLS(t *testing.T) { Manifest: "config/ingress/overlays/prod", Force: false, }, - RegistryManifest: "config/registry/overlays/tls", - TLSEnabled: true, + RegistryManifest: "config/registry/overlays/tls", + TLSEnabled: true, + InstallCertManager: true, } if err := setupPlatformWithDeps(zap.NewNop(), plan, deps); err != nil { @@ -1033,9 +1036,10 @@ func TestSetupPlatformWithDeps_DiagnosticsOnRegistryWaitFailure(t *testing.T) { Manifest: "config/ingress/overlays/http", Force: false, }, - RegistryManifest: "config/registry", - TLSEnabled: true, - TestMode: true, + RegistryManifest: "config/registry", + TLSEnabled: true, + TestMode: true, + InstallCertManager: true, } if err := setupPlatformWithDeps(zap.NewNop(), plan, deps); err == nil { @@ -1099,8 +1103,9 @@ func TestSetupPlatformWithDeps_DiagnosticsOnOperatorWaitFailure(t *testing.T) { Manifest: "config/ingress/overlays/http", Force: false, }, - RegistryManifest: "config/registry/overlays/tls", - TLSEnabled: true, + RegistryManifest: "config/registry/overlays/tls", + TLSEnabled: true, + InstallCertManager: true, } if err := setupPlatformWithDeps(zap.NewNop(), plan, deps); err == nil { @@ -1166,8 +1171,9 @@ func TestSetupPlatformWithDeps_CRDCheckFailure(t *testing.T) { Manifest: "config/ingress/overlays/http", Force: false, }, - RegistryManifest: "config/registry/overlays/tls", - TLSEnabled: true, + RegistryManifest: "config/registry/overlays/tls", + TLSEnabled: true, + InstallCertManager: true, } if err := setupPlatformWithDeps(zap.NewNop(), plan, deps); err == nil { diff --git a/internal/cli/setup/platform/webhook.go b/internal/cli/setup/platform/webhook.go new file mode 100644 index 00000000..7bb2cfaf --- /dev/null +++ b/internal/cli/setup/platform/webhook.go @@ -0,0 +1,544 @@ +package platform + +import ( + "bytes" + "context" + "crypto/rand" + "crypto/rsa" + "crypto/sha256" + "crypto/x509" + "crypto/x509/pkix" + "encoding/base64" + "encoding/hex" + "encoding/json" + "encoding/pem" + "errors" + "fmt" + "io" + "math/big" + "os" + "time" + + "gopkg.in/yaml.v3" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "mcp-runtime/internal/cli/core" + "mcp-runtime/internal/cli/kube" + "mcp-runtime/internal/cli/setup/assetpath" + "mcp-runtime/pkg/manifest" +) + +const ( + operatorWebhookServiceName = "mcp-runtime-operator-webhook-service" + operatorWebhookSecretName = "mcp-runtime-operator-webhook-server-cert" // #nosec G101 -- Kubernetes Secret name. + operatorWebhookVolumeName = "webhook-server-cert" + operatorWebhookCertDir = "/tmp/k8s-webhook-server/serving-certs" + operatorWebhookCertHashAnnotation = "mcp-runtime.io/webhook-cert-sha256" + // operatorWebhookCertRenewalWindow is how long before expiry a setup + // re-run rotates the webhook serving certificate instead of reusing it. + // When ca.key is stored in the webhook TLS Secret, renewal re-signs the + // serving certificate with the existing CA so caBundle and validating + // webhooks stay stable; the CA itself is only rotated when it nears expiry + // or the Secret is missing ca.key / is malformed. + operatorWebhookCertRenewalWindow = 30 * 24 * time.Hour +) + +func generateOperatorWebhookCA(now time.Time) ([]byte, []byte, *x509.Certificate, *rsa.PrivateKey, error) { + caPrivateKey, err := rsa.GenerateKey(rand.Reader, 2048) + if err != nil { + return nil, nil, nil, nil, fmt.Errorf("generate webhook CA private key: %w", err) + } + + serialLimit := new(big.Int).Lsh(big.NewInt(1), 128) + caSerialNumber, err := rand.Int(rand.Reader, serialLimit) + if err != nil { + return nil, nil, nil, nil, fmt.Errorf("generate webhook CA certificate serial: %w", err) + } + + caTemplate := x509.Certificate{ + SerialNumber: caSerialNumber, + Subject: pkix.Name{ + CommonName: operatorWebhookServiceName + "-ca", + }, + NotBefore: now.Add(-time.Hour), + NotAfter: now.AddDate(10, 0, 0), + KeyUsage: x509.KeyUsageCertSign | x509.KeyUsageCRLSign, + BasicConstraintsValid: true, + IsCA: true, + } + + caCertDER, err := x509.CreateCertificate(rand.Reader, &caTemplate, &caTemplate, &caPrivateKey.PublicKey, caPrivateKey) + if err != nil { + return nil, nil, nil, nil, fmt.Errorf("create webhook CA certificate: %w", err) + } + caCert, err := x509.ParseCertificate(caCertDER) + if err != nil { + return nil, nil, nil, nil, fmt.Errorf("parse webhook CA certificate: %w", err) + } + + caCertPEM := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: caCertDER}) + caKeyPEM := pem.EncodeToMemory(&pem.Block{Type: "RSA PRIVATE KEY", Bytes: x509.MarshalPKCS1PrivateKey(caPrivateKey)}) + return caCertPEM, caKeyPEM, caCert, caPrivateKey, nil +} + +func signOperatorWebhookServingCertificate(now time.Time, caCert *x509.Certificate, caPrivateKey *rsa.PrivateKey) ([]byte, []byte, error) { + privateKey, err := rsa.GenerateKey(rand.Reader, 2048) + if err != nil { + return nil, nil, fmt.Errorf("generate webhook private key: %w", err) + } + + serialLimit := new(big.Int).Lsh(big.NewInt(1), 128) + serialNumber, err := rand.Int(rand.Reader, serialLimit) + if err != nil { + return nil, nil, fmt.Errorf("generate webhook certificate serial: %w", err) + } + + serviceDNS := operatorWebhookServiceName + "." + core.NamespaceMCPRuntime + ".svc" + certTemplate := x509.Certificate{ + SerialNumber: serialNumber, + Subject: pkix.Name{ + CommonName: serviceDNS, + }, + NotBefore: now.Add(-time.Hour), + NotAfter: now.AddDate(1, 0, 0), + KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, + BasicConstraintsValid: true, + DNSNames: []string{ + operatorWebhookServiceName, + operatorWebhookServiceName + "." + core.NamespaceMCPRuntime, + serviceDNS, + serviceDNS + ".cluster.local", + }, + } + + certDER, err := x509.CreateCertificate(rand.Reader, &certTemplate, caCert, &privateKey.PublicKey, caPrivateKey) + if err != nil { + return nil, nil, fmt.Errorf("create webhook certificate: %w", err) + } + + certPEM := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: certDER}) + keyPEM := pem.EncodeToMemory(&pem.Block{Type: "RSA PRIVATE KEY", Bytes: x509.MarshalPKCS1PrivateKey(privateKey)}) + return certPEM, keyPEM, nil +} + +func generateOperatorWebhookCertificate(now time.Time) ([]byte, []byte, []byte, []byte, error) { + caCertPEM, caKeyPEM, caCert, caPrivateKey, err := generateOperatorWebhookCA(now) + if err != nil { + return nil, nil, nil, nil, err + } + certPEM, keyPEM, err := signOperatorWebhookServingCertificate(now, caCert, caPrivateKey) + if err != nil { + return nil, nil, nil, nil, err + } + return caCertPEM, caKeyPEM, certPEM, keyPEM, nil +} + +func renewOperatorWebhookServingCertificate(now time.Time, caCertPEM, caKeyPEM []byte) ([]byte, []byte, []byte, error) { + caCert, caPrivateKey, err := parseOperatorWebhookCA(caCertPEM, caKeyPEM) + if err != nil { + return nil, nil, nil, err + } + certPEM, keyPEM, err := signOperatorWebhookServingCertificate(now, caCert, caPrivateKey) + if err != nil { + return nil, nil, nil, err + } + return caCertPEM, certPEM, keyPEM, nil +} + +func parseOperatorWebhookCA(caCertPEM, caKeyPEM []byte) (*x509.Certificate, *rsa.PrivateKey, error) { + caCert, err := parsePEMCertificate(caCertPEM) + if err != nil { + return nil, nil, fmt.Errorf("parse webhook CA certificate: %w", err) + } + if !caCert.IsCA { + return nil, nil, errors.New("webhook CA certificate is not a CA") + } + keyBlock, _ := pem.Decode(caKeyPEM) + if keyBlock == nil { + return nil, nil, errors.New("no PEM CA private key block") + } + caPrivateKey, err := x509.ParsePKCS1PrivateKey(keyBlock.Bytes) + if err != nil { + return nil, nil, fmt.Errorf("parse webhook CA private key: %w", err) + } + caPublicKey, ok := caCert.PublicKey.(*rsa.PublicKey) + if !ok || !caPrivateKey.PublicKey.Equal(caPublicKey) { + return nil, nil, errors.New("webhook CA private key does not match CA certificate") + } + return caCert, caPrivateKey, nil +} + +func operatorWebhookTLSSecretManifest(caCertPEM, caKeyPEM, certPEM, keyPEM []byte) string { + return fmt.Sprintf(`apiVersion: v1 +kind: Secret +metadata: + name: %s + namespace: %s +type: kubernetes.io/tls +data: + ca.crt: %s + ca.key: %s + tls.crt: %s + tls.key: %s +`, operatorWebhookSecretName, core.NamespaceMCPRuntime, base64.StdEncoding.EncodeToString(caCertPEM), base64.StdEncoding.EncodeToString(caKeyPEM), base64.StdEncoding.EncodeToString(certPEM), base64.StdEncoding.EncodeToString(keyPEM)) +} + +func resolveOperatorWebhookTLSSecret(now time.Time, caCertPEM, caKeyPEM, certPEM, keyPEM []byte) ([]byte, []byte, []byte, []byte, bool, error) { + if reusableOperatorWebhookServingCert(caCertPEM, certPEM, keyPEM, now) { + return caCertPEM, caKeyPEM, certPEM, keyPEM, false, nil + } + if operatorWebhookServingCertRenewalNeeded(certPEM, now) && + reusableOperatorWebhookCA(caCertPEM, caKeyPEM, now) && + reusableOperatorWebhookServingCertChain(caCertPEM, certPEM, keyPEM, now) { + renewedCA, renewedCert, renewedKey, err := renewOperatorWebhookServingCertificate(now, caCertPEM, caKeyPEM) + if err != nil { + return nil, nil, nil, nil, false, err + } + return renewedCA, caKeyPEM, renewedCert, renewedKey, true, nil + } + caCertPEM, caKeyPEM, certPEM, keyPEM, err := generateOperatorWebhookCertificate(now) + if err != nil { + return nil, nil, nil, nil, false, err + } + return caCertPEM, caKeyPEM, certPEM, keyPEM, true, nil +} + +// ensureOperatorWebhookTLSSecretClientGo reuses the existing webhook Secret +// when its certificate chain is still valid, so setup re-runs do not rotate +// the CA. Serving-certificate renewal re-signs with the stored ca.key so +// caBundle stays stable. Rotation rolls the operator Deployment and, until +// the rollout completes, fail-closed validating webhooks reject mcpruntime.org +// writes — reuse and CA-stable renewal keep re-runs free of that window. +func ensureOperatorWebhookTLSSecretClientGo() ([]byte, error) { + now := time.Now().UTC() + caCertPEM, caKeyPEM, certPEM, keyPEM := readOperatorWebhookTLSSecretClientGo() + caCertPEM, caKeyPEM, certPEM, keyPEM, changed, err := resolveOperatorWebhookTLSSecret(now, caCertPEM, caKeyPEM, certPEM, keyPEM) + if err != nil { + return nil, err + } + if !changed { + return caCertPEM, nil + } + if err := applyManifestYAML(operatorWebhookTLSSecretManifest(caCertPEM, caKeyPEM, certPEM, keyPEM), "", os.Stdout); err != nil { + return nil, err + } + return caCertPEM, nil +} + +// ensureOperatorWebhookTLSSecret mirrors ensureOperatorWebhookTLSSecretClientGo +// for the kubectl deploy path. +func ensureOperatorWebhookTLSSecret(kubectl core.KubectlRunner) ([]byte, error) { + now := time.Now().UTC() + caCertPEM, caKeyPEM, certPEM, keyPEM := readOperatorWebhookTLSSecret(kubectl) + caCertPEM, caKeyPEM, certPEM, keyPEM, changed, err := resolveOperatorWebhookTLSSecret(now, caCertPEM, caKeyPEM, certPEM, keyPEM) + if err != nil { + return nil, err + } + if !changed { + return caCertPEM, nil + } + if err := kube.ApplyManifestContent(kubectl.CommandArgs, operatorWebhookTLSSecretManifest(caCertPEM, caKeyPEM, certPEM, keyPEM)); err != nil { + return nil, err + } + return caCertPEM, nil +} + +func readOperatorWebhookTLSSecretClientGo() (caCertPEM, caKeyPEM, certPEM, keyPEM []byte) { + clients, err := platformKubernetesClients() + if err != nil { + return nil, nil, nil, nil + } + secret, err := clients.Clientset.CoreV1().Secrets(core.NamespaceMCPRuntime).Get(context.Background(), operatorWebhookSecretName, metav1.GetOptions{}) + if err != nil { + return nil, nil, nil, nil + } + return secret.Data["ca.crt"], secret.Data["ca.key"], secret.Data["tls.crt"], secret.Data["tls.key"] +} + +func readOperatorWebhookTLSSecret(kubectl core.KubectlRunner) (caCertPEM, caKeyPEM, certPEM, keyPEM []byte) { + cmd, err := kubectl.CommandArgs([]string{ + "get", "secret", operatorWebhookSecretName, + "-n", core.NamespaceMCPRuntime, + "--ignore-not-found", "-o", "json", + }) + if err != nil { + return nil, nil, nil, nil + } + out, err := cmd.Output() + if err != nil || len(bytes.TrimSpace(out)) == 0 { + return nil, nil, nil, nil + } + var secret struct { + Data map[string]string `json:"data"` + } + if err := json.Unmarshal(out, &secret); err != nil { + return nil, nil, nil, nil + } + decode := func(key string) []byte { + raw, err := base64.StdEncoding.DecodeString(secret.Data[key]) + if err != nil { + return nil + } + return raw + } + return decode("ca.crt"), decode("ca.key"), decode("tls.crt"), decode("tls.key") +} + +func operatorWebhookServingCertRenewalNeeded(certPEM []byte, now time.Time) bool { + cert, err := parsePEMCertificate(certPEM) + if err != nil { + return true + } + return now.Add(operatorWebhookCertRenewalWindow).After(cert.NotAfter) +} + +func reusableOperatorWebhookCA(caCertPEM, caKeyPEM []byte, now time.Time) bool { + if len(caCertPEM) == 0 || len(caKeyPEM) == 0 { + return false + } + caCert, err := parsePEMCertificate(caCertPEM) + if err != nil { + return false + } + if now.Add(operatorWebhookCertRenewalWindow).After(caCert.NotAfter) { + return false + } + _, _, err = parseOperatorWebhookCA(caCertPEM, caKeyPEM) + return err == nil +} + +func reusableOperatorWebhookServingCertChain(caCertPEM, certPEM, keyPEM []byte, now time.Time) bool { + caCert, err := parsePEMCertificate(caCertPEM) + if err != nil { + return false + } + cert, err := parsePEMCertificate(certPEM) + if err != nil { + return false + } + keyBlock, _ := pem.Decode(keyPEM) + if keyBlock == nil { + return false + } + key, err := x509.ParsePKCS1PrivateKey(keyBlock.Bytes) + if err != nil { + return false + } + certKey, ok := cert.PublicKey.(*rsa.PublicKey) + if !ok || !key.PublicKey.Equal(certKey) { + return false + } + roots := x509.NewCertPool() + roots.AddCert(caCert) + _, err = cert.Verify(x509.VerifyOptions{ + DNSName: operatorWebhookServiceName + "." + core.NamespaceMCPRuntime + ".svc", + Roots: roots, + CurrentTime: now, + }) + return err == nil +} + +// reusableOperatorWebhookServingCert reports whether an existing webhook +// Secret can be kept as-is: the serving certificate must match the stored +// private key, chain to the stored CA for the webhook service DNS name, and +// stay valid beyond the renewal window. Secrets written before ca.crt or +// ca.key was stored fail the check and are regenerated. +func reusableOperatorWebhookServingCert(caCertPEM, certPEM, keyPEM []byte, now time.Time) bool { + if len(caCertPEM) == 0 { + return false + } + if operatorWebhookServingCertRenewalNeeded(certPEM, now) { + return false + } + return reusableOperatorWebhookServingCertChain(caCertPEM, certPEM, keyPEM, now) +} + +func parsePEMCertificate(pemBytes []byte) (*x509.Certificate, error) { + block, _ := pem.Decode(pemBytes) + if block == nil { + return nil, errors.New("no PEM certificate block") + } + return x509.ParseCertificate(block.Bytes) +} + +func configureOperatorWebhookDeployment(mutator *manifest.Mutator, caBundlePEM []byte) error { + if err := mutator.MergeDeploymentEnv(core.OperatorDeploymentName, core.OperatorManagerContainerName, map[string]string{ + "MCP_ENABLE_WEBHOOKS": "true", + }); err != nil { + return fmt.Errorf("enable operator webhooks: %w", err) + } + webhookCAHash := sha256.Sum256(caBundlePEM) + if err := mutator.MergeDeploymentTemplateAnnotations(core.OperatorDeploymentName, map[string]string{ + operatorWebhookCertHashAnnotation: hex.EncodeToString(webhookCAHash[:]), + }); err != nil { + return fmt.Errorf("annotate operator webhook certificate hash: %w", err) + } + if err := mutator.MergeDeploymentVolumes(core.OperatorDeploymentName, []map[string]any{{ + "name": operatorWebhookVolumeName, + "secret": map[string]any{ + "secretName": operatorWebhookSecretName, + }, + }}); err != nil { + return fmt.Errorf("add operator webhook certificate volume: %w", err) + } + if err := mutator.MergeDeploymentVolumeMounts(core.OperatorDeploymentName, core.OperatorManagerContainerName, []map[string]any{{ + "name": operatorWebhookVolumeName, + "mountPath": operatorWebhookCertDir, + "readOnly": true, + }}); err != nil { + return fmt.Errorf("add operator webhook certificate mount: %w", err) + } + return nil +} + +func waitForOperatorWebhookRolloutClientGo() error { + core.Info("Waiting for operator webhook rollout") + return waitForRolloutStatusWithClientGo("deployment", core.OperatorDeploymentName, core.NamespaceMCPRuntime, analyticsRolloutTimeoutDuration()) +} + +func waitForOperatorWebhookRollout(kubectl core.KubectlRunner) error { + core.Info("Waiting for operator webhook rollout") + return waitForRolloutStatusWithKubectl(kubectl, "deployment", core.OperatorDeploymentName, core.NamespaceMCPRuntime, analyticsRolloutTimeoutString()) +} + +func applyOperatorWebhookManifestsClientGo(caBundlePEM []byte) error { + if err := waitForOperatorWebhookRolloutClientGo(); err != nil { + return fmt.Errorf("operator deployment not ready before webhook registration: %w", err) + } + + servicePath, err := assetpath.ResolveRepoAssetPath("config/webhook/service.yaml") + if err != nil { + return err + } + if err := applyManifestFile(servicePath, "", os.Stdout); err != nil { + return err + } + + webhookYAML, err := readRepoAsset("config/webhook/manifests.yaml") + if err != nil { + return fmt.Errorf("read operator webhook manifests: %w", err) + } + rendered, err := injectOperatorWebhookCABundle(webhookYAML, caBundlePEM) + if err != nil { + return err + } + return applyManifestYAML(string(rendered), "", os.Stdout) +} + +func applyOperatorWebhookManifests(kubectl core.KubectlRunner, caBundlePEM []byte) error { + if err := waitForOperatorWebhookRollout(kubectl); err != nil { + return fmt.Errorf("operator deployment not ready before webhook registration: %w", err) + } + + serviceYAML, err := readRepoAsset("config/webhook/service.yaml") + if err != nil { + return fmt.Errorf("read operator webhook service manifest: %w", err) + } + if err := kube.ApplyManifestContent(kubectl.CommandArgs, string(serviceYAML)); err != nil { + return err + } + + webhookYAML, err := readRepoAsset("config/webhook/manifests.yaml") + if err != nil { + return fmt.Errorf("read operator webhook manifests: %w", err) + } + rendered, err := injectOperatorWebhookCABundle(webhookYAML, caBundlePEM) + if err != nil { + return err + } + return kube.ApplyManifestContent(kubectl.CommandArgs, string(rendered)) +} + +func readRepoAsset(path string) ([]byte, error) { + rootPath, err := assetpath.ResolveRepoRoot() + if err != nil { + return nil, err + } + root, err := os.OpenRoot(rootPath) + if err != nil { + return nil, fmt.Errorf("open repo root: %w", err) + } + defer root.Close() + return root.ReadFile(path) +} + +func injectOperatorWebhookCABundle(webhookYAML, caBundlePEM []byte) ([]byte, error) { + caBundle := base64.StdEncoding.EncodeToString(caBundlePEM) + decoder := yaml.NewDecoder(bytes.NewReader(webhookYAML)) + var docs []map[string]any + injected := 0 + + for { + var doc map[string]any + err := decoder.Decode(&doc) + if errors.Is(err, io.EOF) { + break + } + if err != nil { + return nil, fmt.Errorf("decode webhook manifest: %w", err) + } + if len(doc) == 0 { + continue + } + qualifyWebhookConfigurationName(doc) + + webhooks, ok := doc["webhooks"].([]any) + if !ok { + docs = append(docs, doc) + continue + } + for _, item := range webhooks { + webhook, ok := item.(map[string]any) + if !ok { + continue + } + clientConfig, ok := webhook["clientConfig"].(map[string]any) + if !ok { + return nil, fmt.Errorf("webhook %q has no clientConfig", stringValue(webhook, "name")) + } + clientConfig["caBundle"] = caBundle + injected++ + } + docs = append(docs, doc) + } + if injected == 0 { + return nil, fmt.Errorf("no webhook clientConfig blocks found") + } + + var out bytes.Buffer + encoder := yaml.NewEncoder(&out) + for i, doc := range docs { + if err := encoder.Encode(doc); err != nil { + return nil, fmt.Errorf("encode webhook manifest %d: %w", i, err) + } + } + if err := encoder.Close(); err != nil { + return nil, fmt.Errorf("close webhook manifest encoder: %w", err) + } + return out.Bytes(), nil +} + +func qualifyWebhookConfigurationName(doc map[string]any) { + kind := stringValue(doc, "kind") + var name string + switch kind { + case "MutatingWebhookConfiguration": + name = "mcp-runtime-mutating-webhook-configuration" + case "ValidatingWebhookConfiguration": + name = "mcp-runtime-validating-webhook-configuration" + default: + return + } + metadata, ok := doc["metadata"].(map[string]any) + if !ok { + metadata = map[string]any{} + doc["metadata"] = metadata + } + metadata["name"] = name +} + +func stringValue(values map[string]any, key string) string { + value, _ := values[key].(string) + return value +} diff --git a/internal/operator/controller.go b/internal/operator/controller.go index 454249dd..01415536 100644 --- a/internal/operator/controller.go +++ b/internal/operator/controller.go @@ -3,7 +3,6 @@ package operator import ( "context" "fmt" - "reflect" "strings" "time" @@ -114,19 +113,11 @@ func (r *MCPServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( logger.Info("Reconciling MCPServer", "name", mcpServer.Name, "namespace", mcpServer.Namespace) + mcpServer = r.defaultedMCPServerForReconcile(mcpServer) if err := r.validateMCPServerSpec(ctx, mcpServer, logger); err != nil { return ctrl.Result{}, err } - // Set defaults and update spec only if changed - requeue, err := r.applyDefaultsIfNeeded(ctx, mcpServer, logger) - if err != nil { - return ctrl.Result{}, err - } - if requeue { - return ctrl.Result{}, nil - } - if err := r.validateIngressConfig(ctx, mcpServer, logger); err != nil { return ctrl.Result{}, err } @@ -166,20 +157,6 @@ func (r *MCPServerReconciler) fetchMCPServer(ctx context.Context, req ctrl.Reque return &mcpServer, true, nil } -func (r *MCPServerReconciler) applyDefaultsIfNeeded(ctx context.Context, mcpServer *mcpv1alpha1.MCPServer, logger logr.Logger) (bool, error) { - original := mcpServer.DeepCopy() - r.setDefaults(mcpServer) - if reflect.DeepEqual(original.Spec, mcpServer.Spec) { - return false, nil - } - if err := r.Update(ctx, mcpServer); err != nil { - logger.Error(err, "Failed to update MCPServer spec with defaults") - return false, err - } - // Requeue to work with the updated object and avoid stale data - return true, nil -} - func (r *MCPServerReconciler) validateMCPServerSpec(ctx context.Context, mcpServer *mcpv1alpha1.MCPServer, logger logr.Logger) error { if _, err := mcpServer.ValidateCreate(); err != nil { r.updateStatus(ctx, mcpServer, "Error", err.Error(), resourceReadiness{}) @@ -347,22 +324,15 @@ func determinePhase(readiness resourceReadiness, mcpServer *mcpv1alpha1.MCPServe return "Pending", false } -func (r *MCPServerReconciler) setDefaults(mcpServer *mcpv1alpha1.MCPServer) { - ingressHostUnset := strings.TrimSpace(mcpServer.Spec.IngressHost) == "" - publicPathPrefixUnset := strings.TrimSpace(mcpServer.Spec.PublicPathPrefix) == "" - - mcpServer.Default() - - if ingressHostUnset && publicPathPrefixUnset { - mcpServer.Spec.IngressHost = strings.TrimSpace(r.DefaultIngressHost) - } - - if mcpServer.Spec.Analytics != nil && !mcpServer.Spec.Analytics.Disabled { - if strings.TrimSpace(mcpServer.Spec.Analytics.IngestURL) == "" { - mcpServer.Spec.Analytics.IngestURL = strings.TrimSpace(r.DefaultAnalyticsIngestURL) - } - } +func (r *MCPServerReconciler) defaultedMCPServerForReconcile(mcpServer *mcpv1alpha1.MCPServer) *mcpv1alpha1.MCPServer { + defaulted := mcpServer.DeepCopy() + defaulted.DefaultWithOptions(mcpv1alpha1.MCPServerDefaultOptions{ + DefaultIngressHost: r.DefaultIngressHost, + DefaultAnalyticsIngestURL: r.DefaultAnalyticsIngestURL, + }) + return defaulted } + func (r *MCPServerReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&mcpv1alpha1.MCPServer{}). diff --git a/internal/operator/controller_test.go b/internal/operator/controller_test.go index b713f3f9..48633f24 100644 --- a/internal/operator/controller_test.go +++ b/internal/operator/controller_test.go @@ -2,7 +2,6 @@ package operator import ( "context" - "strings" "testing" "time" @@ -198,99 +197,7 @@ func TestBuildGatewayContainerAppliesConfiguredResources(t *testing.T) { } } -func TestValidateMCPServerSpecRejectsInvalidRolloutValues(t *testing.T) { - scheme := runtime.NewScheme() - if err := mcpv1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("AddToScheme() error = %v", err) - } - - replicas := int32(2) - server := &mcpv1alpha1.MCPServer{ - ObjectMeta: metav1.ObjectMeta{ - Name: "gateway-server", - Namespace: "default", - }, - Spec: mcpv1alpha1.MCPServerSpec{ - Image: "example.com/server", - Replicas: &replicas, - Port: DefaultPort, - PublicPathPrefix: "gateway-server", - Gateway: &mcpv1alpha1.GatewayConfig{ - Enabled: true, - Port: defaultGatewayPort, - Image: "example.com/mcp-gateway:latest", - }, - Rollout: &mcpv1alpha1.RolloutConfig{ - MaxUnavailable: "invalid%", - }, - }, - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource(server). - WithObjects(server.DeepCopy()). - Build() - reconciler := &MCPServerReconciler{ - Client: client, - Scheme: scheme, - } - - err := reconciler.validateMCPServerSpec(context.Background(), server, logr.Discard()) - if err == nil { - t.Fatal("expected rollout validation error") - } - if !strings.Contains(err.Error(), "rollout.maxUnavailable") { - t.Fatalf("expected rollout.maxUnavailable error, got %v", err) - } -} - -func TestValidateMCPServerSpecRequiresOAuthIssuer(t *testing.T) { - scheme := runtime.NewScheme() - if err := mcpv1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("AddToScheme() error = %v", err) - } - - server := &mcpv1alpha1.MCPServer{ - ObjectMeta: metav1.ObjectMeta{ - Name: "gateway-server", - Namespace: "default", - }, - Spec: mcpv1alpha1.MCPServerSpec{ - Image: "example.com/server", - Port: DefaultPort, - PublicPathPrefix: "gateway-server", - Gateway: &mcpv1alpha1.GatewayConfig{ - Enabled: true, - Port: defaultGatewayPort, - Image: "example.com/mcp-gateway:latest", - }, - Auth: &mcpv1alpha1.AuthConfig{ - Mode: mcpv1alpha1.AuthModeOAuth, - }, - }, - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource(server). - WithObjects(server.DeepCopy()). - Build() - reconciler := &MCPServerReconciler{ - Client: client, - Scheme: scheme, - } - - err := reconciler.validateMCPServerSpec(context.Background(), server, logr.Discard()) - if err == nil { - t.Fatal("expected oauth issuer validation error") - } - if !strings.Contains(err.Error(), "auth.issuerURL") { - t.Fatalf("expected auth.issuerURL error, got %v", err) - } -} - -func TestSetDefaults(t *testing.T) { +func TestDefaultedMCPServerForReconcile(t *testing.T) { t.Run("fills all defaults when unset", func(t *testing.T) { mcpServer := mcpv1alpha1.MCPServer{ ObjectMeta: metav1.ObjectMeta{ @@ -299,7 +206,7 @@ func TestSetDefaults(t *testing.T) { }, } r := MCPServerReconciler{Scheme: runtime.NewScheme()} - r.setDefaults(&mcpServer) + mcpServer = *r.defaultedMCPServerForReconcile(&mcpServer) assertReplicas(t, mcpServer.Spec.Replicas, 1) assertEqual(t, "port", mcpServer.Spec.Port, int32(8088)) @@ -318,7 +225,7 @@ func TestSetDefaults(t *testing.T) { Scheme: runtime.NewScheme(), DefaultIngressHost: "example.com", } - r.setDefaults(&mcpServer) + mcpServer = *r.defaultedMCPServerForReconcile(&mcpServer) assertEqual(t, "publicPathPrefix", mcpServer.Spec.PublicPathPrefix, "test-server") assertEqual(t, "ingressHost", mcpServer.Spec.IngressHost, "example.com") }) @@ -334,7 +241,7 @@ func TestSetDefaults(t *testing.T) { Scheme: runtime.NewScheme(), DefaultIngressHost: "example.com", } - r.setDefaults(&mcpServer) + mcpServer = *r.defaultedMCPServerForReconcile(&mcpServer) assertEqual(t, "publicPathPrefix", mcpServer.Spec.PublicPathPrefix, "custom-prefix") assertEqual(t, "ingressHost", mcpServer.Spec.IngressHost, "") }) @@ -352,7 +259,7 @@ func TestSetDefaults(t *testing.T) { }, } r := MCPServerReconciler{Scheme: runtime.NewScheme()} - r.setDefaults(&mcpServer) + mcpServer = *r.defaultedMCPServerForReconcile(&mcpServer) assertReplicas(t, mcpServer.Spec.Replicas, 5) assertEqual(t, "port", mcpServer.Spec.Port, int32(9000)) @@ -370,7 +277,7 @@ func TestSetDefaults(t *testing.T) { }, } r := MCPServerReconciler{Scheme: runtime.NewScheme()} - r.setDefaults(&mcpServer) + mcpServer = *r.defaultedMCPServerForReconcile(&mcpServer) assertEqual(t, "imageTag", mcpServer.Spec.ImageTag, "") }) @@ -383,7 +290,7 @@ func TestSetDefaults(t *testing.T) { }, } r := MCPServerReconciler{Scheme: runtime.NewScheme()} - r.setDefaults(&mcpServer) + mcpServer = *r.defaultedMCPServerForReconcile(&mcpServer) assertEqual(t, "imageTag", mcpServer.Spec.ImageTag, "latest") }) @@ -396,7 +303,7 @@ func TestSetDefaults(t *testing.T) { }, } r := MCPServerReconciler{Scheme: runtime.NewScheme()} - r.setDefaults(&mcpServer) + mcpServer = *r.defaultedMCPServerForReconcile(&mcpServer) assertEqual(t, "imageTag", mcpServer.Spec.ImageTag, "") }) @@ -404,7 +311,7 @@ func TestSetDefaults(t *testing.T) { t.Run("skips ingressPath if name is empty", func(t *testing.T) { mcpServer := mcpv1alpha1.MCPServer{} // No name set r := MCPServerReconciler{Scheme: runtime.NewScheme()} - r.setDefaults(&mcpServer) + mcpServer = *r.defaultedMCPServerForReconcile(&mcpServer) assertEqual(t, "ingressPath", mcpServer.Spec.IngressPath, "") }) @@ -424,7 +331,7 @@ func TestSetDefaults(t *testing.T) { } r := MCPServerReconciler{Scheme: runtime.NewScheme()} - r.setDefaults(&mcpServer) + mcpServer = *r.defaultedMCPServerForReconcile(&mcpServer) if mcpServer.Spec.Gateway == nil { t.Fatal("expected gateway defaults to be applied") @@ -454,7 +361,7 @@ func TestSetDefaults(t *testing.T) { Scheme: runtime.NewScheme(), DefaultAnalyticsIngestURL: "http://mcp-sentinel-ingest.mcp-sentinel.svc.cluster.local:8081/events", } - r.setDefaults(&mcpServer) + mcpServer = *r.defaultedMCPServerForReconcile(&mcpServer) if mcpServer.Spec.Analytics == nil { t.Fatal("expected analytics defaults to be applied") @@ -591,7 +498,7 @@ func TestReconcileDeploymentAddsGatewaySidecar(t *testing.T) { Scheme: scheme, GatewayOTLPEndpoint: "http://otel-collector.mcp-sentinel.svc.cluster.local:4318", } - reconciler.setDefaults(&mcpServer) + mcpServer = *reconciler.defaultedMCPServerForReconcile(&mcpServer) if err := reconciler.reconcileDeployment(context.Background(), &mcpServer); err != nil { t.Fatalf("reconcileDeployment() error = %v", err) @@ -909,48 +816,64 @@ func TestCheckResourceReadiness(t *testing.T) { }) } -func TestApplyDefaultsIfNeeded(t *testing.T) { +func TestDefaultedMCPServerForReconcileDoesNotPersistDefaults(t *testing.T) { scheme := runtime.NewScheme() _ = mcpv1alpha1.AddToScheme(scheme) - t.Run("returns requeue true when defaults are applied", func(t *testing.T) { - mcpServer := &mcpv1alpha1.MCPServer{ - ObjectMeta: metav1.ObjectMeta{Name: "test-server", Namespace: "default"}, - Spec: mcpv1alpha1.MCPServerSpec{Image: "test-image"}, - } - client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(mcpServer).Build() - r := MCPServerReconciler{Client: client, Scheme: scheme} - requeue, err := r.applyDefaultsIfNeeded(context.Background(), mcpServer, logr.Discard()) - if err != nil { - t.Fatalf("failed to apply defaults: %v", err) - } - // Returns true to trigger re-reconciliation after defaults are applied - assertEqual(t, "requeue", requeue, true) - }) + mcpServer := &mcpv1alpha1.MCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "test-server", Namespace: "default"}, + Spec: mcpv1alpha1.MCPServerSpec{Image: "test-image"}, + } + client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(mcpServer).Build() + r := MCPServerReconciler{ + Client: client, + Scheme: scheme, + DefaultIngressHost: "example.com", + DefaultAnalyticsIngestURL: "http://mcp-sentinel-ingest.mcp-sentinel.svc.cluster.local:8081/events", + } - t.Run("returns requeue false when defaults already set", func(t *testing.T) { - replicas := int32(1) - mcpServer := &mcpv1alpha1.MCPServer{ - ObjectMeta: metav1.ObjectMeta{Name: "test-server", Namespace: "default"}, - Spec: mcpv1alpha1.MCPServerSpec{ - Image: "test-image", - ImageTag: "latest", - Port: 8088, - ServicePort: 80, - Replicas: &replicas, - IngressPath: "/test-server/mcp", - IngressClass: "traefik", - PublicPathPrefix: "test-server", - }, - } - client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(mcpServer).Build() - r := MCPServerReconciler{Client: client, Scheme: scheme} - requeue, err := r.applyDefaultsIfNeeded(context.Background(), mcpServer, logr.Discard()) - if err != nil { - t.Fatalf("failed to apply defaults: %v", err) - } - assertEqual(t, "requeue", requeue, false) - }) + defaulted := r.defaultedMCPServerForReconcile(mcpServer) + assertEqual(t, "defaultedPort", defaulted.Spec.Port, int32(8088)) + assertReplicas(t, defaulted.Spec.Replicas, 1) + assertEqual(t, "defaultedIngressHost", defaulted.Spec.IngressHost, "example.com") + + var stored mcpv1alpha1.MCPServer + if err := client.Get(context.Background(), types.NamespacedName{Name: "test-server", Namespace: "default"}, &stored); err != nil { + t.Fatalf("failed to fetch stored MCPServer: %v", err) + } + assertEqual(t, "storedPort", stored.Spec.Port, int32(0)) + if stored.Spec.Replicas != nil { + t.Fatalf("expected stored replicas to stay nil, got %v", *stored.Spec.Replicas) + } + assertEqual(t, "storedIngressHost", stored.Spec.IngressHost, "") +} + +func TestValidateMCPServerSpecRunsAfterDefaulting(t *testing.T) { + scheme := runtime.NewScheme() + if err := mcpv1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("AddToScheme() error = %v", err) + } + server := &mcpv1alpha1.MCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "invalid-server", Namespace: "default"}, + Spec: mcpv1alpha1.MCPServerSpec{ + Image: "example.com/server", + Tools: []mcpv1alpha1.ToolConfig{{ + Name: "read_file", + }}, + }, + } + client := fake.NewClientBuilder(). + WithScheme(scheme). + WithStatusSubresource(server). + WithObjects(server.DeepCopy()). + Build() + reconciler := &MCPServerReconciler{Client: client, Scheme: scheme} + defaulted := reconciler.defaultedMCPServerForReconcile(server) + + err := reconciler.validateMCPServerSpec(context.Background(), defaulted, logr.Discard()) + if err == nil { + t.Fatal("expected validation error for missing tool sideEffect") + } } func TestRequireSpecField(t *testing.T) { @@ -1767,7 +1690,7 @@ func TestReconcile(t *testing.T) { assertEqual(t, "result", result, ctrl.Result{RequeueAfter: 10 * time.Second}) }) - t.Run("returns after applying defaults", func(t *testing.T) { + t.Run("reconciles with local defaults", func(t *testing.T) { mcpServer := &mcpv1alpha1.MCPServer{ ObjectMeta: metav1.ObjectMeta{Name: "test-server", Namespace: "default"}, Spec: mcpv1alpha1.MCPServerSpec{ @@ -1786,7 +1709,16 @@ func TestReconcile(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %v", err) } - assertEqual(t, "result", result, ctrl.Result{}) + assertEqual(t, "result", result, ctrl.Result{RequeueAfter: 10 * time.Second}) + + var stored mcpv1alpha1.MCPServer + if err := client.Get(context.Background(), types.NamespacedName{Name: "test-server", Namespace: "default"}, &stored); err != nil { + t.Fatalf("failed to fetch stored MCPServer: %v", err) + } + assertEqual(t, "storedPort", stored.Spec.Port, int32(0)) + if stored.Spec.Replicas != nil { + t.Fatalf("expected stored replicas to stay nil, got %v", *stored.Spec.Replicas) + } }) } diff --git a/pkg/manifest/mutator.go b/pkg/manifest/mutator.go index 888a5e26..2a0d5681 100644 --- a/pkg/manifest/mutator.go +++ b/pkg/manifest/mutator.go @@ -62,9 +62,7 @@ func (m *Mutator) FindDeployment(name, namespace string) map[string]any { return nil } -// withContainer is a helper that finds a deployment and container, then invokes a callback -// to mutate the container. This eliminates duplicated scaffolding across SetDeployment* methods. -func (m *Mutator) withContainer(deploymentName, containerName string, fn func(map[string]any) error) error { +func (m *Mutator) withPodSpec(deploymentName string, fn func(map[string]any) error) error { deployment := m.FindDeployment(deploymentName, "") if deployment == nil { return fmt.Errorf("deployment %s not found", deploymentName) @@ -74,27 +72,110 @@ func (m *Mutator) withContainer(deploymentName, containerName string, fn func(ma if spec == nil { return fmt.Errorf("deployment %s has no pod spec", deploymentName) } + return fn(spec) +} - containers, ok := spec["containers"].([]any) - if !ok || len(containers) == 0 { - return fmt.Errorf("deployment %s has no containers", deploymentName) - } +// withContainer is a helper that finds a deployment and container, then invokes a callback +// to mutate the container. This eliminates duplicated scaffolding across SetDeployment* methods. +func (m *Mutator) withContainer(deploymentName, containerName string, fn func(map[string]any) error) error { + return m.withPodSpec(deploymentName, func(spec map[string]any) error { + containers, ok := spec["containers"].([]any) + if !ok || len(containers) == 0 { + return fmt.Errorf("deployment %s has no containers", deploymentName) + } - for _, c := range containers { - container, ok := c.(map[string]any) - if !ok { - continue + for _, c := range containers { + container, ok := c.(map[string]any) + if !ok { + continue + } + if containerName == "" || getString(container, "name") == containerName { + return fn(container) + } + } + + if containerName != "" { + return fmt.Errorf("container %s not found in deployment %s", containerName, deploymentName) + } + + return fmt.Errorf("no containers found in deployment %s", deploymentName) + }) +} + +// MergeDeploymentVolumes merges volumes by name into a deployment pod spec. +func (m *Mutator) MergeDeploymentVolumes(deploymentName string, volumes []map[string]any) error { + return m.withPodSpec(deploymentName, func(spec map[string]any) error { + orderedVolumes := make([]any, 0) + nameToIndex := make(map[string]int) + + if existing, ok := spec["volumes"].([]any); ok { + for _, volume := range existing { + volumeEntry, ok := volume.(map[string]any) + if !ok { + continue + } + name := getString(volumeEntry, "name") + if name == "" { + continue + } + nameToIndex[name] = len(orderedVolumes) + orderedVolumes = append(orderedVolumes, volumeEntry) + } } - if containerName == "" || getString(container, "name") == containerName { - return fn(container) + + for _, volume := range volumes { + name := getString(volume, "name") + if name == "" { + return fmt.Errorf("volume name is required") + } + if idx, exists := nameToIndex[name]; exists { + orderedVolumes[idx] = volume + continue + } + nameToIndex[name] = len(orderedVolumes) + orderedVolumes = append(orderedVolumes, volume) } + + spec["volumes"] = orderedVolumes + return nil + }) +} + +// MergeDeploymentTemplateAnnotations merges annotations into a deployment pod template. +func (m *Mutator) MergeDeploymentTemplateAnnotations(deploymentName string, annotations map[string]string) error { + deployment := m.FindDeployment(deploymentName, "") + if deployment == nil { + return fmt.Errorf("deployment %s not found", deploymentName) } - if containerName != "" { - return fmt.Errorf("container %s not found in deployment %s", containerName, deploymentName) + spec, ok := deployment["spec"].(map[string]any) + if !ok { + return fmt.Errorf("deployment %s has no spec", deploymentName) + } + template, ok := spec["template"].(map[string]any) + if !ok { + return fmt.Errorf("deployment %s has no pod template", deploymentName) + } + metadata, ok := template["metadata"].(map[string]any) + if !ok { + metadata = map[string]any{} + template["metadata"] = metadata + } + existing, ok := metadata["annotations"].(map[string]any) + if !ok { + existing = map[string]any{} + metadata["annotations"] = existing } - return fmt.Errorf("no containers found in deployment %s", deploymentName) + names := make([]string, 0, len(annotations)) + for name := range annotations { + names = append(names, name) + } + sort.Strings(names) + for _, name := range names { + existing[name] = annotations[name] + } + return nil } // SetDeploymentImage sets the container image for a specific container in a deployment. @@ -280,6 +361,45 @@ func (m *Mutator) MergeDeploymentEnv(deploymentName, containerName string, envVa }) } +// MergeDeploymentVolumeMounts merges volume mounts by name into a deployment container. +func (m *Mutator) MergeDeploymentVolumeMounts(deploymentName, containerName string, volumeMounts []map[string]any) error { + return m.withContainer(deploymentName, containerName, func(container map[string]any) error { + orderedMounts := make([]any, 0) + nameToIndex := make(map[string]int) + + if existing, ok := container["volumeMounts"].([]any); ok { + for _, mount := range existing { + mountEntry, ok := mount.(map[string]any) + if !ok { + continue + } + name := getString(mountEntry, "name") + if name == "" { + continue + } + nameToIndex[name] = len(orderedMounts) + orderedMounts = append(orderedMounts, mountEntry) + } + } + + for _, mount := range volumeMounts { + name := getString(mount, "name") + if name == "" { + return fmt.Errorf("volume mount name is required") + } + if idx, exists := nameToIndex[name]; exists { + orderedMounts[idx] = mount + continue + } + nameToIndex[name] = len(orderedMounts) + orderedMounts = append(orderedMounts, mount) + } + + container["volumeMounts"] = orderedMounts + return nil + }) +} + // ToYAML renders the mutated manifests back to YAML. func (m *Mutator) ToYAML() ([]byte, error) { var buf bytes.Buffer diff --git a/pkg/manifest/mutator_test.go b/pkg/manifest/mutator_test.go index 5b90cfa9..26af40c8 100644 --- a/pkg/manifest/mutator_test.go +++ b/pkg/manifest/mutator_test.go @@ -228,6 +228,103 @@ spec: } } +func TestMergeDeploymentVolumesAndVolumeMounts(t *testing.T) { + yaml := ` +apiVersion: apps/v1 +kind: Deployment +metadata: + name: test-deployment +spec: + template: + spec: + volumes: + - name: tmp + emptyDir: {} + containers: + - name: test-container + image: nginx:latest + volumeMounts: + - name: tmp + mountPath: /tmp +` + m, err := NewMutator([]byte(yaml)) + if err != nil { + t.Fatalf("NewMutator failed: %v", err) + } + + if err := m.MergeDeploymentVolumes("test-deployment", []map[string]any{ + { + "name": "webhook-server-cert", + "secret": map[string]any{ + "secretName": "webhook-cert", + }, + }, + }); err != nil { + t.Fatalf("MergeDeploymentVolumes failed: %v", err) + } + if err := m.MergeDeploymentVolumeMounts("test-deployment", "test-container", []map[string]any{ + { + "name": "webhook-server-cert", + "mountPath": "/tmp/k8s-webhook-server/serving-certs", + "readOnly": true, + }, + }); err != nil { + t.Fatalf("MergeDeploymentVolumeMounts failed: %v", err) + } + + deployment := m.FindDeployment("test-deployment", "") + spec := getMap(deployment, "spec", "template", "spec") + volumes := spec["volumes"].([]any) + if len(volumes) != 2 { + t.Fatalf("volumes len = %d, want 2", len(volumes)) + } + containers := spec["containers"].([]any) + container := containers[0].(map[string]any) + mounts := container["volumeMounts"].([]any) + if len(mounts) != 2 { + t.Fatalf("volumeMounts len = %d, want 2", len(mounts)) + } +} + +func TestMergeDeploymentTemplateAnnotations(t *testing.T) { + yaml := ` +apiVersion: apps/v1 +kind: Deployment +metadata: + name: test-deployment +spec: + template: + metadata: + annotations: + existing: value + spec: + containers: + - name: test-container + image: nginx:latest +` + m, err := NewMutator([]byte(yaml)) + if err != nil { + t.Fatalf("NewMutator failed: %v", err) + } + + if err := m.MergeDeploymentTemplateAnnotations("test-deployment", map[string]string{ + "existing": "updated", + "new": "value", + }); err != nil { + t.Fatalf("MergeDeploymentTemplateAnnotations failed: %v", err) + } + + deployment := m.FindDeployment("test-deployment", "") + metadata := getMap(deployment, "spec", "template", "metadata") + annotations := metadata["annotations"].(map[string]any) + if annotations["existing"] != "updated" { + t.Fatalf("existing annotation = %v, want updated", annotations["existing"]) + } + if annotations["new"] != "value" { + t.Fatalf("new annotation = %v, want value", annotations["new"]) + } +} + func TestMergeDeploymentArgsHandlesSpaceSeparatedFlags(t *testing.T) { yaml := ` apiVersion: apps/v1 diff --git a/test/integration/envtest_test.go b/test/integration/envtest_test.go index 4225ef4f..a33f905f 100644 --- a/test/integration/envtest_test.go +++ b/test/integration/envtest_test.go @@ -95,8 +95,8 @@ func TestControllerWithEnvtest(t *testing.T) { testReconcileHandlesDeletion(t, cfg, scheme) }) - t.Run("ReconcileAppliesDefaults", func(t *testing.T) { - testReconcileAppliesDefaults(t, cfg, scheme) + t.Run("ReconcileUsesDefaultsWithoutPersistingSpec", func(t *testing.T) { + testReconcileUsesDefaultsWithoutPersistingSpec(t, cfg, scheme) }) } @@ -225,7 +225,7 @@ func testReconcileHandlesDeletion(t *testing.T, cfg *rest.Config, scheme *runtim t.Log("MCPServer deletion handled successfully") } -func testReconcileAppliesDefaults(t *testing.T, cfg *rest.Config, scheme *runtime.Scheme) { +func testReconcileUsesDefaultsWithoutPersistingSpec(t *testing.T, cfg *rest.Config, scheme *runtime.Scheme) { _, k8sClient, cancel := startManager(t, cfg, scheme) defer cancel() @@ -249,27 +249,41 @@ func testReconcileAppliesDefaults(t *testing.T, cfg *rest.Config, scheme *runtim t.Fatalf("failed to create MCPServer: %v", err) } - // Wait for defaults to be applied - time.Sleep(3 * time.Second) + key := types.NamespacedName{Name: "defaults-test", Namespace: "test-defaults"} + + var deployment appsv1.Deployment + if err := waitForResource(ctx, k8sClient, &deployment, key, 30*time.Second); err != nil { + t.Fatalf("deployment not created: %v", err) + } + if len(deployment.Spec.Template.Spec.Containers) == 0 { + t.Fatal("deployment should have at least one container") + } + ports := deployment.Spec.Template.Spec.Containers[0].Ports + if len(ports) == 0 || ports[0].ContainerPort != 8088 { + t.Fatalf("expected default container port 8088, got %#v", ports) + } + + var service corev1.Service + if err := waitForResource(ctx, k8sClient, &service, key, 10*time.Second); err != nil { + t.Fatalf("service not created: %v", err) + } + if len(service.Spec.Ports) == 0 || service.Spec.Ports[0].Port != 80 { + t.Fatalf("expected default service port 80, got %#v", service.Spec.Ports) + } var updated mcpv1alpha1.MCPServer - key := types.NamespacedName{Name: "defaults-test", Namespace: "test-defaults"} if err := k8sClient.Get(ctx, key, &updated); err != nil { t.Fatalf("failed to get MCPServer: %v", err) } - - if updated.Spec.Port == 0 { - t.Error("default port should be applied") + if updated.Spec.Port != 0 { + t.Fatalf("controller should not persist default port, got %d", updated.Spec.Port) } - if updated.Spec.Replicas == nil || *updated.Spec.Replicas == 0 { - t.Error("default replicas should be applied") + if updated.Spec.Replicas != nil { + t.Fatalf("controller should not persist default replicas, got %v", *updated.Spec.Replicas) } - if updated.Spec.ImageTag == "" { - t.Error("default imageTag should be applied") + if updated.Spec.ImageTag != "" { + t.Fatalf("controller should not persist default imageTag, got %q", updated.Spec.ImageTag) } - - t.Logf("Defaults applied: port=%d, replicas=%d, imageTag=%s", - updated.Spec.Port, *updated.Spec.Replicas, updated.Spec.ImageTag) } // Helper functions