From 566032d34dab1a43084b55a4e55a761c4e64044c Mon Sep 17 00:00:00 2001 From: Prince Roshan Date: Sat, 16 May 2026 01:02:56 +0530 Subject: [PATCH] fix(sentinel): close audit drop and login attempt gaps --- services/api/platform_auth.go | 46 ++++++++++++++++++++++++- services/api/platform_auth_test.go | 38 +++++++++++++++++++++ services/mcp-gateway/analytics.go | 6 +++- services/mcp-gateway/main_test.go | 3 ++ services/mcp-gateway/types.go | 1 + services/ui/main.go | 54 ++++++++++++++++++++++++++---- services/ui/main_test.go | 39 +++++++++++++++++++++ 7 files changed, 179 insertions(+), 8 deletions(-) diff --git a/services/api/platform_auth.go b/services/api/platform_auth.go index 8b4a405a..e394c130 100644 --- a/services/api/platform_auth.go +++ b/services/api/platform_auth.go @@ -20,6 +20,10 @@ const ( apiLoginLockoutBase = 15 * time.Second apiLoginLockoutMax = 5 * time.Minute ) +const ( + apiLoginAttemptIdleTTL = 30 * time.Minute + apiLoginAttemptMaxEntries = 4096 +) const ( platformSignupRequestMaxBytes = 4 * 1024 platformPasswordLoginRequestMaxBytes = 4 * 1024 @@ -43,6 +47,7 @@ type passwordUserEnsurer interface { type apiLoginAttempt struct { failures int lockedUntil time.Time + lastSeen time.Time } type apiLoginAttemptTracker struct { @@ -61,8 +66,14 @@ func newAPILoginAttemptTracker(nowFn func() time.Time) *apiLoginAttemptTracker { func (t *apiLoginAttemptTracker) allow(key string) bool { t.mu.Lock() defer t.mu.Unlock() - state := t.entries[key] now := t.nowFunc() + t.pruneLocked(now) + state, ok := t.entries[key] + if !ok { + return true + } + state.lastSeen = now + t.entries[key] = state if state.lockedUntil.IsZero() || !state.lockedUntil.After(now) { return true } @@ -73,22 +84,55 @@ func (t *apiLoginAttemptTracker) recordFailure(key string) int { t.mu.Lock() defer t.mu.Unlock() now := t.nowFunc() + t.pruneLocked(now) state := t.entries[key] state.failures++ state.lockedUntil = now.Add(lockoutDurationForFailures(state.failures)) + state.lastSeen = now t.entries[key] = state + t.enforceMaxLocked() return state.failures } func (t *apiLoginAttemptTracker) recordSuccess(key string) int { t.mu.Lock() defer t.mu.Unlock() + now := t.nowFunc() + t.pruneLocked(now) state := t.entries[key] failures := state.failures delete(t.entries, key) return failures } +func (t *apiLoginAttemptTracker) pruneLocked(now time.Time) { + if apiLoginAttemptIdleTTL <= 0 { + return + } + for key, state := range t.entries { + if state.lastSeen.IsZero() || (now.Sub(state.lastSeen) > apiLoginAttemptIdleTTL && !state.lockedUntil.After(now)) { + delete(t.entries, key) + } + } +} + +func (t *apiLoginAttemptTracker) enforceMaxLocked() { + for len(t.entries) > apiLoginAttemptMaxEntries { + var oldestKey string + var oldestSeen time.Time + for key, state := range t.entries { + if oldestKey == "" || state.lastSeen.Before(oldestSeen) { + oldestKey = key + oldestSeen = state.lastSeen + } + } + if oldestKey == "" { + return + } + delete(t.entries, oldestKey) + } +} + func lockoutDurationForFailures(failures int) time.Duration { if failures <= 2 { return 0 diff --git a/services/api/platform_auth_test.go b/services/api/platform_auth_test.go index 98aca0e1..11bbe4c1 100644 --- a/services/api/platform_auth_test.go +++ b/services/api/platform_auth_test.go @@ -3,6 +3,7 @@ package main import ( "context" "errors" + "fmt" "testing" "time" ) @@ -99,3 +100,40 @@ func TestOpenPlatformStoreWithRetryRetriesUntilSuccess(t *testing.T) { t.Fatalf("open attempts = %d, want 2", calls) } } + +func TestAPILoginAttemptTrackerPrunesIdleEntries(t *testing.T) { + now := time.Unix(1_700_000_000, 0) + tracker := newAPILoginAttemptTracker(func() time.Time { + return now + }) + + tracker.recordFailure("client-old") + now = now.Add(apiLoginAttemptIdleTTL + time.Second) + tracker.recordFailure("client-new") + + if _, ok := tracker.entries["client-old"]; ok { + t.Fatal("idle login attempt entry was not pruned") + } + if _, ok := tracker.entries["client-new"]; !ok { + t.Fatal("new login attempt entry missing") + } +} + +func TestAPILoginAttemptTrackerCapsRetainedEntries(t *testing.T) { + now := time.Unix(1_700_000_000, 0) + tracker := newAPILoginAttemptTracker(func() time.Time { + return now + }) + + for i := 0; i < apiLoginAttemptMaxEntries+1; i++ { + tracker.recordFailure(fmt.Sprintf("client-%d", i)) + now = now.Add(time.Millisecond) + } + + if got := len(tracker.entries); got != apiLoginAttemptMaxEntries { + t.Fatalf("login attempt entries = %d, want %d", got, apiLoginAttemptMaxEntries) + } + if _, ok := tracker.entries["client-0"]; ok { + t.Fatal("oldest login attempt entry was not evicted") + } +} diff --git a/services/mcp-gateway/analytics.go b/services/mcp-gateway/analytics.go index aa5f4be4..1b80c14f 100644 --- a/services/mcp-gateway/analytics.go +++ b/services/mcp-gateway/analytics.go @@ -72,8 +72,8 @@ func (s *gatewayServer) emitIfEnabled(ctx context.Context, event events.Envelope return } s.analyticsMu.Lock() - defer s.analyticsMu.Unlock() if s.analyticsClosed { + s.analyticsMu.Unlock() return } item := analyticsEvent{ @@ -82,7 +82,11 @@ func (s *gatewayServer) emitIfEnabled(ctx context.Context, event events.Envelope } select { case queue <- item: + s.analyticsMu.Unlock() default: + dropped := s.analyticsDropped.Add(1) + s.analyticsMu.Unlock() + log.Printf("gateway analytics queue full; dropped event total=%d source=%q event_type=%q", dropped, event.Source, event.EventType) } } diff --git a/services/mcp-gateway/main_test.go b/services/mcp-gateway/main_test.go index 92c7a732..c5378c40 100644 --- a/services/mcp-gateway/main_test.go +++ b/services/mcp-gateway/main_test.go @@ -489,6 +489,9 @@ func TestEmitIfEnabledDropsWhenQueueIsFull(t *testing.T) { default: t.Fatal("analytics queue unexpectedly drained") } + if got := proxy.analyticsDropped.Load(); got != 1 { + t.Fatalf("analytics dropped count = %d, want 1", got) + } } func TestStopAnalyticsDispatcherDrainsQueue(t *testing.T) { diff --git a/services/mcp-gateway/types.go b/services/mcp-gateway/types.go index b6997e22..bf2e8257 100644 --- a/services/mcp-gateway/types.go +++ b/services/mcp-gateway/types.go @@ -79,6 +79,7 @@ type gatewayServer struct { analyticsOnce sync.Once analyticsWG sync.WaitGroup analyticsClosed bool + analyticsDropped atomic.Uint64 oauthMu sync.Mutex oauthProviders map[string]*oauthProvider policyState atomic.Value diff --git a/services/ui/main.go b/services/ui/main.go index 3255ebe9..314a329c 100644 --- a/services/ui/main.go +++ b/services/ui/main.go @@ -39,6 +39,8 @@ const ( defaultLoginFailureWindow = 15 * time.Minute defaultLoginFailureThreshold = 5 defaultLoginLockoutDuration = 5 * time.Minute + loginAttemptIdleTTL = 30 * time.Minute + loginAttemptMaxClients = 4096 loginFailureLogEvery = 3 loginRequestMaxBytes = 8 * 1024 ) @@ -84,6 +86,7 @@ type loginAttemptTracker struct { type loginClientState struct { tokens int lastRefill time.Time + lastSeen time.Time failures int failuresExpire time.Time lockedUntil time.Time @@ -721,8 +724,10 @@ func (t *loginAttemptTracker) allow(clientID string) bool { t.mu.Lock() defer t.mu.Unlock() - state := t.stateForLocked(clientID) now := t.now() + t.pruneLocked(now) + state := t.stateForLocked(clientID, now) + state.lastSeen = now refillLoginTokens(state, now) if now.Before(state.lockedUntil) { return false @@ -731,6 +736,7 @@ func (t *loginAttemptTracker) allow(clientID string) bool { return false } state.tokens-- + t.enforceMaxLocked() return true } @@ -738,8 +744,10 @@ func (t *loginAttemptTracker) recordFailure(clientID string) int { t.mu.Lock() defer t.mu.Unlock() - state := t.stateForLocked(clientID) now := t.now() + t.pruneLocked(now) + state := t.stateForLocked(clientID, now) + state.lastSeen = now if now.After(state.failuresExpire) { state.failures = 0 } @@ -748,6 +756,7 @@ func (t *loginAttemptTracker) recordFailure(clientID string) int { if state.failures >= loginFailureThreshold { state.lockedUntil = now.Add(loginLockoutDuration) } + t.enforceMaxLocked() return state.failures } @@ -755,7 +764,13 @@ func (t *loginAttemptTracker) recordSuccess(clientID string) int { t.mu.Lock() defer t.mu.Unlock() - state := t.stateForLocked(clientID) + now := t.now() + t.pruneLocked(now) + state := t.clients[clientID] + if state == nil { + return 0 + } + state.lastSeen = now prior := state.failures state.failures = 0 state.failuresExpire = time.Time{} @@ -763,16 +778,43 @@ func (t *loginAttemptTracker) recordSuccess(clientID string) int { return prior } -func (t *loginAttemptTracker) stateForLocked(clientID string) *loginClientState { +func (t *loginAttemptTracker) stateForLocked(clientID string, now time.Time) *loginClientState { state := t.clients[clientID] if state == nil { - now := t.now() - state = &loginClientState{tokens: loginRateLimitCapacity, lastRefill: now} + state = &loginClientState{tokens: loginRateLimitCapacity, lastRefill: now, lastSeen: now} t.clients[clientID] = state } return state } +func (t *loginAttemptTracker) pruneLocked(now time.Time) { + if loginAttemptIdleTTL <= 0 { + return + } + for clientID, state := range t.clients { + if state.lastSeen.IsZero() || (now.Sub(state.lastSeen) > loginAttemptIdleTTL && !now.Before(state.lockedUntil)) { + delete(t.clients, clientID) + } + } +} + +func (t *loginAttemptTracker) enforceMaxLocked() { + for len(t.clients) > loginAttemptMaxClients { + var oldestClientID string + var oldestSeen time.Time + for clientID, state := range t.clients { + if oldestClientID == "" || state.lastSeen.Before(oldestSeen) { + oldestClientID = clientID + oldestSeen = state.lastSeen + } + } + if oldestClientID == "" { + return + } + delete(t.clients, oldestClientID) + } +} + func refillLoginTokens(state *loginClientState, now time.Time) { if state.lastRefill.IsZero() { state.lastRefill = now diff --git a/services/ui/main_test.go b/services/ui/main_test.go index bf7f8fe4..ef9944f1 100644 --- a/services/ui/main_test.go +++ b/services/ui/main_test.go @@ -982,6 +982,45 @@ func TestHandleLoginSuccessResetsFailureCounter(t *testing.T) { } } +func TestLoginAttemptTrackerPrunesIdleClients(t *testing.T) { + now := time.Unix(1_700_000_000, 0) + tracker := newLoginAttemptTracker(func() time.Time { + return now + }) + + tracker.recordFailure("client-old") + now = now.Add(loginAttemptIdleTTL + time.Second) + tracker.recordFailure("client-new") + + if _, ok := tracker.clients["client-old"]; ok { + t.Fatal("idle login attempt client was not pruned") + } + if _, ok := tracker.clients["client-new"]; !ok { + t.Fatal("new login attempt client missing") + } +} + +func TestLoginAttemptTrackerCapsRetainedClients(t *testing.T) { + now := time.Unix(1_700_000_000, 0) + tracker := newLoginAttemptTracker(func() time.Time { + return now + }) + + for i := 0; i < loginAttemptMaxClients+1; i++ { + if !tracker.allow(fmt.Sprintf("client-%d", i)) { + t.Fatalf("client-%d should be allowed on first attempt", i) + } + now = now.Add(time.Millisecond) + } + + if got := len(tracker.clients); got != loginAttemptMaxClients { + t.Fatalf("login attempt clients = %d, want %d", got, loginAttemptMaxClients) + } + if _, ok := tracker.clients["client-0"]; ok { + t.Fatal("oldest login attempt client was not evicted") + } +} + func useLoginAttemptTrackerForTest(t *testing.T) func() { t.Helper() previous := loginAttempts